wined3d: Swizzle D3DCOLOR attributes in the GLSL FFP replacement when necessary.
[wine.git] / dlls / d3dx9_36 / preshader.c
blob7373c41ec2c08c3d7c4f36de64d5f32c6b12dcf3
/*
 * Copyright 2016 Paul Gofman
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
19 #include "config.h"
20 #include "wine/port.h"
22 #include "d3dx9_private.h"
24 #include <float.h>
26 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
/* Preshader operations. parse_pres_ins() stores the index of the matching
 * pres_op_info[] entry in this type, so the enumerator order has to stay in
 * sync with that table. */
enum pres_ops
{
    PRESHADER_OP_NOP,
    PRESHADER_OP_MOV,
    PRESHADER_OP_ADD,
    PRESHADER_OP_MUL,
    PRESHADER_OP_DOT,
    PRESHADER_OP_NEG,
    PRESHADER_OP_RCP,
    PRESHADER_OP_LT,
    PRESHADER_OP_FRC,
    PRESHADER_OP_MIN,
    PRESHADER_OP_MAX,
    PRESHADER_OP_GE,
    PRESHADER_OP_CMP,
    PRESHADER_OP_SIN,
    PRESHADER_OP_COS,
    PRESHADER_OP_RSQ,
    PRESHADER_OP_EXP,
    PRESHADER_OP_DOTSWIZ6,
    PRESHADER_OP_DOTSWIZ8,
};
/* Common signature of the operation handlers: args holds the gathered input
 * values, n is the component count of the instruction being executed. */
typedef double (*pres_op_func)(double *args, int n);
/* mov: returns the first input unchanged. */
static double pres_mov(double *args, int n)
{
    return args[0];
}

/* add: sum of the first two inputs. */
static double pres_add(double *args, int n)
{
    return args[0] + args[1];
}

/* mul: product of the first two inputs. */
static double pres_mul(double *args, int n)
{
    return args[0] * args[1];
}
/* dot: dot product of two n-component vectors stored back to back in args
 * (first vector in args[0..n-1], second in args[n..2n-1]). */
static double pres_dot(double *args, int n)
{
    double sum = 0.0;
    int i;

    for (i = 0; i < n; ++i)
        sum += args[i] * args[i + n];
    return sum;
}

/* d3ds_dotswiz with 6 inputs: 3-component dot product; n is ignored. */
static double pres_dotswiz6(double *args, int n)
{
    return pres_dot(args, 3);
}

/* d3ds_dotswiz with 8 inputs: 4-component dot product; n is ignored. */
static double pres_dotswiz8(double *args, int n)
{
    return pres_dot(args, 4);
}
77 static double pres_neg(double *args, int n) {return -args[0];}
78 static double pres_rcp(double *args, int n) {return 1.0 / args[0];}
79 static double pres_lt(double *args, int n) {return args[0] < args[1] ? 1.0 : 0.0;}
80 static double pres_ge(double *args, int n) {return args[0] >= args[1] ? 1.0 : 0.0;}
81 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);}
82 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);}
83 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);}
84 static double pres_cmp(double *args, int n) {return args[0] < 0.0 ? args[2] : args[1];}
85 static double pres_sin(double *args, int n) {return sin(args[0]);}
86 static double pres_cos(double *args, int n) {return cos(args[0]);}
87 static double pres_rsq(double *args, int n)
89 double v;
91 v = fabs(args[0]);
92 if (v == 0.0)
93 return INFINITY;
94 else
95 return 1.0 / sqrt(v);
97 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);}
/* Layout of the first word of a preshader instruction, as decoded by
 * parse_pres_ins(): opcode bits, scalar flag, component count. */
#define PRES_OPCODE_MASK 0x7ff00000
#define PRES_OPCODE_SHIFT 20
#define PRES_SCALAR_FLAG 0x80000000
#define PRES_NCOMP_MASK 0x0000ffff

/* Comment section tags; each value spells its ASCII name when read from the
 * low byte upwards ("PRES", "CLIT", "FXLC", "PRSI"). */
#define FOURCC_PRES 0x53455250
#define FOURCC_CLIT 0x54494c43
#define FOURCC_FXLC 0x434c5846
#define FOURCC_PRSI 0x49535250
#define PRES_SIGN 0x46580000
110 struct op_info
112 unsigned int opcode;
113 char mnem[16];
114 unsigned int input_count;
115 BOOL func_all_comps;
116 pres_op_func func;
119 static const struct op_info pres_op_info[] =
121 {0x000, "nop", 0, 0, NULL }, /* PRESHADER_OP_NOP */
122 {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */
123 {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */
124 {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */
125 {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */
126 {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */
127 {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */
128 {0x202, "lt", 2, 0, pres_lt }, /* PRESHADER_OP_LT */
129 {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */
130 {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */
131 {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */
132 {0x203, "ge", 2, 0, pres_ge }, /* PRESHADER_OP_GE */
133 {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */
134 {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */
135 {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */
136 {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */
137 {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */
138 {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */
139 {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */
/* Storage type of the components held in a register table. */
enum pres_value_type
{
    PRES_VT_FLOAT,
    PRES_VT_DOUBLE,
    PRES_VT_INT,
    PRES_VT_BOOL
};
150 static const struct
152 unsigned int component_size;
153 unsigned int reg_component_count;
154 enum pres_value_type type;
156 table_info[] =
158 {sizeof(double), 1, PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
159 {sizeof(float), 4, PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
160 {sizeof(float), 4, PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
161 {sizeof(BOOL), 1, PRES_VT_BOOL }, /* PRES_REGTAB_OBCONST */
162 {sizeof(int), 4, PRES_VT_INT, }, /* PRES_REGTAB_OICONST */
163 /* TODO: use double precision for 64 bit */
164 {sizeof(float), 4, PRES_VT_FLOAT } /* PRES_REGTAB_TEMP */
/* Register table prefixes used by the dump helpers, indexed by register
 * table. The trailing "(null)" entry is what gets printed for a table index
 * one past the last named prefix. */
static const char *table_symbol[] =
{
    "imm", "c", "oc", "ob", "oi", "r", "(null)",
};
172 static const enum pres_reg_tables pres_regset2table[] =
174 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */
175 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */
176 PRES_REGTAB_CONST, /* D3DXRS_FLOAT4 */
177 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */
180 static const enum pres_reg_tables shad_regset2table[] =
182 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */
183 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */
184 PRES_REGTAB_OCONST, /* D3DXRS_FLOAT4 */
185 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */
188 struct d3dx_pres_operand
190 enum pres_reg_tables table;
191 /* offset is component index, not register index, e. g.
192 offset for component c3.y is 13 (3 * 4 + 1) */
193 unsigned int offset;
196 #define MAX_INPUTS_COUNT 8
198 struct d3dx_pres_ins
200 enum pres_ops op;
201 /* first input argument is scalar,
202 scalar component is propagated */
203 BOOL scalar_op;
204 unsigned int component_count;
205 struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
206 struct d3dx_pres_operand output;
209 static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
211 return offset / table_info[table].reg_component_count;
/* Number of register flags stored in one unsigned int of a "value set"
 * bitmask. */
#define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
216 static HRESULT init_set_constants(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab);
218 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
220 unsigned int size;
222 size = rs->table_sizes[table] * table_info[table].reg_component_count * table_info[table].component_size;
223 if (size)
225 rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
226 rs->table_value_set[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
227 sizeof(*rs->table_value_set[table]) *
228 ((rs->table_sizes[table] + PRES_BITMASK_BLOCK_SIZE - 1) / PRES_BITMASK_BLOCK_SIZE));
229 if (!rs->tables[table] || !rs->table_value_set[table])
230 return E_OUTOFMEMORY;
232 return D3D_OK;
235 static void regstore_free_tables(struct d3dx_regstore *rs)
237 unsigned int i;
239 for (i = 0; i < PRES_REGTAB_COUNT; ++i)
241 HeapFree(GetProcessHeap(), 0, rs->tables[i]);
242 HeapFree(GetProcessHeap(), 0, rs->table_value_set[i]);
246 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, void *data,
247 unsigned int start_offset, unsigned int count)
249 unsigned int block_idx, start, end, start_block, end_block;
251 if (!count)
252 return;
254 memcpy((BYTE *)rs->tables[table] + start_offset * table_info[table].component_size,
255 data, count * table_info[table].component_size);
257 start = get_reg_offset(table, start_offset);
258 start_block = start / PRES_BITMASK_BLOCK_SIZE;
259 start -= start_block * PRES_BITMASK_BLOCK_SIZE;
260 end = get_reg_offset(table, start_offset + count - 1);
261 end_block = end / PRES_BITMASK_BLOCK_SIZE;
262 end = (end_block + 1) * PRES_BITMASK_BLOCK_SIZE - 1 - end;
264 if (start_block == end_block)
266 rs->table_value_set[table][start_block] |= (~0u << start) & (~0u >> end);
268 else
270 rs->table_value_set[table][start_block] |= ~0u << start;
272 for (block_idx = start_block + 1; block_idx < end_block; ++block_idx)
273 rs->table_value_set[table][block_idx] = ~0u;
275 rs->table_value_set[table][end_block] |= ~0u >> end;
279 static unsigned int regstore_is_val_set_reg(struct d3dx_regstore *rs, unsigned int table, unsigned int reg_idx)
281 return rs->table_value_set[table][reg_idx / PRES_BITMASK_BLOCK_SIZE] &
282 (1u << (reg_idx % PRES_BITMASK_BLOCK_SIZE));
285 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset)
287 BYTE *p;
289 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
290 switch (table_info[table].type)
292 case PRES_VT_FLOAT:
293 return *(float *)p;
294 case PRES_VT_DOUBLE:
295 return *(double *)p;
296 default:
297 FIXME("Unexpected preshader input from table %u.\n", table);
298 return NAN;
302 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v)
304 BYTE *p;
305 unsigned int reg_idx;
307 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
308 switch (table_info[table].type)
310 case PRES_VT_FLOAT : *(float *)p = v; break;
311 case PRES_VT_DOUBLE: *(double *)p = v; break;
312 case PRES_VT_INT : *(int *)p = lrint(v); break;
313 case PRES_VT_BOOL : *(BOOL *)p = !!v; break;
315 reg_idx = get_reg_offset(table, offset);
316 rs->table_value_set[table][reg_idx / PRES_BITMASK_BLOCK_SIZE] |=
317 1u << (reg_idx % PRES_BITMASK_BLOCK_SIZE);
320 static void regstore_reset_table(struct d3dx_regstore *rs, unsigned int table)
322 unsigned int size;
324 size = rs->table_sizes[table] * table_info[table].reg_component_count * table_info[table].component_size;
326 memset(rs->tables[table], 0, size);
327 memset(rs->table_value_set[table], 0,
328 sizeof(*rs->table_value_set[table]) *
329 ((rs->table_sizes[table] + PRES_BITMASK_BLOCK_SIZE - 1) / PRES_BITMASK_BLOCK_SIZE));
/* Traces the byte code as 32-bit hexadecimal words, at most eight per
 * output line. size is given in bytes. */
static void dump_bytecode(void *data, unsigned int size)
{
    unsigned int *words = (unsigned int *)data;
    unsigned int word_count, pos, chunk, k;

    word_count = size / sizeof(*words);
    for (pos = 0; pos < word_count; pos += chunk)
    {
        chunk = min(word_count - pos, 8);
        for (k = 0; k < chunk; ++k)
            TRACE("0x%08x,", words[pos + k]);
        TRACE("\n");
    }
}
/* Walks consecutive comment tokens (low 16 bits 0xfffe, length in words in
 * the high 16 bits) looking for one whose first payload word equals fourcc.
 *
 * ptr/count describe the words available for scanning. On success returns a
 * pointer to the word following the fourcc and stores the comment length in
 * words (fourcc included) in *size. Returns NULL when no matching comment is
 * found; *size is left untouched in that case. */
static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
        unsigned int fourcc, unsigned int *size)
{
    /* Provide at least one value in comment section on non-NULL return. */
    while (count > 2 && (*ptr & 0xffff) == 0xfffe)
    {
        unsigned int section_size;

        section_size = (*ptr >> 16);
        /* Stop on an empty comment or one that overruns the buffer. */
        if (!section_size || section_size + 1 > count)
            break;
        if (*(ptr + 1) == fourcc)
        {
            *size = section_size;
            return ptr + 2;
        }
        count -= section_size + 1;
        ptr += section_size + 1;
    }
    return NULL;
}
371 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
373 static const enum pres_reg_tables reg_table[8] =
375 PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT,
376 PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
379 if (count < 3)
381 WARN("Byte code buffer ends unexpectedly.\n");
382 return NULL;
385 if (*ptr)
387 FIXME("Relative addressing not supported yet, word %#x.\n", *ptr);
388 return NULL;
390 ++ptr;
392 if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT)
394 FIXME("Unsupported register table %#x.\n", *ptr);
395 return NULL;
397 opr->table = reg_table[*ptr++];
398 opr->offset = *ptr++;
400 if (opr->table == PRES_REGTAB_OBCONST)
401 opr->offset /= 4;
402 return ptr;
405 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
407 unsigned int ins_code, ins_raw;
408 unsigned int input_count;
409 unsigned int i;
411 if (count < 2)
413 WARN("Byte code buffer ends unexpectedly.\n");
414 return NULL;
417 ins_raw = *ptr++;
418 ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
419 ins->component_count = ins_raw & PRES_NCOMP_MASK;
420 ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);
422 if (ins->component_count < 1 || ins->component_count > 4)
424 FIXME("Unsupported number of components %u.\n", ins->component_count);
425 return NULL;
427 input_count = *ptr++;
428 count -= 2;
429 for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
430 if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count)
431 break;
432 if (i == ARRAY_SIZE(pres_op_info))
434 FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw);
435 return NULL;
437 ins->op = i;
438 if (input_count > ARRAY_SIZE(ins->inputs))
440 FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count,
441 pres_op_info[i].mnem);
442 return NULL;
444 for (i = 0; i < input_count; ++i)
446 unsigned int *p;
448 p = parse_pres_arg(ptr, count, &ins->inputs[i]);
449 if (!p)
450 return NULL;
451 count -= p - ptr;
452 ptr = p;
454 ptr = parse_pres_arg(ptr, count, &ins->output);
455 return ptr;
458 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc)
460 D3DXCONSTANT_DESC buffer[2];
461 HRESULT hr;
462 unsigned int count;
464 count = ARRAY_SIZE(buffer);
465 if (FAILED(hr = ID3DXConstantTable_GetConstantDesc(ctab, hc, buffer, &count)))
467 FIXME("Could not get constant desc, hr %#x.\n", hr);
468 return hr;
470 else if (count != 1)
472 FIXME("Unexpected constant descriptors count %u.\n", count);
473 return D3DERR_INVALIDCALL;
475 *desc = buffer[0];
476 return D3D_OK;
479 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out, struct d3dx9_base_effect *base)
481 ID3DXConstantTable *ctab;
482 D3DXCONSTANT_DESC *cdesc;
483 struct d3dx_parameter **inputs_param;
484 D3DXCONSTANTTABLE_DESC desc;
485 HRESULT hr;
486 D3DXHANDLE hc;
487 unsigned int i;
489 out->inputs = cdesc = NULL;
490 out->inputs_param = NULL;
491 out->input_count = 0;
492 inputs_param = NULL;
493 hr = D3DXGetShaderConstantTable(byte_code, &ctab);
494 if (FAILED(hr) || !ctab)
496 TRACE("Could not get CTAB data, hr %#x.\n", hr);
497 /* returning OK, shaders and preshaders without CTAB are valid */
498 return D3D_OK;
500 if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc)))
502 FIXME("Could not get CTAB desc, hr %#x.\n", hr);
503 goto err_out;
506 cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
507 inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
508 if (!cdesc || !inputs_param)
510 hr = E_OUTOFMEMORY;
511 goto err_out;
514 for (i = 0; i < desc.Constants; ++i)
516 hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
517 if (!hc)
519 FIXME("Null constant handle.\n");
520 goto err_out;
522 if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[i])))
523 goto err_out;
524 inputs_param[i] = get_parameter_by_name(base, NULL, cdesc[i].Name);
525 if (cdesc[i].Class == D3DXPC_OBJECT)
526 TRACE("Object %s, parameter %p.\n", cdesc[i].Name, inputs_param[i]);
527 else if (!inputs_param[i])
528 WARN("Could not find parameter %s in effect.\n", cdesc[i].Name);
530 out->input_count = desc.Constants;
531 out->inputs = cdesc;
532 out->inputs_param = inputs_param;
533 hr = init_set_constants(out, ctab);
534 ID3DXConstantTable_Release(ctab);
535 return hr;
536 err_out:
537 HeapFree(GetProcessHeap(), 0, cdesc);
538 HeapFree(GetProcessHeap(), 0, inputs_param);
539 if (ctab)
540 ID3DXConstantTable_Release(ctab);
541 return hr;
544 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
546 if (table < PRES_REGTAB_COUNT)
547 table_sizes[table] = max(table_sizes[table], max_register + 1);
550 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
552 unsigned int i, table, max_register;
554 for (i = 0; i < ctab->input_count; ++i)
556 if (!ctab->inputs[i].RegisterCount)
557 continue;
558 max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1;
559 table = ctab->regset2table[ctab->inputs[i].RegisterSet];
560 update_table_size(table_sizes, table, max_register);
564 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count)
566 static const char *xyzw_str = "xyzw";
567 unsigned int i, table;
569 table = arg->table;
570 if (table == PRES_REGTAB_IMMED)
572 TRACE("(");
573 for (i = 0; i < component_count; ++i)
574 TRACE(i < component_count - 1 ? "%.16e, " : "%.16e",
575 ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->offset + i]);
576 TRACE(")");
578 else
580 TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->offset));
581 for (i = 0; i < component_count; ++i)
582 TRACE("%c", xyzw_str[(arg->offset + i) % 4]);
586 static void dump_registers(struct d3dx_const_tab *ctab)
588 unsigned int table, i;
590 for (i = 0; i < ctab->input_count; ++i)
592 table = ctab->regset2table[ctab->inputs[i].RegisterSet];
593 TRACE("// %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)",
594 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount);
598 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins)
600 unsigned int i;
602 TRACE(" %s ", pres_op_info[ins->op].mnem);
603 dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count);
604 for (i = 0; i < pres_op_info[ins->op].input_count; ++i)
606 TRACE(", ");
607 dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count);
609 TRACE("\n");
612 static void dump_preshader(struct d3dx_preshader *pres)
614 unsigned int i;
616 TRACE("// Preshader registers:\n");
617 dump_registers(&pres->inputs);
618 TRACE(" preshader\n");
619 for (i = 0; i < pres->ins_count; ++i)
620 dump_ins(&pres->regs, &pres->ins[i]);
623 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base)
625 unsigned int *p;
626 unsigned int i, j, const_count;
627 double *dconst;
628 HRESULT hr;
629 unsigned int saved_word;
630 unsigned int section_size;
632 TRACE("Preshader version %#x.\n", *ptr & 0xffff);
634 if (!count)
636 WARN("Unexpected end of byte code buffer.\n");
637 return D3DXERR_INVALIDDATA;
640 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
641 if (p)
643 const_count = *p++;
644 if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
646 WARN("Byte code buffer ends unexpectedly.\n");
647 return D3DXERR_INVALIDDATA;
649 dconst = (double *)p;
651 else
653 const_count = 0;
654 dconst = NULL;
656 TRACE("%u double constants.\n", const_count);
658 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
659 if (!p)
661 WARN("Could not find preshader code.\n");
662 return D3D_OK;
664 pres->ins_count = *p++;
665 --section_size;
666 if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
668 WARN("Invalid instruction count %u.\n", pres->ins_count);
669 return D3DXERR_INVALIDDATA;
671 TRACE("%u instructions.\n", pres->ins_count);
672 pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
673 if (!pres->ins)
674 return E_OUTOFMEMORY;
675 for (i = 0; i < pres->ins_count; ++i)
677 unsigned int *ptr_next;
679 ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
680 if (!ptr_next)
681 return D3DXERR_INVALIDDATA;
682 section_size -= ptr_next - p;
683 p = ptr_next;
686 pres->inputs.regset2table = pres_regset2table;
688 saved_word = *ptr;
689 *ptr = 0xfffe0000;
690 hr = get_constants_desc(ptr, &pres->inputs, base);
691 *ptr = saved_word;
692 if (FAILED(hr))
693 return hr;
695 pres->regs.table_sizes[PRES_REGTAB_IMMED] = const_count;
697 for (i = 0; i < pres->ins_count; ++i)
699 for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
700 update_table_size(pres->regs.table_sizes, pres->ins[i].inputs[j].table,
701 get_reg_offset(pres->ins[i].inputs[j].table,
702 pres->ins[i].inputs[j].offset + pres->ins[i].component_count - 1));
704 update_table_size(pres->regs.table_sizes, pres->ins[i].output.table,
705 get_reg_offset(pres->ins[i].output.table,
706 pres->ins[i].output.offset + pres->ins[i].component_count - 1));
708 update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
709 if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
710 return E_OUTOFMEMORY;
711 regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);
713 return D3D_OK;
716 void d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size,
717 D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out)
719 struct d3dx_param_eval *peval;
720 unsigned int *ptr;
721 HRESULT hr;
722 unsigned int i;
723 BOOL shader;
724 unsigned int count, pres_size;
726 TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
727 base_effect, byte_code, byte_code_size, type, peval_out);
729 count = byte_code_size / sizeof(unsigned int);
730 if (!byte_code || !count)
732 *peval_out = NULL;
733 return;
736 peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval));
737 if (!peval)
738 goto err_out;
740 peval->param_type = type;
741 switch (type)
743 case D3DXPT_VERTEXSHADER:
744 case D3DXPT_PIXELSHADER:
745 shader = TRUE;
746 break;
747 default:
748 shader = FALSE;
749 break;
751 peval->shader_inputs.regset2table = shad_regset2table;
753 ptr = (unsigned int *)byte_code;
754 if (shader)
756 if ((*ptr & 0xfffe0000) != 0xfffe0000)
758 FIXME("Invalid shader signature %#x.\n", *ptr);
759 goto err_out;
761 TRACE("Shader version %#x.\n", *ptr & 0xffff);
763 if (FAILED(hr = get_constants_desc(ptr, &peval->shader_inputs, base_effect)))
765 FIXME("Could not get shader constant table, hr %#x.\n", hr);
766 goto err_out;
768 update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
769 ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
770 if (!ptr)
771 TRACE("No preshader found.\n");
773 else
775 pres_size = count;
778 if (ptr && FAILED(parse_preshader(&peval->pres, ptr, pres_size, base_effect)))
780 FIXME("Failed parsing preshader, byte code for analysis follows.\n");
781 dump_bytecode(byte_code, byte_code_size);
782 goto err_out;
785 for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i)
787 if (FAILED(regstore_alloc_table(&peval->pres.regs, i)))
788 goto err_out;
791 if (TRACE_ON(d3dx))
793 dump_bytecode(byte_code, byte_code_size);
794 dump_preshader(&peval->pres);
795 if (shader)
797 TRACE("// Shader registers:\n");
798 dump_registers(&peval->shader_inputs);
801 *peval_out = peval;
802 TRACE("Created parameter evaluator %p.\n", *peval_out);
803 return;
805 err_out:
806 FIXME("Error creating parameter evaluator.\n");
807 d3dx_free_param_eval(peval);
808 *peval_out = NULL;
811 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
813 HeapFree(GetProcessHeap(), 0, ctab->inputs);
814 HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
815 HeapFree(GetProcessHeap(), 0, ctab->const_set);
818 static void d3dx_free_preshader(struct d3dx_preshader *pres)
820 HeapFree(GetProcessHeap(), 0, pres->ins);
822 regstore_free_tables(&pres->regs);
823 d3dx_free_const_tab(&pres->inputs);
826 void d3dx_free_param_eval(struct d3dx_param_eval *peval)
828 TRACE("peval %p.\n", peval);
830 if (!peval)
831 return;
833 d3dx_free_preshader(&peval->pres);
834 d3dx_free_const_tab(&peval->shader_inputs);
835 HeapFree(GetProcessHeap(), 0, peval);
838 static void set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab)
840 unsigned int const_idx;
842 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
844 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
845 unsigned int table = const_set->table;
846 struct d3dx_parameter *param = const_set->param;
847 enum pres_value_type table_type = table_info[table].type;
848 unsigned int i, j, n, start_offset;
849 unsigned int minor, major, major_stride, param_offset;
850 BOOL transpose;
851 unsigned int count;
853 transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
854 || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS);
855 if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS)
857 major = param->columns;
858 minor = param->rows;
860 else
862 major = param->rows;
863 minor = param->columns;
865 start_offset = const_set->register_index * table_info[table].reg_component_count;
866 major_stride = max(minor, table_info[table].reg_component_count);
867 n = min(major * major_stride,
868 const_set->register_count * table_info[table].reg_component_count + major_stride - 1) / major_stride;
869 count = n * minor;
870 if (((param->type == D3DXPT_FLOAT && table_type == PRES_VT_FLOAT)
871 || (param->type == D3DXPT_INT && table_type == PRES_VT_INT)
872 || (param->type == D3DXPT_BOOL && table_type == PRES_VT_BOOL))
873 && !transpose && minor == major_stride
874 && count == table_info[table].reg_component_count * const_set->register_count
875 && count * sizeof(unsigned int) <= param->bytes)
877 regstore_set_values(rs, table, param->data, start_offset, count);
878 continue;
881 for (i = 0; i < n; ++i)
883 for (j = 0; j < minor; ++j)
885 unsigned int out;
886 unsigned int *in;
887 unsigned int offset;
889 offset = start_offset + i * major_stride + j;
890 if (offset / table_info[table].reg_component_count >= rs->table_sizes[table])
892 if (table_info[table].reg_component_count != 1)
893 FIXME("Output offset exceeds table size, name %s, component %u.\n",
894 debugstr_a(param->name), i);
895 break;
897 if (transpose)
898 param_offset = i + j * major;
899 else
900 param_offset = i * minor + j;
901 if (param_offset * sizeof(unsigned int) >= param->bytes)
903 WARN("Parameter data is too short, name %s, component %u.\n", debugstr_a(param->name), i);
904 break;
907 in = (unsigned int *)param->data + param_offset;
908 switch (table_type)
910 case PRES_VT_FLOAT: set_number(&out, D3DXPT_FLOAT, in, param->type); break;
911 case PRES_VT_INT: set_number(&out, D3DXPT_INT, in, param->type); break;
912 case PRES_VT_BOOL: set_number(&out, D3DXPT_BOOL, in, param->type); break;
913 default:
914 FIXME("Unexpected type %#x.\n", table_info[table].type);
915 break;
917 regstore_set_values(rs, table, &out, offset, 1);
923 #define INITIAL_CONST_SET_SIZE 16
925 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set)
927 if (const_tab->const_set_count >= const_tab->const_set_size)
929 unsigned int new_size;
930 struct d3dx_const_param_eval_output *new_alloc;
932 if (!const_tab->const_set_size)
934 new_size = INITIAL_CONST_SET_SIZE;
935 new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size);
936 if (!new_alloc)
938 ERR("Out of memory.\n");
939 return E_OUTOFMEMORY;
942 else
944 new_size = const_tab->const_set_size * 2;
945 new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
946 sizeof(*const_tab->const_set) * new_size);
947 if (!new_alloc)
949 ERR("Out of memory.\n");
950 return E_OUTOFMEMORY;
953 const_tab->const_set = new_alloc;
954 const_tab->const_set_size = new_size;
956 const_tab->const_set[const_tab->const_set_count++] = *set;
957 return D3D_OK;
960 static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
961 D3DXHANDLE hc, struct d3dx_parameter *param)
963 D3DXCONSTANT_DESC desc;
964 unsigned int const_count, param_count, i;
965 BOOL get_element;
966 struct d3dx_const_param_eval_output const_set;
967 HRESULT hr;
969 if (FAILED(get_ctab_constant_desc(ctab, hc, &desc)))
970 return D3DERR_INVALIDCALL;
972 if (param->element_count)
974 param_count = param->element_count;
975 const_count = desc.Elements;
976 get_element = TRUE;
978 else
980 if (desc.Elements > 1)
982 FIXME("Unexpected number of constant elements %u.\n", desc.Elements);
983 return D3DERR_INVALIDCALL;
985 param_count = param->member_count;
986 const_count = desc.StructMembers;
987 get_element = FALSE;
989 if (const_count != param_count)
991 FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n",
992 param_count, const_count);
993 return D3DERR_INVALIDCALL;
995 if (const_count)
997 HRESULT ret;
998 D3DXHANDLE hc_element;
1000 ret = D3D_OK;
1001 for (i = 0; i < const_count; ++i)
1003 if (get_element)
1004 hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i);
1005 else
1006 hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i);
1007 if (!hc_element)
1009 FIXME("Could not get constant.\n");
1010 hr = D3DERR_INVALIDCALL;
1012 else
1014 hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i]);
1016 if (FAILED(hr))
1017 ret = hr;
1019 return ret;
1022 TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n",
1023 debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes);
1024 TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n",
1025 debugstr_a(param->name), param->rows, param->columns, param->class,
1026 param->flags, param->bytes);
1028 const_set.param = param;
1029 const_set.constant_class = desc.Class;
1030 if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table))
1032 FIXME("Unknown register set %u.\n", desc.RegisterSet);
1033 return D3DERR_INVALIDCALL;
1035 const_set.register_index = desc.RegisterIndex;
1036 const_set.table = const_tab->regset2table[desc.RegisterSet];
1037 if (const_set.table >= PRES_REGTAB_COUNT)
1039 ERR("Unexpected register set %u.\n", desc.RegisterSet);
1040 return D3DERR_INVALIDCALL;
1042 const_set.register_count = desc.RegisterCount;
1043 if (FAILED(hr = append_const_set(const_tab, &const_set)))
1044 return hr;
1046 return D3D_OK;
1049 static HRESULT init_set_constants(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab)
1051 unsigned int i;
1052 HRESULT hr, ret;
1053 D3DXHANDLE hc;
1055 ret = D3D_OK;
1056 for (i = 0; i < const_tab->input_count; ++i)
1058 if (!const_tab->inputs_param[i] || const_tab->inputs_param[i]->class == D3DXPC_OBJECT)
1059 continue;
1060 hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
1061 if (hc)
1063 hr = init_set_constants_param(const_tab, ctab, hc, const_tab->inputs_param[i]);
1065 else
1067 FIXME("Could not get constant, index %u.\n", i);
1068 hr = D3DERR_INVALIDCALL;
1070 if (FAILED(hr))
1071 ret = hr;
1074 if (const_tab->const_set_count)
1076 const_tab->const_set = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
1077 sizeof(*const_tab->const_set) * const_tab->const_set_count);
1078 if (!const_tab->const_set)
1080 ERR("Out of memory.\n");
1081 return E_OUTOFMEMORY;
1083 const_tab->const_set_size = const_tab->const_set_count;
1085 return ret;
1088 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins,
1089 const struct d3dx_pres_operand *opr, unsigned int comp)
1091 if (!regstore_is_val_set_reg(rs, opr->table, (opr->offset + comp) / table_info[opr->table].reg_component_count))
1093 WARN("Using uninitialized input ");
1094 dump_arg(rs, opr, comp);
1095 TRACE(".\n");
1096 dump_ins(rs, ins);
1098 return regstore_get_double(rs, opr->table, opr->offset + comp);
1101 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr,
1102 unsigned int comp, double res)
1104 regstore_set_double(rs, opr->table, opr->offset + comp, res);
1107 #define ARGS_ARRAY_SIZE 8
1108 static HRESULT execute_preshader(struct d3dx_preshader *pres)
1110 unsigned int i, j, k;
1111 double args[ARGS_ARRAY_SIZE];
1112 double res;
1114 for (i = 0; i < pres->ins_count; ++i)
1116 const struct d3dx_pres_ins *ins;
1117 const struct op_info *oi;
1119 ins = &pres->ins[i];
1120 oi = &pres_op_info[ins->op];
1121 if (oi->func_all_comps)
1123 if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE)
1125 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count);
1126 return E_FAIL;
1128 for (k = 0; k < oi->input_count; ++k)
1129 for (j = 0; j < ins->component_count; ++j)
1130 args[k * ins->component_count + j] = exec_get_arg(&pres->regs, ins, &ins->inputs[k],
1131 ins->scalar_op && !k ? 0 : j);
1132 res = oi->func(args, ins->component_count);
1134 /* only 'dot' instruction currently falls here */
1135 exec_set_arg(&pres->regs, &ins->output, 0, res);
1137 else
1139 for (j = 0; j < ins->component_count; ++j)
1141 for (k = 0; k < oi->input_count; ++k)
1142 args[k] = exec_get_arg(&pres->regs, ins, &ins->inputs[k], ins->scalar_op && !k ? 0 : j);
1143 res = oi->func(args, ins->component_count);
1144 exec_set_arg(&pres->regs, &ins->output, j, res);
1148 return D3D_OK;
1151 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param, void *param_value)
1153 HRESULT hr;
1154 unsigned int i;
1155 unsigned int elements, elements_param, elements_table;
1156 float *oc;
1158 TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
1160 set_constants(&peval->pres.regs, &peval->pres.inputs);
1162 if (FAILED(hr = execute_preshader(&peval->pres)))
1163 return hr;
1165 elements_table = table_info[PRES_REGTAB_OCONST].reg_component_count
1166 * peval->pres.regs.table_sizes[PRES_REGTAB_OCONST];
1167 elements_param = param->bytes / sizeof(unsigned int);
1168 elements = min(elements_table, elements_param);
1169 oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST];
1170 for (i = 0; i < elements; ++i)
1171 set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT);
1172 return D3D_OK;
1175 static HRESULT set_shader_constants_device(struct IDirect3DDevice9 *device, struct d3dx_regstore *rs,
1176 D3DXPARAMETER_TYPE type, enum pres_reg_tables table)
1178 unsigned int start, count;
1179 void *ptr;
1180 HRESULT hr, result;
1182 result = D3D_OK;
1183 start = 0;
1184 while (start < rs->table_sizes[table])
1186 count = 0;
1187 while (start < rs->table_sizes[table] && !regstore_is_val_set_reg(rs, table, start))
1188 ++start;
1189 while (start + count < rs->table_sizes[table] && regstore_is_val_set_reg(rs, table, start + count))
1190 ++count;
1191 if (!count)
1192 break;
1193 TRACE("Setting %u constants at %u.\n", count, start);
1194 ptr = (BYTE *)rs->tables[table] + start * table_info[table].reg_component_count
1195 * table_info[table].component_size;
1196 if (type == D3DXPT_VERTEXSHADER)
1198 switch(table)
1200 case PRES_REGTAB_OCONST:
1201 hr = IDirect3DDevice9_SetVertexShaderConstantF(device, start, (const float *)ptr, count);
1202 break;
1203 case PRES_REGTAB_OICONST:
1204 hr = IDirect3DDevice9_SetVertexShaderConstantI(device, start, (const int *)ptr, count);
1205 break;
1206 case PRES_REGTAB_OBCONST:
1207 hr = IDirect3DDevice9_SetVertexShaderConstantB(device, start, (const BOOL *)ptr, count);
1208 break;
1209 default:
1210 FIXME("Unexpected register table %u.\n", table);
1211 return D3DERR_INVALIDCALL;
1214 else if (type == D3DXPT_PIXELSHADER)
1216 switch(table)
1218 case PRES_REGTAB_OCONST:
1219 hr = IDirect3DDevice9_SetPixelShaderConstantF(device, start, (const float *)ptr, count);
1220 break;
1221 case PRES_REGTAB_OICONST:
1222 hr = IDirect3DDevice9_SetPixelShaderConstantI(device, start, (const int *)ptr, count);
1223 break;
1224 case PRES_REGTAB_OBCONST:
1225 hr = IDirect3DDevice9_SetPixelShaderConstantB(device, start, (const BOOL *)ptr, count);
1226 break;
1227 default:
1228 FIXME("Unexpected register table %u.\n", table);
1229 return D3DERR_INVALIDCALL;
1232 else
1234 FIXME("Unexpected parameter type %u.\n", type);
1235 return D3DERR_INVALIDCALL;
1238 if (FAILED(hr))
1240 ERR("Setting constants failed, type %u, table %u, hr %#x.\n", type, table, hr);
1241 result = hr;
1243 start += count;
1245 regstore_reset_table(rs, table);
1246 return result;
1249 HRESULT d3dx_param_eval_set_shader_constants(struct IDirect3DDevice9 *device, struct d3dx_param_eval *peval)
1251 static const enum pres_reg_tables set_tables[] =
1252 {PRES_REGTAB_OCONST, PRES_REGTAB_OICONST, PRES_REGTAB_OBCONST};
1253 HRESULT hr, result;
1254 struct d3dx_preshader *pres = &peval->pres;
1255 struct d3dx_regstore *rs = &pres->regs;
1256 unsigned int i;
1258 TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
1260 set_constants(rs, &pres->inputs);
1261 if (FAILED(hr = execute_preshader(pres)))
1262 return hr;
1264 set_constants(rs, &peval->shader_inputs);
1265 result = D3D_OK;
1266 for (i = 0; i < ARRAY_SIZE(set_tables); ++i)
1268 if (FAILED(hr = set_shader_constants_device(device, rs, peval->param_type, set_tables[i])))
1269 result = hr;
1271 return result;