cmd: DIR command outputs free space for the path.
[wine.git] / dlls / d3dx9_36 / preshader.c
blobebf62a99c6ab3c2fc71a0860c70a7bc39eaddac4
/*
 * Copyright 2016 Paul Gofman
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
20 #include "d3dx9_private.h"
22 #include <float.h>
23 #include <math.h>
24 #include <assert.h>
26 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
/* Preshader operations. parse_pres_ins() stores an index into pres_op_info[]
 * in a field of this type, so the enumerator order has to match the order of
 * the pres_op_info[] entries. */
enum pres_ops
{
    PRESHADER_OP_NOP,
    PRESHADER_OP_MOV,
    PRESHADER_OP_NEG,
    PRESHADER_OP_RCP,
    PRESHADER_OP_FRC,
    PRESHADER_OP_EXP,
    PRESHADER_OP_LOG,
    PRESHADER_OP_RSQ,
    PRESHADER_OP_SIN,
    PRESHADER_OP_COS,
    PRESHADER_OP_ASIN,
    PRESHADER_OP_ACOS,
    PRESHADER_OP_ATAN,
    PRESHADER_OP_MIN,
    PRESHADER_OP_MAX,
    PRESHADER_OP_LT,
    PRESHADER_OP_GE,
    PRESHADER_OP_ADD,
    PRESHADER_OP_MUL,
    PRESHADER_OP_ATAN2,
    PRESHADER_OP_DIV,
    PRESHADER_OP_CMP,
    PRESHADER_OP_DOT,
    PRESHADER_OP_DOTSWIZ6,
    PRESHADER_OP_DOTSWIZ8,
};
/* Signature shared by all preshader operation implementations: 'args' holds
 * the input values, 'n' is passed by the caller (only pres_dot() uses it, as
 * the number of components per vector). */
typedef double (*pres_op_func)(double *args, int n);
59 static double to_signed_nan(double v)
61 static const union
63 ULONG64 ulong64_value;
64 double double_value;
66 signed_nan =
68 0xfff8000000000000
71 return isnan(v) ? signed_nan.double_value : v;
/* mov: returns the first argument unchanged. */
static double pres_mov(double *args, int n) {return args[0];}
/* add: returns args[0] + args[1]. */
static double pres_add(double *args, int n) {return args[0] + args[1];}
/* mul: returns args[0] * args[1]. */
static double pres_mul(double *args, int n) {return args[0] * args[1];}
/* dot: dot product of two n-component vectors stored back to back in args,
 * i.e. sum of args[i] * args[i + n] for i in [0, n). Returns 0.0 for n <= 0. */
static double pres_dot(double *args, int n)
{
    int i;
    double sum;

    sum = 0.0;
    for (i = 0; i < n; ++i)
        sum += args[i] * args[i + n];
    return sum;
}

/* d3ds_dotswiz with 6 inputs: 3-component dot product; n is ignored. */
static double pres_dotswiz6(double *args, int n)
{
    return pres_dot(args, 3);
}

/* d3ds_dotswiz with 8 inputs: 4-component dot product; n is ignored. */
static double pres_dotswiz8(double *args, int n)
{
    return pres_dot(args, 4);
}
98 static double pres_neg(double *args, int n) {return -args[0];}
99 static double pres_rcp(double *args, int n) {return 1.0 / args[0];}
100 static double pres_lt(double *args, int n) {return args[0] < args[1] ? 1.0 : 0.0;}
101 static double pres_ge(double *args, int n) {return args[0] >= args[1] ? 1.0 : 0.0;}
102 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);}
103 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);}
104 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);}
105 static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];}
106 static double pres_sin(double *args, int n) {return sin(args[0]);}
107 static double pres_cos(double *args, int n) {return cos(args[0]);}
108 static double pres_rsq(double *args, int n)
110 double v;
112 v = fabs(args[0]);
113 if (v == 0.0)
114 return INFINITY;
115 else
116 return 1.0 / sqrt(v);
118 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);}
119 static double pres_log(double *args, int n)
121 double v;
123 v = fabs(args[0]);
124 if (v == 0.0)
125 return 0.0;
126 else
127 return log2(v);
/* asin / acos: the libm result, with any NaN replaced through to_signed_nan(). 'n' is unused. */
static double pres_asin(double *args, int n) {return to_signed_nan(asin(args[0]));}
static double pres_acos(double *args, int n) {return to_signed_nan(acos(args[0]));}
131 static double pres_atan(double *args, int n) {return atan(args[0]);}
132 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);}
/* According to the test results the 'div' operation always returns 0. The compiler does not
 * seem to ever generate it, using rcp + mul instead, so it is probably not implemented in
 * native d3dx. Both arguments are ignored. */
static double pres_div(double *args, int n) {return 0.0;}
/* Layout of an instruction token as decoded by parse_pres_ins(): opcode in
 * bits 20..30, scalar flag in bit 31, component count in the low 16 bits. */
#define PRES_OPCODE_MASK 0x7ff00000
#define PRES_OPCODE_SHIFT 20
#define PRES_SCALAR_FLAG 0x80000000
#define PRES_NCOMP_MASK 0x0000ffff

/* Comment section identifiers; read as little-endian bytes the first four
 * spell "PRES", "CLIT", "FXLC" and "PRSI". */
#define FOURCC_PRES 0x53455250
#define FOURCC_CLIT 0x54494c43
#define FOURCC_FXLC 0x434c5846
#define FOURCC_PRSI 0x49535250
#define FOURCC_TX_1 0x54580100
#define PRES_SIGN 0x46580000
150 struct op_info
152 unsigned int opcode;
153 char mnem[16];
154 unsigned int input_count;
155 BOOL func_all_comps;
156 pres_op_func func;
159 static const struct op_info pres_op_info[] =
161 {0x000, "nop", 0, 0, NULL }, /* PRESHADER_OP_NOP */
162 {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */
163 {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */
164 {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */
165 {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */
166 {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */
167 {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */
168 {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */
169 {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */
170 {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */
171 {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */
172 {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */
173 {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */
174 {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */
175 {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */
176 {0x202, "lt", 2, 0, pres_lt }, /* PRESHADER_OP_LT */
177 {0x203, "ge", 2, 0, pres_ge }, /* PRESHADER_OP_GE */
178 {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */
179 {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */
180 {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */
181 {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */
182 {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */
183 {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */
184 {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */
185 {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */
/* Storage type of the components of a register table.
 * PRES_VT_COUNT doubles as the "unsupported type" result of
 * table_type_from_param_type(). */
enum pres_value_type
{
    PRES_VT_FLOAT,
    PRES_VT_DOUBLE,
    PRES_VT_INT,
    PRES_VT_BOOL,
    PRES_VT_COUNT
};
197 static const struct
199 unsigned int component_size;
200 enum pres_value_type type;
202 table_info[] =
204 {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
205 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
206 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_INPUT */
207 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
208 {sizeof(BOOL), PRES_VT_BOOL }, /* PRES_REGTAB_OBCONST */
209 {sizeof(int), PRES_VT_INT, }, /* PRES_REGTAB_OICONST */
210 /* TODO: use double precision for 64 bit */
211 {sizeof(float), PRES_VT_FLOAT } /* PRES_REGTAB_TEMP */
/* Register name prefixes used by the dump helpers, indexed by register table.
 * NOTE(review): the trailing "(null)" entry presumably corresponds to
 * PRES_REGTAB_COUNT - confirm against the enum definition. */
static const char *table_symbol[] =
{
    "imm", "c", "v", "oc", "ob", "oi", "r", "(null)",
};
219 static const enum pres_reg_tables pres_regset2table[] =
221 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */
222 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */
223 PRES_REGTAB_CONST, /* D3DXRS_FLOAT4 */
224 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */
227 static const enum pres_reg_tables shad_regset2table[] =
229 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */
230 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */
231 PRES_REGTAB_OCONST, /* D3DXRS_FLOAT4 */
232 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */
235 struct d3dx_pres_reg
237 enum pres_reg_tables table;
238 /* offset is component index, not register index, e. g.
239 offset for component c3.y is 13 (3 * 4 + 1) */
240 unsigned int offset;
243 struct d3dx_pres_operand
245 struct d3dx_pres_reg reg;
246 struct d3dx_pres_reg index_reg;
249 #define MAX_INPUTS_COUNT 8
251 struct d3dx_pres_ins
253 enum pres_ops op;
254 /* first input argument is scalar,
255 scalar component is propagated */
256 BOOL scalar_op;
257 unsigned int component_count;
258 struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
259 struct d3dx_pres_operand output;
262 struct const_upload_info
264 BOOL transpose;
265 unsigned int major, minor;
266 unsigned int major_stride;
267 unsigned int major_count;
268 unsigned int count;
269 unsigned int minor_remainder;
272 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type)
274 switch (type)
276 case D3DXPT_FLOAT:
277 return PRES_VT_FLOAT;
278 case D3DXPT_INT:
279 return PRES_VT_INT;
280 case D3DXPT_BOOL:
281 return PRES_VT_BOOL;
282 default:
283 FIXME("Unsupported type %u.\n", type);
284 return PRES_VT_COUNT;
288 static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
290 return table == PRES_REGTAB_OBCONST ? offset : offset >> 2;
293 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx)
295 return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2;
298 static unsigned int get_reg_components(unsigned int table)
300 return get_offset_reg(table, 1);
/* Number of bits in one unsigned int block (assuming 8-bit bytes). */
#define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
305 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
307 unsigned int size;
309 size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size;
310 if (size)
312 rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
313 if (!rs->tables[table])
314 return E_OUTOFMEMORY;
316 return D3D_OK;
319 static void regstore_free_tables(struct d3dx_regstore *rs)
321 unsigned int i;
323 for (i = 0; i < PRES_REGTAB_COUNT; ++i)
325 HeapFree(GetProcessHeap(), 0, rs->tables[i]);
329 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data,
330 unsigned int start_offset, unsigned int count)
332 BYTE *dst = rs->tables[table];
333 const BYTE *src = data;
334 unsigned int size;
336 dst += start_offset * table_info[table].component_size;
337 size = count * table_info[table].component_size;
338 assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst));
339 memcpy(dst, src, size);
342 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset)
344 BYTE *p;
346 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
347 switch (table_info[table].type)
349 case PRES_VT_FLOAT:
350 return *(float *)p;
351 case PRES_VT_DOUBLE:
352 return *(double *)p;
353 default:
354 FIXME("Unexpected preshader input from table %u.\n", table);
355 return NAN;
359 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v)
361 BYTE *p;
363 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
364 switch (table_info[table].type)
366 case PRES_VT_FLOAT : *(float *)p = v; break;
367 case PRES_VT_DOUBLE: *(double *)p = v; break;
368 case PRES_VT_INT : *(int *)p = lrint(v); break;
369 case PRES_VT_BOOL : *(BOOL *)p = !!v; break;
370 default:
371 FIXME("Bad type %u.\n", table_info[table].type);
372 break;
/* Traces a byte code buffer as 32-bit hexadecimal words, eight per line.
 * 'size' is in bytes; a trailing partial word is not printed. */
static void dump_bytecode(void *data, unsigned int size)
{
    unsigned int *bytecode = (unsigned int *)data;
    unsigned int i, j, n;

    size /= sizeof(*bytecode);
    i = 0;
    while (i < size)
    {
        n = min(size - i, 8);
        for (j = 0; j < n; ++j)
            TRACE("0x%08x,", bytecode[i + j]);
        i += n;
        TRACE("\n");
    }
}
/* Searches a run of consecutive comment tokens for the section tagged with
 * 'fourcc'.
 *
 * ptr    - first token to examine
 * count  - number of 32-bit words available at ptr
 * fourcc - section identifier expected in the word after the comment token
 * size   - receives the section size in words (as stored in the upper 16 bits
 *          of the comment token, i.e. including the fourcc word)
 *
 * Returns a pointer to the word following the fourcc, or NULL if the section
 * is not found. The search stops at the first token that is not a comment
 * (low word != 0xfffe), at an empty section, or at a section that does not
 * fit into the remaining buffer. *size is only written on success. */
static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
        unsigned int fourcc, unsigned int *size)
{
    /* Provide at least one value in comment section on non-NULL return. */
    while (count > 2 && (*ptr & 0xffff) == 0xfffe)
    {
        unsigned int section_size;

        section_size = (*ptr >> 16);
        if (!section_size || section_size + 1 > count)
            break;
        if (*(ptr + 1) == fourcc)
        {
            *size = section_size;
            return ptr + 2;
        }
        count -= section_size + 1;
        ptr += section_size + 1;
    }
    return NULL;
}
415 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg)
417 static const enum pres_reg_tables reg_table[8] =
419 PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_INPUT,
420 PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
423 if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT)
425 FIXME("Unsupported register table %#x.\n", *ptr);
426 return NULL;
429 reg->table = reg_table[*ptr++];
430 reg->offset = *ptr++;
431 return ptr;
434 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
436 if (count < 3 || (*ptr && count < 5))
438 WARN("Byte code buffer ends unexpectedly, count %u.\n", count);
439 return NULL;
442 if (*ptr)
444 if (*ptr != 1)
446 FIXME("Unknown relative addressing flag, word %#x.\n", *ptr);
447 return NULL;
449 ptr = parse_pres_reg(ptr + 1, &opr->index_reg);
450 if (!ptr)
451 return NULL;
453 else
455 opr->index_reg.table = PRES_REGTAB_COUNT;
456 ++ptr;
459 ptr = parse_pres_reg(ptr, &opr->reg);
461 if (opr->reg.table == PRES_REGTAB_OBCONST)
462 opr->reg.offset /= 4;
463 return ptr;
466 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
468 unsigned int ins_code, ins_raw;
469 unsigned int input_count;
470 unsigned int i;
472 if (count < 2)
474 WARN("Byte code buffer ends unexpectedly.\n");
475 return NULL;
478 ins_raw = *ptr++;
479 ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
480 ins->component_count = ins_raw & PRES_NCOMP_MASK;
481 ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);
483 if (ins->component_count < 1 || ins->component_count > 4)
485 FIXME("Unsupported number of components %u.\n", ins->component_count);
486 return NULL;
488 input_count = *ptr++;
489 count -= 2;
490 for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
491 if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count)
492 break;
493 if (i == ARRAY_SIZE(pres_op_info))
495 FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw);
496 return NULL;
498 ins->op = i;
499 if (input_count > ARRAY_SIZE(ins->inputs))
501 FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count,
502 pres_op_info[i].mnem);
503 return NULL;
505 for (i = 0; i < input_count; ++i)
507 unsigned int *p;
509 p = parse_pres_arg(ptr, count, &ins->inputs[i]);
510 if (!p)
511 return NULL;
512 count -= p - ptr;
513 ptr = p;
515 ptr = parse_pres_arg(ptr, count, &ins->output);
516 if (ins->output.index_reg.table != PRES_REGTAB_COUNT)
518 FIXME("Relative addressing in output register not supported.\n");
519 return NULL;
521 if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset
522 + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1))
523 != get_reg_offset(ins->output.reg.table, ins->output.reg.offset))
525 FIXME("Instructions outputting multiple registers are not supported.\n");
526 return NULL;
528 return ptr;
531 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc,
532 WORD *constantinfo_reserved)
534 const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc);
536 if (!constant)
538 FIXME("Could not get constant desc.\n");
539 if (constantinfo_reserved)
540 *constantinfo_reserved = 0;
541 return D3DERR_INVALIDCALL;
543 *desc = constant->desc;
544 if (constantinfo_reserved)
545 *constantinfo_reserved = constant->constantinfo_reserved;
546 return D3D_OK;
549 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set,
550 struct const_upload_info *info)
552 struct d3dx_parameter *param = const_set->param;
553 unsigned int table = const_set->table;
555 info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
556 || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS);
557 if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS)
559 info->major = param->columns;
560 info->minor = param->rows;
562 else
564 info->major = param->rows;
565 info->minor = param->columns;
568 if (get_reg_components(table) == 1)
570 unsigned int const_length = get_offset_reg(table, const_set->register_count);
572 info->major_stride = info->minor;
573 info->major_count = const_length / info->major_stride;
574 info->minor_remainder = const_length % info->major_stride;
576 else
578 info->major_stride = get_reg_components(table);
579 info->major_count = const_set->register_count;
580 info->minor_remainder = 0;
582 info->count = info->major_count * info->minor + info->minor_remainder;
585 #define INITIAL_CONST_SET_SIZE 16
587 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set)
589 if (const_tab->const_set_count >= const_tab->const_set_size)
591 unsigned int new_size;
592 struct d3dx_const_param_eval_output *new_alloc;
594 if (!const_tab->const_set_size)
596 new_size = INITIAL_CONST_SET_SIZE;
597 new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size);
598 if (!new_alloc)
600 ERR("Out of memory.\n");
601 return E_OUTOFMEMORY;
604 else
606 new_size = const_tab->const_set_size * 2;
607 new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
608 sizeof(*const_tab->const_set) * new_size);
609 if (!new_alloc)
611 ERR("Out of memory.\n");
612 return E_OUTOFMEMORY;
615 const_tab->const_set = new_alloc;
616 const_tab->const_set_size = new_size;
618 const_tab->const_set[const_tab->const_set_count++] = *set;
619 return D3D_OK;
622 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab,
623 struct d3dx_preshader *pres)
625 unsigned int i;
626 struct d3dx_const_param_eval_output const_set = {NULL};
628 for (i = 0; i < pres->ins_count; ++i)
630 const struct d3dx_pres_ins *ins = &pres->ins[i];
631 const struct d3dx_pres_reg *reg = &ins->output.reg;
633 if (reg->table == PRES_REGTAB_TEMP)
634 continue;
636 const_set.register_index = get_reg_offset(reg->table, reg->offset);
637 const_set.register_count = 1;
638 const_set.table = reg->table;
639 const_set.constant_class = D3DXPC_FORCE_DWORD;
640 const_set.element_count = 1;
641 append_const_set(const_tab, &const_set);
645 static int __cdecl compare_const_set(const void *a, const void *b)
647 const struct d3dx_const_param_eval_output *r1 = a;
648 const struct d3dx_const_param_eval_output *r2 = b;
650 if (r1->table != r2->table)
651 return r1->table - r2->table;
652 return r1->register_index - r2->register_index;
655 static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab,
656 struct d3dx_parameter *param, unsigned int index)
658 unsigned int i, start_index = index;
659 DWORD *current_data;
660 enum pres_reg_tables current_table;
661 unsigned int current_start_offset, element_count;
662 struct d3dx_const_param_eval_output *first_const;
664 if (!const_tab->const_set_count)
665 return D3D_OK;
667 while (index < const_tab->const_set_count - 1)
669 first_const = &const_tab->const_set[index];
670 current_data = first_const->param->data;
671 current_table = first_const->table;
672 current_start_offset = get_offset_reg(current_table, first_const->register_index);
673 element_count = 0;
674 for (i = index; i < const_tab->const_set_count; ++i)
676 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i];
677 unsigned int count = get_offset_reg(const_set->table,
678 const_set->register_count * const_set->element_count);
679 unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index);
681 if (!(const_set->table == current_table && current_start_offset == start_offset
682 && const_set->direct_copy == first_const->direct_copy
683 && current_data == const_set->param->data
684 && (const_set->direct_copy || (first_const->param->type == const_set->param->type
685 && first_const->param->class == const_set->param->class
686 && first_const->param->columns == const_set->param->columns
687 && first_const->param->rows == const_set->param->rows
688 && first_const->register_count == const_set->register_count
689 && (i == const_tab->const_set_count - 1
690 || first_const->param->element_count == const_set->param->element_count)))))
691 break;
693 current_start_offset += count;
694 current_data += const_set->direct_copy ? count : const_set->param->rows
695 * const_set->param->columns * const_set->element_count;
696 element_count += const_set->element_count;
699 if (i > index + 1)
701 TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index,
702 debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy);
704 first_const->element_count = element_count;
705 if (first_const->direct_copy)
707 first_const->element_count = 1;
708 if (index == start_index
709 && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT))
711 if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
712 return D3DERR_INVALIDCALL;
713 first_const->param = param;
715 first_const->register_count = get_reg_offset(current_table, current_start_offset)
716 - first_const->register_index;
718 memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i],
719 sizeof(*const_tab->const_set) * (const_tab->const_set_count - i));
720 const_tab->const_set_count -= i - index - 1;
722 else
724 TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n",
725 const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy);
727 index = i;
729 return D3D_OK;
732 static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
733 D3DXHANDLE hc, struct d3dx_parameter *param)
735 D3DXCONSTANT_DESC desc;
736 unsigned int const_count, param_count, i;
737 BOOL get_element;
738 struct d3dx_const_param_eval_output const_set;
739 struct const_upload_info info;
740 enum pres_value_type table_type;
741 HRESULT hr;
743 if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL)))
744 return D3DERR_INVALIDCALL;
746 if (param->element_count)
748 param_count = param->element_count;
749 const_count = desc.Elements;
750 get_element = TRUE;
752 else
754 if (desc.Elements > 1)
756 FIXME("Unexpected number of constant elements %u.\n", desc.Elements);
757 return D3DERR_INVALIDCALL;
759 param_count = param->member_count;
760 const_count = desc.StructMembers;
761 get_element = FALSE;
763 if (const_count != param_count)
765 FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n",
766 param_count, const_count);
767 return D3DERR_INVALIDCALL;
769 if (const_count)
771 HRESULT ret = D3D_OK;
772 D3DXHANDLE hc_element;
773 unsigned int index = const_tab->const_set_count;
775 for (i = 0; i < const_count; ++i)
777 if (get_element)
778 hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i);
779 else
780 hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i);
781 if (!hc_element)
783 FIXME("Could not get constant.\n");
784 hr = D3DERR_INVALIDCALL;
786 else
788 hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i]);
790 if (FAILED(hr))
791 ret = hr;
793 if (FAILED(ret))
794 return ret;
795 return merge_const_set_entries(const_tab, param, index);
798 TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n",
799 debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes);
800 TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n",
801 debugstr_a(param->name), param->rows, param->columns, param->class,
802 param->flags, param->bytes);
804 const_set.element_count = 1;
805 const_set.param = param;
806 const_set.constant_class = desc.Class;
807 if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table))
809 FIXME("Unknown register set %u.\n", desc.RegisterSet);
810 return D3DERR_INVALIDCALL;
812 const_set.register_index = desc.RegisterIndex;
813 const_set.table = const_tab->regset2table[desc.RegisterSet];
814 if (const_set.table >= PRES_REGTAB_COUNT)
816 ERR("Unexpected register set %u.\n", desc.RegisterSet);
817 return D3DERR_INVALIDCALL;
819 assert(table_info[const_set.table].component_size == sizeof(unsigned int));
820 assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int));
821 const_set.register_count = desc.RegisterCount;
822 table_type = table_info[const_set.table].type;
823 get_const_upload_info(&const_set, &info);
824 if (!info.count)
826 TRACE("%s has zero count, skipping.\n", debugstr_a(param->name));
827 return D3D_OK;
830 if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
831 return D3DERR_INVALIDCALL;
833 const_set.direct_copy = table_type_from_param_type(param->type) == table_type
834 && !info.transpose && info.minor == info.major_stride
835 && info.count == get_offset_reg(const_set.table, const_set.register_count)
836 && info.count * sizeof(unsigned int) <= param->bytes;
837 if (info.minor_remainder && !const_set.direct_copy && !info.transpose)
838 FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n",
839 debugstr_a(param->name));
841 if (info.major_count > info.major
842 || (info.major_count == info.major && info.minor_remainder))
844 WARN("Constant dimensions exceed parameter size.\n");
845 return D3DERR_INVALIDCALL;
848 if (FAILED(hr = append_const_set(const_tab, &const_set)))
849 return hr;
851 return D3D_OK;
854 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out,
855 struct d3dx_parameters_store *parameters, const char **skip_constants,
856 unsigned int skip_constants_count, struct d3dx_preshader *pres)
858 ID3DXConstantTable *ctab;
859 D3DXCONSTANT_DESC *cdesc;
860 struct d3dx_parameter **inputs_param;
861 D3DXCONSTANTTABLE_DESC desc;
862 HRESULT hr;
863 D3DXHANDLE hc;
864 unsigned int i, j;
866 hr = D3DXGetShaderConstantTable((DWORD *)byte_code, &ctab);
867 if (FAILED(hr) || !ctab)
869 TRACE("Could not get CTAB data, hr %#lx.\n", hr);
870 /* returning OK, shaders and preshaders without CTAB are valid */
871 return D3D_OK;
873 if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc)))
875 FIXME("Could not get CTAB desc, hr %#lx.\n", hr);
876 goto cleanup;
879 out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
880 out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
881 if (!cdesc || !inputs_param)
883 hr = E_OUTOFMEMORY;
884 goto cleanup;
887 for (i = 0; i < desc.Constants; ++i)
889 unsigned int index = out->input_count;
890 WORD constantinfo_reserved;
892 hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
893 if (!hc)
895 FIXME("Null constant handle.\n");
896 goto cleanup;
898 if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved)))
899 goto cleanup;
900 inputs_param[index] = get_parameter_by_name(parameters, NULL, cdesc[index].Name);
901 if (!inputs_param[index])
903 WARN("Could not find parameter %s in effect.\n", cdesc[index].Name);
904 continue;
906 if (cdesc[index].Class == D3DXPC_OBJECT)
908 TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]);
909 if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT
910 || !is_param_type_sampler(inputs_param[index]->type))
912 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name));
913 hr = D3DERR_INVALIDCALL;
914 goto cleanup;
916 if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount)
918 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name));
919 hr = D3DERR_INVALIDCALL;
920 goto cleanup;
923 if (!is_top_level_parameter(inputs_param[index]))
925 WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name));
926 hr = E_FAIL;
927 goto cleanup;
930 for (j = 0; j < skip_constants_count; ++j)
932 if (!strcmp(cdesc[index].Name, skip_constants[j]))
934 if (!constantinfo_reserved)
936 WARN("skip_constants parameter %s is not register bound.\n",
937 cdesc[index].Name);
938 hr = D3DERR_INVALIDCALL;
939 goto cleanup;
941 TRACE("Skipping constant %s.\n", cdesc[index].Name);
942 break;
945 if (j < skip_constants_count)
946 continue;
947 ++out->input_count;
948 if (inputs_param[index]->class == D3DXPC_OBJECT)
949 continue;
950 if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index])))
951 goto cleanup;
953 if (pres)
954 append_pres_const_sets_for_shader_input(out, pres);
955 if (out->const_set_count)
957 struct d3dx_const_param_eval_output *new_alloc;
959 qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set);
961 i = 0;
962 while (i < out->const_set_count - 1)
964 if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD
965 && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD
966 && out->const_set[i].table == out->const_set[i + 1].table
967 && out->const_set[i].register_index + out->const_set[i].register_count
968 >= out->const_set[i + 1].register_index)
970 assert(out->const_set[i].register_index + out->const_set[i].register_count
971 <= out->const_set[i + 1].register_index + 1);
972 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1
973 - out->const_set[i].register_index;
974 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i])
975 * (out->const_set_count - i - 2));
976 --out->const_set_count;
978 else
980 ++i;
984 new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set,
985 sizeof(*out->const_set) * out->const_set_count);
986 if (new_alloc)
988 out->const_set = new_alloc;
989 out->const_set_size = out->const_set_count;
991 else
993 WARN("Out of memory.\n");
996 cleanup:
997 ID3DXConstantTable_Release(ctab);
998 return hr;
1001 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
1003 if (table < PRES_REGTAB_COUNT)
1004 table_sizes[table] = max(table_sizes[table], max_register + 1);
1007 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
1009 unsigned int i, table, max_register;
1011 for (i = 0; i < ctab->input_count; ++i)
1013 if (!ctab->inputs[i].RegisterCount)
1014 continue;
1015 max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1;
1016 table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1017 update_table_size(table_sizes, table, max_register);
1021 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count)
1023 static const char *xyzw_str = "xyzw";
1024 unsigned int i, table, reg_offset;
1026 table = arg->reg.table;
1027 if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT)
1029 TRACE("(");
1030 for (i = 0; i < component_count; ++i)
1031 TRACE(i < component_count - 1 ? "%.16e, " : "%.16e",
1032 ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]);
1033 TRACE(")");
1035 else
1037 reg_offset = get_reg_offset(table, arg->reg.offset);
1039 if (arg->index_reg.table == PRES_REGTAB_COUNT)
1041 if (table == PRES_REGTAB_INPUT && reg_offset < 2)
1042 TRACE("%s%s.", table_symbol[table], reg_offset ? "PSize" : "Pos");
1043 else
1044 TRACE("%s%u.", table_symbol[table], reg_offset);
1046 else
1048 unsigned int index_reg;
1050 index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset);
1051 TRACE("%s[%u + %s%u.%c].", table_symbol[table], reg_offset,
1052 table_symbol[arg->index_reg.table], index_reg,
1053 xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]);
1055 for (i = 0; i < component_count; ++i)
1056 TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]);
1060 static void dump_registers(struct d3dx_const_tab *ctab)
1062 unsigned int table, i;
1064 for (i = 0; i < ctab->input_count; ++i)
1066 table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1067 TRACE("// %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)",
1068 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount);
1072 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins)
1074 unsigned int i;
1076 TRACE("%s ", pres_op_info[ins->op].mnem);
1077 dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count);
1078 for (i = 0; i < pres_op_info[ins->op].input_count; ++i)
1080 TRACE(", ");
1081 dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count);
1083 TRACE("\n");
1086 static void dump_preshader(struct d3dx_preshader *pres)
1088 unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4;
1089 const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED];
1091 if (immediate_count)
1092 TRACE("// Immediates:\n");
1093 for (i = 0; i < immediate_count; ++i)
1095 if (!(i % 4))
1096 TRACE("// ");
1097 TRACE("%.8e", immediates[i]);
1098 if (i % 4 == 3)
1099 TRACE("\n");
1100 else
1101 TRACE(", ");
1103 TRACE("// Preshader registers:\n");
1104 dump_registers(&pres->inputs);
1105 TRACE("preshader\n");
1106 for (i = 0; i < pres->ins_count; ++i)
1107 dump_ins(&pres->regs, &pres->ins[i]);
1110 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count,
1111 struct d3dx_parameters_store *parameters)
1113 unsigned int *p;
1114 unsigned int i, j, const_count, magic;
1115 double *dconst;
1116 HRESULT hr;
1117 unsigned int saved_word;
1118 unsigned int section_size;
1120 magic = *ptr;
1122 TRACE("Preshader version %#x.\n", *ptr);
1124 if (!count)
1126 WARN("Unexpected end of byte code buffer.\n");
1127 return D3DXERR_INVALIDDATA;
1130 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
1131 if (p)
1133 const_count = *p++;
1134 if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
1136 WARN("Byte code buffer ends unexpectedly.\n");
1137 return D3DXERR_INVALIDDATA;
1139 dconst = (double *)p;
1141 else
1143 const_count = 0;
1144 dconst = NULL;
1146 TRACE("%u double constants.\n", const_count);
1148 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
1149 if (!p)
1151 WARN("Could not find preshader code.\n");
1152 return D3D_OK;
1154 pres->ins_count = *p++;
1155 --section_size;
1156 if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
1158 WARN("Invalid instruction count %u.\n", pres->ins_count);
1159 return D3DXERR_INVALIDDATA;
1161 TRACE("%u instructions.\n", pres->ins_count);
1162 pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
1163 if (!pres->ins)
1164 return E_OUTOFMEMORY;
1165 for (i = 0; i < pres->ins_count; ++i)
1167 unsigned int *ptr_next;
1169 ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
1170 if (!ptr_next)
1171 return D3DXERR_INVALIDDATA;
1172 section_size -= ptr_next - p;
1173 p = ptr_next;
1176 pres->inputs.regset2table = pres_regset2table;
1178 saved_word = *ptr;
1179 *ptr = 0xfffe0000;
1180 hr = get_constants_desc(ptr, &pres->inputs, parameters, NULL, 0, NULL);
1181 *ptr = saved_word;
1182 if (FAILED(hr))
1183 return hr;
1185 if (const_count % get_reg_components(PRES_REGTAB_IMMED))
1187 FIXME("const_count %u is not a multiple of %u.\n", const_count,
1188 get_reg_components(PRES_REGTAB_IMMED));
1189 return D3DXERR_INVALIDDATA;
1191 pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count);
1192 if (magic == FOURCC_TX_1)
1193 pres->regs.table_sizes[PRES_REGTAB_INPUT] = 2;
1195 update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
1196 for (i = 0; i < pres->ins_count; ++i)
1198 for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
1200 enum pres_reg_tables table;
1201 unsigned int reg_idx;
1203 if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT)
1205 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0
1206 : pres->ins[i].component_count - 1;
1208 table = pres->ins[i].inputs[j].reg.table;
1209 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset
1210 + last_component_index);
1212 else
1214 table = pres->ins[i].inputs[j].index_reg.table;
1215 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset);
1217 if (reg_idx >= pres->regs.table_sizes[table])
1219 /* Native accepts these broken preshaders. */
1220 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u, preshader parsing failed.\n",
1221 i, j, table, reg_idx);
1222 return D3DXERR_INVALIDDATA;
1225 update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table,
1226 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset));
1228 if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
1229 return E_OUTOFMEMORY;
1230 regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);
1232 return D3D_OK;
1235 HRESULT d3dx_create_param_eval(struct d3dx_parameters_store *parameters, void *byte_code, unsigned int byte_code_size,
1236 D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter,
1237 const char **skip_constants, unsigned int skip_constants_count)
1239 struct d3dx_param_eval *peval;
1240 unsigned int *ptr, *shader_ptr = NULL;
1241 unsigned int i;
1242 BOOL shader;
1243 unsigned int count, pres_size;
1244 HRESULT ret;
1246 TRACE("parameters %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
1247 parameters, byte_code, byte_code_size, type, peval_out);
1249 count = byte_code_size / sizeof(unsigned int);
1250 if (!byte_code || !count)
1252 *peval_out = NULL;
1253 return D3D_OK;
1256 peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval));
1257 if (!peval)
1259 ret = E_OUTOFMEMORY;
1260 goto err_out;
1262 peval->version_counter = version_counter;
1264 peval->param_type = type;
1265 switch (type)
1267 case D3DXPT_VERTEXSHADER:
1268 case D3DXPT_PIXELSHADER:
1269 shader = TRUE;
1270 break;
1271 default:
1272 shader = FALSE;
1273 break;
1275 peval->shader_inputs.regset2table = shad_regset2table;
1277 ptr = (unsigned int *)byte_code;
1278 if (shader)
1280 if ((*ptr & 0xfffe0000) != 0xfffe0000)
1282 FIXME("Invalid shader signature %#x.\n", *ptr);
1283 ret = D3DXERR_INVALIDDATA;
1284 goto err_out;
1286 TRACE("Shader version %#x.\n", *ptr & 0xffff);
1287 shader_ptr = ptr;
1288 ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
1289 if (!ptr)
1290 TRACE("No preshader found.\n");
1292 else
1294 pres_size = count;
1297 if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, parameters)))
1299 FIXME("Failed parsing preshader, byte code for analysis follows.\n");
1300 dump_bytecode(byte_code, byte_code_size);
1301 goto err_out;
1304 if (shader)
1306 if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, parameters,
1307 skip_constants, skip_constants_count, &peval->pres)))
1309 TRACE("Could not get shader constant table, hr %#lx.\n", ret);
1310 goto err_out;
1312 update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
1315 for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i)
1317 if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i)))
1318 goto err_out;
1321 if (TRACE_ON(d3dx))
1323 dump_bytecode(byte_code, byte_code_size);
1324 dump_preshader(&peval->pres);
1325 if (shader)
1327 TRACE("// Shader registers:\n");
1328 dump_registers(&peval->shader_inputs);
1331 *peval_out = peval;
1332 TRACE("Created parameter evaluator %p.\n", *peval_out);
1333 return D3D_OK;
1335 err_out:
1336 WARN("Error creating parameter evaluator.\n");
1337 if (TRACE_ON(d3dx))
1338 dump_bytecode(byte_code, byte_code_size);
1340 d3dx_free_param_eval(peval);
1341 *peval_out = NULL;
1342 return ret;
1345 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
1347 HeapFree(GetProcessHeap(), 0, ctab->inputs);
1348 HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
1349 HeapFree(GetProcessHeap(), 0, ctab->const_set);
1352 static void d3dx_free_preshader(struct d3dx_preshader *pres)
1354 HeapFree(GetProcessHeap(), 0, pres->ins);
1356 regstore_free_tables(&pres->regs);
1357 d3dx_free_const_tab(&pres->inputs);
1360 void d3dx_free_param_eval(struct d3dx_param_eval *peval)
1362 TRACE("peval %p.\n", peval);
1364 if (!peval)
1365 return;
1367 d3dx_free_preshader(&peval->pres);
1368 d3dx_free_const_tab(&peval->shader_inputs);
1369 HeapFree(GetProcessHeap(), 0, peval);
/* Convert 'count' float values at 'in' to int values at 'out' using the
 * plain C float-to-int conversion. */
static void pres_int_from_float(void *out, const void *in, unsigned int count)
{
    const float *src = in;
    int *dst = out;
    unsigned int remaining;

    for (remaining = count; remaining; --remaining)
        *dst++ = *src++;
}
1382 static void pres_bool_from_value(void *out, const void *in, unsigned int count)
1384 unsigned int i;
1385 const DWORD *in_dword = in;
1386 BOOL *out_bool = out;
1388 for (i = 0; i < count; ++i)
1389 out_bool[i] = !!in_dword[i];
/* Convert 'count' int values at 'in' to float values at 'out'. */
static void pres_float_from_int(void *out, const void *in, unsigned int count)
{
    const int *src = in;
    float *dst = out;
    unsigned int remaining;

    for (remaining = count; remaining; --remaining)
        *dst++ = *src++;
}
1402 static void pres_float_from_bool(void *out, const void *in, unsigned int count)
1404 unsigned int i;
1405 const BOOL *in_bool = in;
1406 float *out_float = out;
1408 for (i = 0; i < count; ++i)
1409 out_float[i] = !!in_bool[i];
/* Convert 'count' boolean values at 'in' to int values at 'out': any non-zero
 * input becomes 1, zero stays 0.
 *
 * The input is read as 32-bit integers. Reading it as float would classify
 * the bit pattern 0x80000000 (negative zero as a float) as false although it
 * is a non-zero, i.e. true, boolean value. */
static void pres_int_from_bool(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const unsigned int *in_bool = in;
    int *out_int = out;

    for (i = 0; i < count; ++i)
        out_int[i] = !!in_bool[i];
}
1422 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table,
1423 unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type)
1425 typedef void (*conv_func)(void *out, const void *in, unsigned int count);
1426 static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] =
1428 {NULL, NULL, pres_int_from_float, pres_bool_from_value},
1429 {NULL, NULL, NULL, NULL},
1430 {pres_float_from_int, NULL, NULL, pres_bool_from_value},
1431 {pres_float_from_bool, NULL, pres_int_from_bool, NULL}
1433 enum pres_value_type table_type = table_info[table].type;
1435 if (param_type == table_type)
1437 regstore_set_values(rs, table, in, offset, count);
1438 return;
1441 set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count);
1444 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1445 D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr,
1446 unsigned int start, unsigned int count)
1448 if (type == D3DXPT_VERTEXSHADER)
1450 switch(table)
1452 case PRES_REGTAB_OCONST:
1453 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count);
1454 case PRES_REGTAB_OICONST:
1455 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count);
1456 case PRES_REGTAB_OBCONST:
1457 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count);
1458 default:
1459 FIXME("Unexpected register table %u.\n", table);
1460 return D3DERR_INVALIDCALL;
1463 else if (type == D3DXPT_PIXELSHADER)
1465 switch(table)
1467 case PRES_REGTAB_OCONST:
1468 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count);
1469 case PRES_REGTAB_OICONST:
1470 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count);
1471 case PRES_REGTAB_OBCONST:
1472 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count);
1473 default:
1474 FIXME("Unexpected register table %u.\n", table);
1475 return D3DERR_INVALIDCALL;
1478 else
1480 FIXME("Unexpected parameter type %u.\n", type);
1481 return D3DERR_INVALIDCALL;
1485 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
1486 ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1487 D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty)
1489 unsigned int const_idx;
1490 unsigned int current_start = 0, current_count = 0;
1491 enum pres_reg_tables current_table = PRES_REGTAB_COUNT;
1492 BOOL update_device = manager || device;
1493 HRESULT hr, result = D3D_OK;
1494 ULONG64 update_version = const_tab->update_version;
1496 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1498 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1499 enum pres_reg_tables table = const_set->table;
1500 struct d3dx_parameter *param = const_set->param;
1501 unsigned int element, i, j, start_offset;
1502 struct const_upload_info info;
1503 unsigned int *data;
1504 enum pres_value_type param_type;
1506 if (!(param && is_param_dirty(param, update_version)))
1507 continue;
1509 data = param->data;
1510 start_offset = get_offset_reg(table, const_set->register_index);
1511 if (const_set->direct_copy)
1513 regstore_set_values(rs, table, data, start_offset,
1514 get_offset_reg(table, const_set->register_count));
1515 continue;
1517 param_type = table_type_from_param_type(param->type);
1518 if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR)
1520 unsigned int count = max(param->rows, param->columns);
1522 if (count >= get_reg_components(table))
1524 regstore_set_data(rs, table, start_offset, data,
1525 count * const_set->element_count, param_type);
1527 else
1529 for (element = 0; element < const_set->element_count; ++element)
1530 regstore_set_data(rs, table, start_offset + get_offset_reg(table, element),
1531 &data[element * count], count, param_type);
1533 continue;
1535 get_const_upload_info(const_set, &info);
1536 for (element = 0; element < const_set->element_count; ++element)
1538 unsigned int *out = (unsigned int *)rs->tables[table] + start_offset;
1540 /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer.
1541 * All the supported types of parameters and table values have the same size. */
1542 if (info.transpose)
1544 for (i = 0; i < info.major_count; ++i)
1545 for (j = 0; j < info.minor; ++j)
1546 out[i * info.major_stride + j] = data[i + j * info.major];
1548 for (j = 0; j < info.minor_remainder; ++j)
1549 out[i * info.major_stride + j] = data[i + j * info.major];
1551 else
1553 for (i = 0; i < info.major_count; ++i)
1554 for (j = 0; j < info.minor; ++j)
1555 out[i * info.major_stride + j] = data[i * info.minor + j];
1557 start_offset += get_offset_reg(table, const_set->register_count);
1558 data += param->rows * param->columns;
1560 start_offset = get_offset_reg(table, const_set->register_index);
1561 if (table_info[table].type != param_type)
1562 regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset,
1563 get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type);
1565 const_tab->update_version = new_update_version;
1566 if (!update_device)
1567 return D3D_OK;
1569 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1571 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1573 if (device_update_all || (const_set->param
1574 ? is_param_dirty(const_set->param, update_version) : pres_dirty))
1576 enum pres_reg_tables table = const_set->table;
1578 if (table == current_table && current_start + current_count == const_set->register_index)
1580 current_count += const_set->register_count * const_set->element_count;
1582 else
1584 if (current_count)
1586 if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1587 (DWORD *)rs->tables[current_table]
1588 + get_offset_reg(current_table, current_start), current_start, current_count)))
1589 result = hr;
1591 current_table = table;
1592 current_start = const_set->register_index;
1593 current_count = const_set->register_count * const_set->element_count;
1597 if (current_count)
1599 if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1600 (DWORD *)rs->tables[current_table]
1601 + get_offset_reg(current_table, current_start), current_start, current_count)))
1602 result = hr;
1604 return result;
1607 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset)
1609 return regstore_get_double(rs, table, offset);
1612 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp)
1614 unsigned int offset, base_index, reg_index, table;
1616 table = opr->reg.table;
1618 if (opr->index_reg.table == PRES_REGTAB_COUNT)
1619 base_index = 0;
1620 else
1621 base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset));
1623 offset = get_offset_reg(table, base_index) + opr->reg.offset + comp;
1624 reg_index = get_reg_offset(table, offset);
1626 if (reg_index >= rs->table_sizes[table])
1628 unsigned int wrap_size;
1630 if (table == PRES_REGTAB_CONST)
1632 /* As it can be guessed from tests, offset into floating constant table is wrapped
1633 * to the nearest power of 2 and not to the actual table size. */
1634 for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1)
1637 else
1639 wrap_size = rs->table_sizes[table];
1641 WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n",
1642 reg_index, table, wrap_size, rs->table_sizes[table]);
1643 reg_index %= wrap_size;
1645 if (reg_index >= rs->table_sizes[table])
1646 return 0.0;
1648 offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table);
1651 return exec_get_reg_value(rs, table, offset);
1654 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg,
1655 unsigned int comp, double res)
1657 regstore_set_double(rs, reg->table, reg->offset + comp, res);
1660 #define ARGS_ARRAY_SIZE 8
1661 static HRESULT execute_preshader(struct d3dx_preshader *pres)
1663 unsigned int i, j, k;
1664 double args[ARGS_ARRAY_SIZE];
1665 double res;
1667 for (i = 0; i < pres->ins_count; ++i)
1669 const struct d3dx_pres_ins *ins;
1670 const struct op_info *oi;
1672 ins = &pres->ins[i];
1673 oi = &pres_op_info[ins->op];
1674 if (oi->func_all_comps)
1676 if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE)
1678 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count);
1679 return E_FAIL;
1681 for (k = 0; k < oi->input_count; ++k)
1682 for (j = 0; j < ins->component_count; ++j)
1683 args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k],
1684 ins->scalar_op && !k ? 0 : j);
1685 res = oi->func(args, ins->component_count);
1687 /* only 'dot' instruction currently falls here */
1688 exec_set_arg(&pres->regs, &ins->output.reg, 0, res);
1690 else
1692 for (j = 0; j < ins->component_count; ++j)
1694 for (k = 0; k < oi->input_count; ++k)
1695 args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j);
1696 res = oi->func(args, ins->component_count);
1697 exec_set_arg(&pres->regs, &ins->output.reg, j, res);
1701 return D3D_OK;
1704 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version)
1706 unsigned int i;
1708 if (update_version == ULONG64_MAX)
1709 update_version = ctab->update_version;
1710 for (i = 0; i < ctab->input_count; ++i)
1712 if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]),
1713 update_version))
1714 return TRUE;
1716 return FALSE;
1719 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version)
1721 return is_const_tab_input_dirty(&peval->pres.inputs, update_version)
1722 || is_const_tab_input_dirty(&peval->shader_inputs, update_version);
1725 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param,
1726 void *param_value)
1728 HRESULT hr;
1729 unsigned int i;
1730 unsigned int elements, elements_param, elements_table;
1731 BOOL is_dirty;
1732 float *oc;
1734 TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
1736 if ((is_dirty = is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX)))
1738 set_constants(&peval->pres.regs, &peval->pres.inputs,
1739 next_update_version(peval->version_counter), NULL, NULL,
1740 peval->param_type, FALSE, FALSE);
1743 if (is_dirty || peval->pres.regs.table_sizes[PRES_REGTAB_INPUT])
1745 if (FAILED(hr = execute_preshader(&peval->pres)))
1746 return hr;
1749 elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]);
1750 elements_param = param->bytes / sizeof(unsigned int);
1751 elements = min(elements_table, elements_param);
1752 oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST];
1753 for (i = 0; i < elements; ++i)
1754 set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT);
1755 return D3D_OK;
1758 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1759 struct d3dx_param_eval *peval, BOOL update_all)
1761 HRESULT hr;
1762 struct d3dx_preshader *pres = &peval->pres;
1763 struct d3dx_regstore *rs = &pres->regs;
1764 ULONG64 new_update_version = next_update_version(peval->version_counter);
1765 BOOL pres_dirty = FALSE;
1767 TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
1769 if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX))
1771 set_constants(rs, &pres->inputs, new_update_version,
1772 NULL, NULL, peval->param_type, FALSE, FALSE);
1773 if (FAILED(hr = execute_preshader(pres)))
1774 return hr;
1775 pres_dirty = TRUE;
1778 return set_constants(rs, &peval->shader_inputs, new_update_version,
1779 manager, device, peval->param_type, update_all, pres_dirty);