kernel32/nls: Convert remaining files to UTF-8.
[wine.git] / dlls / d3dx9_36 / preshader.c
bloba16c065fb8ef50cfe544fafbdfbe078340cf921a
1 /*
2 * Copyright 2016 Paul Gofman
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 #include "config.h"
20 #include "wine/port.h"
22 #include "d3dx9_private.h"
24 #include <float.h>
26 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
/* Preshader operations. parse_pres_ins() stores the index of the matching
 * pres_op_info[] row in d3dx_pres_ins.op, so the order of the entries here
 * has to follow the order of that table. */
enum pres_ops
{
    PRESHADER_OP_NOP = 0,
    PRESHADER_OP_MOV,
    PRESHADER_OP_ADD,
    PRESHADER_OP_MUL,
    PRESHADER_OP_DOT,
    PRESHADER_OP_NEG,
    PRESHADER_OP_RCP,
    PRESHADER_OP_LT,
    PRESHADER_OP_FRC,
    PRESHADER_OP_MIN,
    PRESHADER_OP_MAX,
    PRESHADER_OP_GE,
    PRESHADER_OP_CMP,
    PRESHADER_OP_SIN,
    PRESHADER_OP_COS,
    PRESHADER_OP_RSQ,
    PRESHADER_OP_EXP
};
/* Callback type shared by all preshader operations: 'args' holds the input
 * values already fetched by the caller, 'n' is the component count of the
 * instruction being executed. */
typedef double (*pres_op_func)(double *args, int n);
/* mov: pass the single input through unchanged. */
static double pres_mov(double *args, int n)
{
    const double value = args[0];

    return value;
}
/* add: sum of the two inputs. */
static double pres_add(double *args, int n)
{
    const double lhs = args[0];
    const double rhs = args[1];

    return lhs + rhs;
}
/* mul: product of the two inputs. */
static double pres_mul(double *args, int n)
{
    const double lhs = args[0];
    const double rhs = args[1];

    return lhs * rhs;
}
/* dot: dot product of two n-component vectors stored back to back in 'args'
 * (first vector in args[0..n-1], second in args[n..2n-1]). */
static double pres_dot(double *args, int n)
{
    double acc = 0.0;
    int idx = 0;

    while (idx < n)
    {
        acc += args[idx] * args[n + idx];
        ++idx;
    }
    return acc;
}
/* neg: arithmetic negation of the input. */
static double pres_neg(double *args, int n)
{
    const double value = args[0];

    return -value;
}
/* rcp: reciprocal of the input (plain floating point division, so a zero
 * input yields an infinity). */
static double pres_rcp(double *args, int n)
{
    const double value = args[0];

    return 1.0 / value;
}
/* lt: 1.0 when the first input is strictly less than the second, else 0.0. */
static double pres_lt(double *args, int n)
{
    if (args[0] < args[1])
        return 1.0;
    return 0.0;
}
/* ge: 1.0 when the first input is greater than or equal to the second,
 * else 0.0. */
static double pres_ge(double *args, int n)
{
    if (args[0] >= args[1])
        return 1.0;
    return 0.0;
}
68 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);}
69 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);}
70 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);}
/* cmp: select between the second and third input based on the sign of the
 * first one; a negative selector picks args[2], anything else args[1]. */
static double pres_cmp(double *args, int n)
{
    if (args[0] < 0.0)
        return args[2];
    return args[1];
}
72 static double pres_sin(double *args, int n) {return sin(args[0]);}
73 static double pres_cos(double *args, int n) {return cos(args[0]);}
74 static double pres_rsq(double *args, int n)
76 double v;
78 v = fabs(args[0]);
79 if (v == 0.0)
80 return INFINITY;
81 else
82 return 1.0 / sqrt(v);
84 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);}
/* Fields of the first word of a preshader instruction, as decoded by
 * parse_pres_ins(). */
#define PRES_OPCODE_MASK  0x7ff00000
#define PRES_OPCODE_SHIFT 20
#define PRES_SCALAR_FLAG  0x80000000
#define PRES_NCOMP_MASK   0x0000ffff

/* Four character codes identifying byte code comment sections; read as
 * little-endian bytes each value spells the name in the macro. */
#define FOURCC_PRES 0x53455250 /* 'P','R','E','S' */
#define FOURCC_CLIT 0x54494c43 /* 'C','L','I','T' */
#define FOURCC_FXLC 0x434c5846 /* 'F','X','L','C' */
#define FOURCC_PRSI 0x49535250 /* 'P','R','S','I' */
/* NOTE(review): not referenced by the code visible in this file. */
#define PRES_SIGN   0x46580000
97 struct op_info
99 unsigned int opcode;
100 char mnem[8];
101 unsigned int input_count;
102 BOOL func_all_comps;
103 pres_op_func func;
106 static const struct op_info pres_op_info[] =
108 {0x000, "nop", 0, 0, NULL }, /* PRESHADER_OP_NOP */
109 {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */
110 {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */
111 {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */
112 {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */
113 {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */
114 {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */
115 {0x202, "lt", 2, 0, pres_lt }, /* PRESHADER_OP_LT */
116 {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */
117 {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */
118 {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */
119 {0x203, "ge", 2, 0, pres_ge }, /* PRESHADER_OP_GE */
120 {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */
121 {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */
122 {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */
123 {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */
124 {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */
/* Storage type of a single component in a register table. */
enum pres_value_type
{
    PRES_VT_FLOAT = 0,
    PRES_VT_DOUBLE,
    PRES_VT_INT,
    PRES_VT_BOOL
};
135 static const struct
137 unsigned int component_size;
138 unsigned int reg_component_count;
139 enum pres_value_type type;
141 table_info[] =
143 {sizeof(double), 1, PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
144 {sizeof(float), 4, PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
145 {sizeof(float), 4, PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
146 {sizeof(BOOL), 1, PRES_VT_BOOL }, /* PRES_REGTAB_OBCONST */
147 {sizeof(int), 4, PRES_VT_INT, }, /* PRES_REGTAB_OICONST */
148 /* TODO: use double precision for 64 bit */
149 {sizeof(float), 4, PRES_VT_FLOAT } /* PRES_REGTAB_TEMP */
/* Register name prefixes used by the dump helpers, indexed by register table
 * id; the last entry is the placeholder for "no table". */
static const char *table_symbol[] =
{
    "imm",
    "c",
    "oc",
    "ob",
    "oi",
    "r",
    "(null)",
};
157 static const enum pres_reg_tables pres_regset2table[] =
159 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */
160 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */
161 PRES_REGTAB_CONST, /* D3DXRS_FLOAT4 */
162 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */
165 static const enum pres_reg_tables shad_regset2table[] =
167 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */
168 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */
169 PRES_REGTAB_OCONST, /* D3DXRS_FLOAT4 */
170 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */
173 struct d3dx_pres_operand
175 enum pres_reg_tables table;
176 /* offset is component index, not register index, e. g.
177 offset for component c3.y is 13 (3 * 4 + 1) */
178 unsigned int offset;
181 #define MAX_INPUTS_COUNT 3
183 struct d3dx_pres_ins
185 enum pres_ops op;
186 /* first input argument is scalar,
187 scalar component is propagated */
188 BOOL scalar_op;
189 unsigned int component_count;
190 struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
191 struct d3dx_pres_operand output;
194 static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
196 return offset / table_info[table].reg_component_count;
199 #define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
201 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
203 unsigned int size;
205 size = rs->table_sizes[table] * table_info[table].reg_component_count * table_info[table].component_size;
206 if (size)
208 rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
209 rs->table_value_set[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
210 sizeof(*rs->table_value_set[table]) *
211 ((rs->table_sizes[table] + PRES_BITMASK_BLOCK_SIZE - 1) / PRES_BITMASK_BLOCK_SIZE));
212 if (!rs->tables[table] || !rs->table_value_set[table])
213 return E_OUTOFMEMORY;
215 return D3D_OK;
218 static void regstore_free_tables(struct d3dx_regstore *rs)
220 unsigned int i;
222 for (i = 0; i < PRES_REGTAB_COUNT; ++i)
224 HeapFree(GetProcessHeap(), 0, rs->tables[i]);
225 HeapFree(GetProcessHeap(), 0, rs->table_value_set[i]);
229 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, void *data,
230 unsigned int start_offset, unsigned int count)
232 unsigned int block_idx, start, end, start_block, end_block;
234 if (!count)
235 return;
237 memcpy((BYTE *)rs->tables[table] + start_offset * table_info[table].component_size,
238 data, count * table_info[table].component_size);
240 start = get_reg_offset(table, start_offset);
241 start_block = start / PRES_BITMASK_BLOCK_SIZE;
242 start -= start_block * PRES_BITMASK_BLOCK_SIZE;
243 end = get_reg_offset(table, start_offset + count - 1);
244 end_block = end / PRES_BITMASK_BLOCK_SIZE;
245 end = (end_block + 1) * PRES_BITMASK_BLOCK_SIZE - 1 - end;
247 if (start_block == end_block)
249 rs->table_value_set[table][start_block] |= (~0u << start) & (~0u >> end);
251 else
253 rs->table_value_set[table][start_block] |= ~0u << start;
255 for (block_idx = start_block + 1; block_idx < end_block; ++block_idx)
256 rs->table_value_set[table][block_idx] = ~0u;
258 rs->table_value_set[table][end_block] |= ~0u >> end;
262 static unsigned int regstore_is_val_set_reg(struct d3dx_regstore *rs, unsigned int table, unsigned int reg_idx)
264 return rs->table_value_set[table][reg_idx / PRES_BITMASK_BLOCK_SIZE] &
265 (1u << (reg_idx % PRES_BITMASK_BLOCK_SIZE));
268 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset)
270 BYTE *p;
272 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
273 switch (table_info[table].type)
275 case PRES_VT_FLOAT:
276 return *(float *)p;
277 case PRES_VT_DOUBLE:
278 return *(double *)p;
279 default:
280 FIXME("Unexpected preshader input from table %u.\n", table);
281 return NAN;
285 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v)
287 BYTE *p;
288 unsigned int reg_idx;
290 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
291 switch (table_info[table].type)
293 case PRES_VT_FLOAT : *(float *)p = v; break;
294 case PRES_VT_DOUBLE: *(double *)p = v; break;
295 case PRES_VT_INT : *(int *)p = lrint(v); break;
296 case PRES_VT_BOOL : *(BOOL *)p = !!v; break;
298 reg_idx = get_reg_offset(table, offset);
299 rs->table_value_set[table][reg_idx / PRES_BITMASK_BLOCK_SIZE] |=
300 1u << (reg_idx % PRES_BITMASK_BLOCK_SIZE);
303 static void regstore_reset_table(struct d3dx_regstore *rs, unsigned int table)
305 unsigned int size;
307 size = rs->table_sizes[table] * table_info[table].reg_component_count * table_info[table].component_size;
309 memset(rs->tables[table], 0, size);
310 memset(rs->table_value_set[table], 0,
311 sizeof(*rs->table_value_set[table]) *
312 ((rs->table_sizes[table] + PRES_BITMASK_BLOCK_SIZE - 1) / PRES_BITMASK_BLOCK_SIZE));
/* Trace the byte code as 32 bit hexadecimal words, up to eight per line.
 * 'size' is given in bytes; a trailing partial word is not printed. */
static void dump_bytecode(void *data, unsigned int size)
{
    const unsigned int *words = (unsigned int *)data;
    unsigned int word_count, row_start, row_len, col;

    word_count = size / sizeof(*words);
    for (row_start = 0; row_start < word_count; row_start += row_len)
    {
        row_len = min(word_count - row_start, 8);
        for (col = 0; col < row_len; ++col)
            TRACE("0x%08x,", words[row_start + col]);
        TRACE("\n");
    }
}
/* Walk a run of consecutive comment tokens looking for the one whose first
 * data word equals 'fourcc'.
 *
 * ptr    - first word to examine
 * count  - number of words available at 'ptr'
 * fourcc - section identifier to look for
 * size   - receives the matching section's size in words as encoded in the
 *          comment token (that figure includes the fourcc word itself)
 *
 * Returns a pointer to the word following the fourcc, or NULL when the run
 * of comments ends, a comment is empty or does not fit into 'count', or
 * fewer than three words remain. '*size' is only written on success. */
static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
        unsigned int fourcc, unsigned int *size)
{
    for (;;)
    {
        unsigned int token, length;

        /* Requiring more than two words guarantees that at least one value
         * follows the fourcc of any section returned. */
        if (count <= 2)
            return NULL;

        token = ptr[0];
        if ((token & 0xffff) != 0xfffe)
            return NULL;

        length = token >> 16;
        if (!length || length + 1 > count)
            return NULL;

        if (ptr[1] == fourcc)
        {
            *size = length;
            return ptr + 2;
        }

        /* Skip the comment token plus its payload. */
        count -= length + 1;
        ptr += length + 1;
    }
}
354 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
356 static const enum pres_reg_tables reg_table[8] =
358 PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT,
359 PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
362 if (count < 3)
364 WARN("Byte code buffer ends unexpectedly.\n");
365 return NULL;
368 if (*ptr)
370 FIXME("Relative addressing not supported yet, word %#x.\n", *ptr);
371 return NULL;
373 ++ptr;
375 if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT)
377 FIXME("Unsupported register table %#x.\n", *ptr);
378 return NULL;
380 opr->table = reg_table[*ptr++];
381 opr->offset = *ptr++;
383 if (opr->table == PRES_REGTAB_OBCONST)
384 opr->offset /= 4;
385 return ptr;
388 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
390 unsigned int ins_code, ins_raw;
391 unsigned int input_count;
392 unsigned int i;
394 if (count < 2)
396 WARN("Byte code buffer ends unexpectedly.\n");
397 return NULL;
400 ins_raw = *ptr++;
401 ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
402 ins->component_count = ins_raw & PRES_NCOMP_MASK;
403 ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);
405 if (ins->component_count < 1 || ins->component_count > 4)
407 FIXME("Unsupported number of components %u.\n", ins->component_count);
408 return NULL;
410 input_count = *ptr++;
411 count -= 2;
412 for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
413 if (ins_code == pres_op_info[i].opcode)
414 break;
415 if (i == ARRAY_SIZE(pres_op_info))
417 FIXME("Unknown opcode %#x, raw %#x.\n", ins_code, ins_raw);
418 return NULL;
420 ins->op = i;
421 if (input_count > ARRAY_SIZE(ins->inputs) || input_count != pres_op_info[i].input_count)
423 FIXME("Actual input args %u, expected %u, instruction %s.\n", input_count,
424 pres_op_info[i].input_count, pres_op_info[i].mnem);
425 return NULL;
427 for (i = 0; i < input_count; ++i)
429 unsigned int *p;
431 p = parse_pres_arg(ptr, count, &ins->inputs[i]);
432 if (!p)
433 return NULL;
434 count -= p - ptr;
435 ptr = p;
437 ptr = parse_pres_arg(ptr, count, &ins->output);
438 return ptr;
441 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc)
443 D3DXCONSTANT_DESC buffer[2];
444 HRESULT hr;
445 unsigned int count;
447 count = ARRAY_SIZE(buffer);
448 if (FAILED(hr = ID3DXConstantTable_GetConstantDesc(ctab, hc, buffer, &count)))
450 FIXME("Could not get constant desc, hr %#x.\n", hr);
451 return hr;
453 else if (count != 1)
455 FIXME("Unexpected constant descriptors count %u.\n", count);
456 return D3DERR_INVALIDCALL;
458 *desc = buffer[0];
459 return D3D_OK;
462 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out, struct d3dx9_base_effect *base)
464 ID3DXConstantTable *ctab;
465 D3DXCONSTANT_DESC *cdesc;
466 struct d3dx_parameter **inputs_param;
467 D3DXCONSTANTTABLE_DESC desc;
468 HRESULT hr;
469 D3DXHANDLE hc;
470 unsigned int i;
472 out->inputs = cdesc = NULL;
473 out->ctab = NULL;
474 out->inputs_param = NULL;
475 out->input_count = 0;
476 inputs_param = NULL;
477 hr = D3DXGetShaderConstantTable(byte_code, &ctab);
478 if (FAILED(hr) || !ctab)
480 TRACE("Could not get CTAB data, hr %#x.\n", hr);
481 /* returning OK, shaders and preshaders without CTAB are valid */
482 return D3D_OK;
484 if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc)))
486 FIXME("Could not get CTAB desc, hr %#x.\n", hr);
487 goto err_out;
490 cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
491 inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
492 if (!cdesc || !inputs_param)
494 hr = E_OUTOFMEMORY;
495 goto err_out;
498 for (i = 0; i < desc.Constants; ++i)
500 hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
501 if (!hc)
503 FIXME("Null constant handle.\n");
504 goto err_out;
506 if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[i])))
507 goto err_out;
508 inputs_param[i] = get_parameter_by_name(base, NULL, cdesc[i].Name);
509 if (cdesc[i].Class == D3DXPC_OBJECT)
510 TRACE("Object %s, parameter %p.\n", cdesc[i].Name, inputs_param[i]);
511 else if (!inputs_param[i])
512 ERR("Could not find parameter %s in effect.\n", cdesc[i].Name);
514 out->input_count = desc.Constants;
515 out->inputs = cdesc;
516 out->inputs_param = inputs_param;
517 out->ctab = ctab;
518 return D3D_OK;
520 err_out:
521 HeapFree(GetProcessHeap(), 0, cdesc);
522 HeapFree(GetProcessHeap(), 0, inputs_param);
523 if (ctab)
524 ID3DXConstantTable_Release(ctab);
525 return hr;
528 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
530 if (table < PRES_REGTAB_COUNT)
531 table_sizes[table] = max(table_sizes[table], max_register + 1);
534 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
536 unsigned int i, table, max_register;
538 for (i = 0; i < ctab->input_count; ++i)
540 if (!ctab->inputs[i].RegisterCount)
541 continue;
542 max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1;
543 table = ctab->regset2table[ctab->inputs[i].RegisterSet];
544 update_table_size(table_sizes, table, max_register);
548 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count)
550 static const char *xyzw_str = "xyzw";
551 unsigned int i, table;
553 table = arg->table;
554 if (table == PRES_REGTAB_IMMED)
556 TRACE("(");
557 for (i = 0; i < component_count; ++i)
558 TRACE(i < component_count - 1 ? "%.16e, " : "%.16e",
559 ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->offset + i]);
560 TRACE(")");
562 else
564 TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->offset));
565 for (i = 0; i < component_count; ++i)
566 TRACE("%c", xyzw_str[(arg->offset + i) % 4]);
570 static void dump_registers(struct d3dx_const_tab *ctab)
572 unsigned int table, i;
574 for (i = 0; i < ctab->input_count; ++i)
576 table = ctab->regset2table[ctab->inputs[i].RegisterSet];
577 TRACE("// %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)",
578 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount);
582 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins)
584 unsigned int i;
586 TRACE(" %s ", pres_op_info[ins->op].mnem);
587 dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count);
588 for (i = 0; i < pres_op_info[ins->op].input_count; ++i)
590 TRACE(", ");
591 dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count);
593 TRACE("\n");
596 static void dump_preshader(struct d3dx_preshader *pres)
598 unsigned int i;
600 TRACE("// Preshader registers:\n");
601 dump_registers(&pres->inputs);
602 TRACE(" preshader\n");
603 for (i = 0; i < pres->ins_count; ++i)
604 dump_ins(&pres->regs, &pres->ins[i]);
607 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base)
609 unsigned int *p;
610 unsigned int i, j, const_count;
611 double *dconst;
612 HRESULT hr;
613 unsigned int saved_word;
614 unsigned int section_size;
616 TRACE("Preshader version %#x.\n", *ptr & 0xffff);
618 if (!count)
620 WARN("Unexpected end of byte code buffer.\n");
621 return D3DXERR_INVALIDDATA;
624 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
625 if (p)
627 const_count = *p++;
628 if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
630 WARN("Byte code buffer ends unexpectedly.\n");
631 return D3DXERR_INVALIDDATA;
633 dconst = (double *)p;
635 else
637 const_count = 0;
638 dconst = NULL;
640 TRACE("%u double constants.\n", const_count);
642 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
643 if (!p)
645 WARN("Could not find preshader code.\n");
646 return D3D_OK;
648 pres->ins_count = *p++;
649 --section_size;
650 if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
652 WARN("Invalid instruction count %u.\n", pres->ins_count);
653 return D3DXERR_INVALIDDATA;
655 TRACE("%u instructions.\n", pres->ins_count);
656 pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
657 if (!pres->ins)
658 return E_OUTOFMEMORY;
659 for (i = 0; i < pres->ins_count; ++i)
661 unsigned int *ptr_next;
663 ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
664 if (!ptr_next)
665 return D3DXERR_INVALIDDATA;
666 section_size -= ptr_next - p;
667 p = ptr_next;
670 saved_word = *ptr;
671 *ptr = 0xfffe0000;
672 hr = get_constants_desc(ptr, &pres->inputs, base);
673 *ptr = saved_word;
674 if (FAILED(hr))
675 return hr;
677 pres->inputs.regset2table = pres_regset2table;
679 pres->regs.table_sizes[PRES_REGTAB_IMMED] = const_count;
681 for (i = 0; i < pres->ins_count; ++i)
683 for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
684 update_table_size(pres->regs.table_sizes, pres->ins[i].inputs[j].table,
685 get_reg_offset(pres->ins[i].inputs[j].table,
686 pres->ins[i].inputs[j].offset + pres->ins[i].component_count - 1));
688 update_table_size(pres->regs.table_sizes, pres->ins[i].output.table,
689 get_reg_offset(pres->ins[i].output.table,
690 pres->ins[i].output.offset + pres->ins[i].component_count - 1));
692 update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
693 if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
694 return E_OUTOFMEMORY;
695 regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);
697 return D3D_OK;
700 void d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size,
701 D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out)
703 struct d3dx_param_eval *peval;
704 unsigned int *ptr;
705 HRESULT hr;
706 unsigned int i;
707 BOOL shader;
708 unsigned int count, pres_size;
710 TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
711 base_effect, byte_code, byte_code_size, type, peval_out);
713 count = byte_code_size / sizeof(unsigned int);
714 if (!byte_code || !count)
716 *peval_out = NULL;
717 return;
720 peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval));
721 if (!peval)
722 goto err_out;
724 peval->param_type = type;
725 switch (type)
727 case D3DXPT_VERTEXSHADER:
728 case D3DXPT_PIXELSHADER:
729 shader = TRUE;
730 break;
731 default:
732 shader = FALSE;
733 break;
735 peval->shader_inputs.regset2table = shad_regset2table;
737 ptr = (unsigned int *)byte_code;
738 if (shader)
740 if ((*ptr & 0xfffe0000) != 0xfffe0000)
742 FIXME("Invalid shader signature %#x.\n", *ptr);
743 goto err_out;
745 TRACE("Shader version %#x.\n", *ptr & 0xffff);
747 if (FAILED(hr = get_constants_desc(ptr, &peval->shader_inputs, base_effect)))
749 FIXME("Could not get shader constant table, hr %#x.\n", hr);
750 goto err_out;
752 update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
753 ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
754 if (!ptr)
755 TRACE("No preshader found.\n");
757 else
759 pres_size = count;
762 if (ptr && FAILED(parse_preshader(&peval->pres, ptr, pres_size, base_effect)))
764 FIXME("Failed parsing preshader, byte code for analysis follows.\n");
765 dump_bytecode(byte_code, byte_code_size);
766 goto err_out;
769 for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i)
771 if (FAILED(regstore_alloc_table(&peval->pres.regs, i)))
772 goto err_out;
775 if (TRACE_ON(d3dx))
777 dump_bytecode(byte_code, byte_code_size);
778 dump_preshader(&peval->pres);
779 if (shader)
781 TRACE("// Shader registers:\n");
782 dump_registers(&peval->shader_inputs);
785 *peval_out = peval;
786 TRACE("Created parameter evaluator %p.\n", *peval_out);
787 return;
789 err_out:
790 FIXME("Error creating parameter evaluator.\n");
791 d3dx_free_param_eval(peval);
792 *peval_out = NULL;
795 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
797 HeapFree(GetProcessHeap(), 0, ctab->inputs);
798 HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
799 if (ctab->ctab)
800 ID3DXConstantTable_Release(ctab->ctab);
803 static void d3dx_free_preshader(struct d3dx_preshader *pres)
805 HeapFree(GetProcessHeap(), 0, pres->ins);
807 regstore_free_tables(&pres->regs);
808 d3dx_free_const_tab(&pres->inputs);
811 void d3dx_free_param_eval(struct d3dx_param_eval *peval)
813 TRACE("peval %p.\n", peval);
815 if (!peval)
816 return;
818 d3dx_free_preshader(&peval->pres);
819 d3dx_free_const_tab(&peval->shader_inputs);
820 HeapFree(GetProcessHeap(), 0, peval);
823 static HRESULT set_constants_param(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
824 D3DXHANDLE hc, struct d3dx_parameter *param)
826 ID3DXConstantTable *ctab = const_tab->ctab;
827 D3DXCONSTANT_DESC desc;
828 unsigned int const_count, param_count, i, j, n, table, start_offset;
829 unsigned int minor, major, major_stride, param_offset;
830 BOOL transpose, get_element;
832 if (FAILED(get_ctab_constant_desc(ctab, hc, &desc)))
833 return D3DERR_INVALIDCALL;
835 if (param->element_count)
837 param_count = param->element_count;
838 const_count = desc.Elements;
839 get_element = TRUE;
841 else
843 if (desc.Elements > 1)
845 FIXME("Unexpected number of constant elements %u.\n", desc.Elements);
846 return D3DERR_INVALIDCALL;
848 param_count = param->member_count;
849 const_count = desc.StructMembers;
850 get_element = FALSE;
852 if (const_count != param_count)
854 FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n",
855 param_count, const_count);
856 return D3DERR_INVALIDCALL;
858 if (const_count)
860 HRESULT hr, ret;
861 D3DXHANDLE hc_element;
863 ret = D3D_OK;
864 for (i = 0; i < const_count; ++i)
866 if (get_element)
867 hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i);
868 else
869 hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i);
870 if (!hc_element)
872 FIXME("Could not get constant.\n");
873 hr = D3DERR_INVALIDCALL;
875 else
877 hr = set_constants_param(rs, const_tab, hc_element, &param->members[i]);
879 if (FAILED(hr))
880 ret = hr;
882 return ret;
885 transpose = (desc.Class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
886 || (param->class == D3DXPC_MATRIX_COLUMNS && desc.Class == D3DXPC_MATRIX_ROWS);
887 if (desc.Class == D3DXPC_MATRIX_COLUMNS)
889 major = param->columns;
890 minor = param->rows;
892 else
894 major = param->rows;
895 minor = param->columns;
898 TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n",
899 debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes);
900 TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u, transpose %#x.\n",
901 debugstr_a(param->name), param->rows, param->columns, param->class,
902 param->flags, param->bytes, transpose);
904 if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table))
906 FIXME("Unknown register set %u.\n", desc.RegisterSet);
907 return D3DERR_INVALIDCALL;
909 table = const_tab->regset2table[desc.RegisterSet];
910 if (table >= PRES_REGTAB_COUNT)
912 ERR("Unexpected register set %u.\n", desc.RegisterSet);
913 return D3DERR_INVALIDCALL;
915 start_offset = desc.RegisterIndex * table_info[table].reg_component_count;
916 major_stride = max(minor, table_info[table].reg_component_count);
917 n = min(major * major_stride,
918 desc.RegisterCount * table_info[table].reg_component_count + major_stride - 1) / major_stride;
919 for (i = 0; i < n; ++i)
921 for (j = 0; j < minor; ++j)
923 unsigned int out;
924 unsigned int *in;
925 unsigned int offset;
927 offset = start_offset + i * major_stride + j;
928 if (offset / table_info[table].reg_component_count >= rs->table_sizes[table])
930 if (table_info[table].reg_component_count != 1)
931 FIXME("Output offset exceeds table size, name %s, component %u.\n", desc.Name, i);
932 break;
934 if (transpose)
935 param_offset = i + j * major;
936 else
937 param_offset = i * minor + j;
938 if (param_offset * sizeof(unsigned int) >= param->bytes)
940 WARN("Parameter data is too short, name %s, component %u.\n", desc.Name, i);
941 break;
944 in = (unsigned int *)param->data + param_offset;
945 /* TODO: store data transfer / convert operation instead of performing an operation
946 from here, to move this to parsing stage */
947 switch (table_info[table].type)
949 case PRES_VT_FLOAT: set_number(&out, D3DXPT_FLOAT, in, param->type); break;
950 case PRES_VT_INT: set_number(&out, D3DXPT_INT, in, param->type); break;
951 case PRES_VT_BOOL: set_number(&out, D3DXPT_BOOL, in, param->type); break;
952 default:
953 FIXME("Unexpected type %#x.\n", table_info[table].type);
954 break;
956 regstore_set_values(rs, table, &out, offset, 1);
960 return D3D_OK;
963 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab)
965 unsigned int i;
966 HRESULT hr, ret;
967 D3DXHANDLE hc;
969 ret = D3D_OK;
970 for (i = 0; i < const_tab->input_count; ++i)
972 if (!const_tab->inputs_param[i] || const_tab->inputs_param[i]->class == D3DXPC_OBJECT)
973 continue;
974 hc = ID3DXConstantTable_GetConstant(const_tab->ctab, NULL, i);
975 if (hc)
977 hr = set_constants_param(rs, const_tab, hc, const_tab->inputs_param[i]);
979 else
981 FIXME("Could not get constant, index %u.\n", i);
982 hr = D3DERR_INVALIDCALL;
984 if (FAILED(hr))
985 ret = hr;
987 return ret;
990 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins,
991 const struct d3dx_pres_operand *opr, unsigned int comp)
993 if (!regstore_is_val_set_reg(rs, opr->table, (opr->offset + comp) / table_info[opr->table].reg_component_count))
995 WARN("Using uninitialized input ");
996 dump_arg(rs, opr, comp);
997 TRACE(".\n");
998 dump_ins(rs, ins);
1000 return regstore_get_double(rs, opr->table, opr->offset + comp);
1003 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr,
1004 unsigned int comp, double res)
1006 regstore_set_double(rs, opr->table, opr->offset + comp, res);
1009 #define ARGS_ARRAY_SIZE 8
1010 static HRESULT execute_preshader(struct d3dx_preshader *pres)
1012 unsigned int i, j, k;
1013 double args[ARGS_ARRAY_SIZE];
1014 double res;
1016 for (i = 0; i < pres->ins_count; ++i)
1018 const struct d3dx_pres_ins *ins;
1019 const struct op_info *oi;
1021 ins = &pres->ins[i];
1022 oi = &pres_op_info[ins->op];
1023 if (oi->func_all_comps)
1025 if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE)
1027 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count);
1028 return E_FAIL;
1030 for (k = 0; k < oi->input_count; ++k)
1031 for (j = 0; j < ins->component_count; ++j)
1032 args[k * ins->component_count + j] = exec_get_arg(&pres->regs, ins, &ins->inputs[k],
1033 ins->scalar_op && !k ? 0 : j);
1034 res = oi->func(args, ins->component_count);
1036 /* only 'dot' instruction currently falls here */
1037 exec_set_arg(&pres->regs, &ins->output, 0, res);
1039 else
1041 for (j = 0; j < ins->component_count; ++j)
1043 for (k = 0; k < oi->input_count; ++k)
1044 args[k] = exec_get_arg(&pres->regs, ins, &ins->inputs[k], ins->scalar_op && !k ? 0 : j);
1045 res = oi->func(args, ins->component_count);
1046 exec_set_arg(&pres->regs, &ins->output, j, res);
1050 return D3D_OK;
1053 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param, void *param_value)
1055 HRESULT hr;
1056 unsigned int i;
1057 unsigned int elements, elements_param, elements_table;
1058 float *oc;
1060 TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
1062 if (FAILED(hr = set_constants(&peval->pres.regs, &peval->pres.inputs)))
1063 return hr;
1065 if (FAILED(hr = execute_preshader(&peval->pres)))
1066 return hr;
1068 elements_table = table_info[PRES_REGTAB_OCONST].reg_component_count
1069 * peval->pres.regs.table_sizes[PRES_REGTAB_OCONST];
1070 elements_param = param->bytes / sizeof(unsigned int);
1071 elements = min(elements_table, elements_param);
1072 oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST];
1073 for (i = 0; i < elements; ++i)
1074 set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT);
1075 return D3D_OK;
1078 static HRESULT set_shader_constants_device(struct IDirect3DDevice9 *device, struct d3dx_regstore *rs,
1079 D3DXPARAMETER_TYPE type, enum pres_reg_tables table)
1081 unsigned int start, count;
1082 void *ptr;
1083 HRESULT hr, result;
1085 result = D3D_OK;
1086 start = 0;
1087 while (start < rs->table_sizes[table])
1089 count = 0;
1090 while (start < rs->table_sizes[table] && !regstore_is_val_set_reg(rs, table, start))
1091 ++start;
1092 while (start + count < rs->table_sizes[table] && regstore_is_val_set_reg(rs, table, start + count))
1093 ++count;
1094 if (!count)
1095 break;
1096 TRACE("Setting %u constants at %u.\n", count, start);
1097 ptr = (BYTE *)rs->tables[table] + start * table_info[table].reg_component_count
1098 * table_info[table].component_size;
1099 if (type == D3DXPT_VERTEXSHADER)
1101 switch(table)
1103 case PRES_REGTAB_OCONST:
1104 hr = IDirect3DDevice9_SetVertexShaderConstantF(device, start, (const float *)ptr, count);
1105 break;
1106 case PRES_REGTAB_OICONST:
1107 hr = IDirect3DDevice9_SetVertexShaderConstantI(device, start, (const int *)ptr, count);
1108 break;
1109 case PRES_REGTAB_OBCONST:
1110 hr = IDirect3DDevice9_SetVertexShaderConstantB(device, start, (const BOOL *)ptr, count);
1111 break;
1112 default:
1113 FIXME("Unexpected register table %u.\n", table);
1114 return D3DERR_INVALIDCALL;
1117 else if (type == D3DXPT_PIXELSHADER)
1119 switch(table)
1121 case PRES_REGTAB_OCONST:
1122 hr = IDirect3DDevice9_SetPixelShaderConstantF(device, start, (const float *)ptr, count);
1123 break;
1124 case PRES_REGTAB_OICONST:
1125 hr = IDirect3DDevice9_SetPixelShaderConstantI(device, start, (const int *)ptr, count);
1126 break;
1127 case PRES_REGTAB_OBCONST:
1128 hr = IDirect3DDevice9_SetPixelShaderConstantB(device, start, (const BOOL *)ptr, count);
1129 break;
1130 default:
1131 FIXME("Unexpected register table %u.\n", table);
1132 return D3DERR_INVALIDCALL;
1135 else
1137 FIXME("Unexpected parameter type %u.\n", type);
1138 return D3DERR_INVALIDCALL;
1141 if (FAILED(hr))
1143 ERR("Setting constants failed, type %u, table %u, hr %#x.\n", type, table, hr);
1144 result = hr;
1146 start += count;
1148 regstore_reset_table(rs, table);
1149 return result;
1152 HRESULT d3dx_param_eval_set_shader_constants(struct IDirect3DDevice9 *device, struct d3dx_param_eval *peval)
1154 static const enum pres_reg_tables set_tables[] =
1155 {PRES_REGTAB_OCONST, PRES_REGTAB_OICONST, PRES_REGTAB_OBCONST};
1156 HRESULT hr, result;
1157 struct d3dx_preshader *pres = &peval->pres;
1158 struct d3dx_regstore *rs = &pres->regs;
1159 unsigned int i;
1161 TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
1163 if (FAILED(hr = set_constants(rs, &pres->inputs)))
1164 return hr;
1165 if (FAILED(hr = execute_preshader(pres)))
1166 return hr;
1167 if (FAILED(hr = set_constants(rs, &peval->shader_inputs)))
1168 return hr;
1170 result = D3D_OK;
1171 for (i = 0; i < ARRAY_SIZE(set_tables); ++i)
1173 if (FAILED(hr = set_shader_constants_device(device, rs, peval->param_type, set_tables[i])))
1174 result = hr;
1176 return result;