mshtml: Implement MediaQueryList's addListener method.
[wine.git] / dlls / wined3d / shader.c
blob9924a2d806ef06e50a3348c9f3f84810b3a976ae
1 /*
2 * Copyright 2002-2003 Jason Edmeades
3 * Copyright 2002-2003 Raphael Junqueira
4 * Copyright 2004 Christian Costa
5 * Copyright 2005 Oliver Stieber
6 * Copyright 2006 Ivan Gyurdiev
7 * Copyright 2007-2008, 2013 Stefan Dösinger for CodeWeavers
8 * Copyright 2009-2011 Henri Verbeet for CodeWeavers
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
25 #include <stdio.h>
26 #include <string.h>
28 #include "wined3d_private.h"
30 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
32 const struct wined3d_vec4 wined3d_srgb_const[] =
34 /* pow, mul_high, sub_high, mul_low */
35 {0.41666f, 1.055f, 0.055f, 12.92f},
36 /* cmp */
37 {0.0031308f, 0.0f, 0.0f, 0.0f},
/* Printable mnemonics for shader instructions. The table is indexed directly
 * by an enum WINED3D_SHADER_INSTRUCTION_HANDLER value (see
 * debug_d3dshaderinstructionhandler()), so the order of the entries has to
 * follow the enum named in each trailing comment. */
static const char * const shader_opcode_names[] =
{
    "abs",                              /* WINED3DSIH_ABS */
    "add",                              /* WINED3DSIH_ADD */
    "and",                              /* WINED3DSIH_AND */
    "atomic_and",                       /* WINED3DSIH_ATOMIC_AND */
    "atomic_cmp_store",                 /* WINED3DSIH_ATOMIC_CMP_STORE */
    "atomic_iadd",                      /* WINED3DSIH_ATOMIC_IADD */
    "atomic_imax",                      /* WINED3DSIH_ATOMIC_IMAX */
    "atomic_imin",                      /* WINED3DSIH_ATOMIC_IMIN */
    "atomic_or",                        /* WINED3DSIH_ATOMIC_OR */
    "atomic_umax",                      /* WINED3DSIH_ATOMIC_UMAX */
    "atomic_umin",                      /* WINED3DSIH_ATOMIC_UMIN */
    "atomic_xor",                       /* WINED3DSIH_ATOMIC_XOR */
    "bem",                              /* WINED3DSIH_BEM */
    "bfi",                              /* WINED3DSIH_BFI */
    "bfrev",                            /* WINED3DSIH_BFREV */
    "break",                            /* WINED3DSIH_BREAK */
    "breakc",                           /* WINED3DSIH_BREAKC */
    "breakp",                           /* WINED3DSIH_BREAKP */
    "bufinfo",                          /* WINED3DSIH_BUFINFO */
    "call",                             /* WINED3DSIH_CALL */
    "callnz",                           /* WINED3DSIH_CALLNZ */
    "case",                             /* WINED3DSIH_CASE */
    "cmp",                              /* WINED3DSIH_CMP */
    "cnd",                              /* WINED3DSIH_CND */
    "continue",                         /* WINED3DSIH_CONTINUE */
    "continuec",                        /* WINED3DSIH_CONTINUEP */
    "countbits",                        /* WINED3DSIH_COUNTBITS */
    "crs",                              /* WINED3DSIH_CRS */
    "cut",                              /* WINED3DSIH_CUT */
    "cut_stream",                       /* WINED3DSIH_CUT_STREAM */
    "dcl",                              /* WINED3DSIH_DCL */
    "dcl_constantBuffer",               /* WINED3DSIH_DCL_CONSTANT_BUFFER */
    "dcl_function_body",                /* WINED3DSIH_DCL_FUNCTION_BODY */
    "dcl_function_table",               /* WINED3DSIH_DCL_FUNCTION_TABLE */
    "dcl_globalFlags",                  /* WINED3DSIH_DCL_GLOBAL_FLAGS */
    "dcl_gs_instances",                 /* WINED3DSIH_DCL_GS_INSTANCES */
    "dcl_hs_fork_phase_instance_count", /* WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT */
    "dcl_hs_join_phase_instance_count", /* WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */
    "dcl_hs_max_tessfactor",            /* WINED3DSIH_DCL_HS_MAX_TESSFACTOR */
    "dcl_immediateConstantBuffer",      /* WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER */
    "dcl_index_range",                  /* WINED3DSIH_DCL_INDEX_RANGE */
    "dcl_indexableTemp",                /* WINED3DSIH_DCL_INDEXABLE_TEMP */
    "dcl_input",                        /* WINED3DSIH_DCL_INPUT */
    "dcl_input_control_point_count",    /* WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT */
    "dcl_inputPrimitive",               /* WINED3DSIH_DCL_INPUT_PRIMITIVE */
    "dcl_input_ps",                     /* WINED3DSIH_DCL_INPUT_PS */
    "dcl_input_ps_sgv",                 /* WINED3DSIH_DCL_INPUT_PS_SGV */
    "dcl_input_ps_siv",                 /* WINED3DSIH_DCL_INPUT_PS_SIV */
    "dcl_input_sgv",                    /* WINED3DSIH_DCL_INPUT_SGV */
    "dcl_input_siv",                    /* WINED3DSIH_DCL_INPUT_SIV */
    "dcl_interface",                    /* WINED3DSIH_DCL_INTERFACE */
    "dcl_output",                       /* WINED3DSIH_DCL_OUTPUT */
    "dcl_output_control_point_count",   /* WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT */
    "dcl_output_siv",                   /* WINED3DSIH_DCL_OUTPUT_SIV */
    "dcl_outputTopology",               /* WINED3DSIH_DCL_OUTPUT_TOPOLOGY */
    "dcl_resource_raw",                 /* WINED3DSIH_DCL_RESOURCE_RAW */
    "dcl_resource_structured",          /* WINED3DSIH_DCL_RESOURCE_STRUCTURED */
    "dcl_sampler",                      /* WINED3DSIH_DCL_SAMPLER */
    "dcl_stream",                       /* WINED3DSIH_DCL_STREAM */
    "dcl_temps",                        /* WINED3DSIH_DCL_TEMPS */
    "dcl_tessellator_domain",           /* WINED3DSIH_DCL_TESSELLATOR_DOMAIN */
    "dcl_tessellator_output_primitive", /* WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE */
    "dcl_tessellator_partitioning",     /* WINED3DSIH_DCL_TESSELLATOR_PARTITIONING */
    "dcl_tgsm_raw",                     /* WINED3DSIH_DCL_TGSM_RAW */
    "dcl_tgsm_structured",              /* WINED3DSIH_DCL_TGSM_STRUCTURED */
    "dcl_thread_group",                 /* WINED3DSIH_DCL_THREAD_GROUP */
    "dcl_uav_raw",                      /* WINED3DSIH_DCL_UAV_RAW */
    "dcl_uav_structured",               /* WINED3DSIH_DCL_UAV_STRUCTURED */
    "dcl_uav_typed",                    /* WINED3DSIH_DCL_UAV_TYPED */
    "dcl_maxOutputVertexCount",         /* WINED3DSIH_DCL_VERTICES_OUT */
    "def",                              /* WINED3DSIH_DEF */
    "default",                          /* WINED3DSIH_DEFAULT */
    "defb",                             /* WINED3DSIH_DEFB */
    "defi",                             /* WINED3DSIH_DEFI */
    "div",                              /* WINED3DSIH_DIV */
    "dp2",                              /* WINED3DSIH_DP2 */
    "dp2add",                           /* WINED3DSIH_DP2ADD */
    "dp3",                              /* WINED3DSIH_DP3 */
    "dp4",                              /* WINED3DSIH_DP4 */
    "dst",                              /* WINED3DSIH_DST */
    "dsx",                              /* WINED3DSIH_DSX */
    "deriv_rtx_coarse",                 /* WINED3DSIH_DSX_COARSE */
    "deriv_rtx_fine",                   /* WINED3DSIH_DSX_FINE */
    "dsy",                              /* WINED3DSIH_DSY */
    "deriv_rty_coarse",                 /* WINED3DSIH_DSY_COARSE */
    "deriv_rty_fine",                   /* WINED3DSIH_DSY_FINE */
    "else",                             /* WINED3DSIH_ELSE */
    "emit",                             /* WINED3DSIH_EMIT */
    "emit_stream",                      /* WINED3DSIH_EMIT_STREAM */
    "endif",                            /* WINED3DSIH_ENDIF */
    "endloop",                          /* WINED3DSIH_ENDLOOP */
    "endrep",                           /* WINED3DSIH_ENDREP */
    "endswitch",                        /* WINED3DSIH_ENDSWITCH */
    "eq",                               /* WINED3DSIH_EQ */
    "eval_centroid",                    /* WINED3DSIH_EVAL_CENTROID */
    "eval_sample_index",                /* WINED3DSIH_EVAL_SAMPLE_INDEX */
    "exp",                              /* WINED3DSIH_EXP */
    "expp",                             /* WINED3DSIH_EXPP */
    "f16tof32",                         /* WINED3DSIH_F16TOF32 */
    "f32tof16",                         /* WINED3DSIH_F32TOF16 */
    "fcall",                            /* WINED3DSIH_FCALL */
    "firstbit_hi",                      /* WINED3DSIH_FIRSTBIT_HI */
    "firstbit_lo",                      /* WINED3DSIH_FIRSTBIT_LO */
    "firstbit_shi",                     /* WINED3DSIH_FIRSTBIT_SHI */
    "frc",                              /* WINED3DSIH_FRC */
    "ftoi",                             /* WINED3DSIH_FTOI */
    "ftou",                             /* WINED3DSIH_FTOU */
    "gather4",                          /* WINED3DSIH_GATHER4 */
    "gather4_c",                        /* WINED3DSIH_GATHER4_C */
    "gather4_po",                       /* WINED3DSIH_GATHER4_PO */
    "gather4_po_c",                     /* WINED3DSIH_GATHER4_PO_C */
    "ge",                               /* WINED3DSIH_GE */
    "hs_control_point_phase",           /* WINED3DSIH_HS_CONTROL_POINT_PHASE */
    "hs_decls",                         /* WINED3DSIH_HS_DECLS */
    "hs_fork_phase",                    /* WINED3DSIH_HS_FORK_PHASE */
    "hs_join_phase",                    /* WINED3DSIH_HS_JOIN_PHASE */
    "iadd",                             /* WINED3DSIH_IADD */
    "ibfe",                             /* WINED3DSIH_IBFE */
    "ieq",                              /* WINED3DSIH_IEQ */
    "if",                               /* WINED3DSIH_IF */
    "ifc",                              /* WINED3DSIH_IFC */
    "ige",                              /* WINED3DSIH_IGE */
    "ilt",                              /* WINED3DSIH_ILT */
    "imad",                             /* WINED3DSIH_IMAD */
    "imax",                             /* WINED3DSIH_IMAX */
    "imin",                             /* WINED3DSIH_IMIN */
    "imm_atomic_alloc",                 /* WINED3DSIH_IMM_ATOMIC_ALLOC */
    "imm_atomic_and",                   /* WINED3DSIH_IMM_ATOMIC_AND */
    "imm_atomic_cmp_exch",              /* WINED3DSIH_IMM_ATOMIC_CMP_EXCH */
    "imm_atomic_consume",               /* WINED3DSIH_IMM_ATOMIC_CONSUME */
    "imm_atomic_exch",                  /* WINED3DSIH_IMM_ATOMIC_EXCH */
    "imm_atomic_iadd",                  /* WINED3DSIH_IMM_ATOMIC_IADD */
    "imm_atomic_imax",                  /* WINED3DSIH_IMM_ATOMIC_IMAX */
    "imm_atomic_imin",                  /* WINED3DSIH_IMM_ATOMIC_IMIN */
    "imm_atomic_or",                    /* WINED3DSIH_IMM_ATOMIC_OR */
    "imm_atomic_umax",                  /* WINED3DSIH_IMM_ATOMIC_UMAX */
    "imm_atomic_umin",                  /* WINED3DSIH_IMM_ATOMIC_UMIN */
    "imm_atomic_xor",                   /* WINED3DSIH_IMM_ATOMIC_XOR */
    "imul",                             /* WINED3DSIH_IMUL */
    "ine",                              /* WINED3DSIH_INE */
    "ineg",                             /* WINED3DSIH_INEG */
    "ishl",                             /* WINED3DSIH_ISHL */
    "ishr",                             /* WINED3DSIH_ISHR */
    "itof",                             /* WINED3DSIH_ITOF */
    "label",                            /* WINED3DSIH_LABEL */
    "ld",                               /* WINED3DSIH_LD */
    "ld2dms",                           /* WINED3DSIH_LD2DMS */
    "ld_raw",                           /* WINED3DSIH_LD_RAW */
    "ld_structured",                    /* WINED3DSIH_LD_STRUCTURED */
    "ld_uav_typed",                     /* WINED3DSIH_LD_UAV_TYPED */
    "lit",                              /* WINED3DSIH_LIT */
    "lod",                              /* WINED3DSIH_LOD */
    "log",                              /* WINED3DSIH_LOG */
    "logp",                             /* WINED3DSIH_LOGP */
    "loop",                             /* WINED3DSIH_LOOP */
    "lrp",                              /* WINED3DSIH_LRP */
    "lt",                               /* WINED3DSIH_LT */
    "m3x2",                             /* WINED3DSIH_M3x2 */
    "m3x3",                             /* WINED3DSIH_M3x3 */
    "m3x4",                             /* WINED3DSIH_M3x4 */
    "m4x3",                             /* WINED3DSIH_M4x3 */
    "m4x4",                             /* WINED3DSIH_M4x4 */
    "mad",                              /* WINED3DSIH_MAD */
    "max",                              /* WINED3DSIH_MAX */
    "min",                              /* WINED3DSIH_MIN */
    "mov",                              /* WINED3DSIH_MOV */
    "mova",                             /* WINED3DSIH_MOVA */
    "movc",                             /* WINED3DSIH_MOVC */
    "mul",                              /* WINED3DSIH_MUL */
    "ne",                               /* WINED3DSIH_NE */
    "nop",                              /* WINED3DSIH_NOP */
    "not",                              /* WINED3DSIH_NOT */
    "nrm",                              /* WINED3DSIH_NRM */
    "or",                               /* WINED3DSIH_OR */
    "phase",                            /* WINED3DSIH_PHASE */
    "pow",                              /* WINED3DSIH_POW */
    "rcp",                              /* WINED3DSIH_RCP */
    "rep",                              /* WINED3DSIH_REP */
    "resinfo",                          /* WINED3DSIH_RESINFO */
    "ret",                              /* WINED3DSIH_RET */
    "retp",                             /* WINED3DSIH_RETP */
    "round_ne",                         /* WINED3DSIH_ROUND_NE */
    "round_ni",                         /* WINED3DSIH_ROUND_NI */
    "round_pi",                         /* WINED3DSIH_ROUND_PI */
    "round_z",                          /* WINED3DSIH_ROUND_Z */
    "rsq",                              /* WINED3DSIH_RSQ */
    "sample",                           /* WINED3DSIH_SAMPLE */
    "sample_b",                         /* WINED3DSIH_SAMPLE_B */
    "sample_c",                         /* WINED3DSIH_SAMPLE_C */
    "sample_c_lz",                      /* WINED3DSIH_SAMPLE_C_LZ */
    "sample_d",                         /* WINED3DSIH_SAMPLE_GRAD */
    "sample_info",                      /* WINED3DSIH_SAMPLE_INFO */
    "sample_l",                         /* WINED3DSIH_SAMPLE_LOD */
    "sample_pos",                       /* WINED3DSIH_SAMPLE_POS */
    "setp",                             /* WINED3DSIH_SETP */
    "sge",                              /* WINED3DSIH_SGE */
    "sgn",                              /* WINED3DSIH_SGN */
    "sincos",                           /* WINED3DSIH_SINCOS */
    "slt",                              /* WINED3DSIH_SLT */
    "sqrt",                             /* WINED3DSIH_SQRT */
    "store_raw",                        /* WINED3DSIH_STORE_RAW */
    "store_structured",                 /* WINED3DSIH_STORE_STRUCTURED */
    "store_uav_typed",                  /* WINED3DSIH_STORE_UAV_TYPED */
    "sub",                              /* WINED3DSIH_SUB */
    "swapc",                            /* WINED3DSIH_SWAPC */
    "switch",                           /* WINED3DSIH_SWITCH */
    "sync",                             /* WINED3DSIH_SYNC */
    "texld",                            /* WINED3DSIH_TEX */
    "texbem",                           /* WINED3DSIH_TEXBEM */
    "texbeml",                          /* WINED3DSIH_TEXBEML */
    "texcrd",                           /* WINED3DSIH_TEXCOORD */
    "texdepth",                         /* WINED3DSIH_TEXDEPTH */
    "texdp3",                           /* WINED3DSIH_TEXDP3 */
    "texdp3tex",                        /* WINED3DSIH_TEXDP3TEX */
    "texkill",                          /* WINED3DSIH_TEXKILL */
    "texldd",                           /* WINED3DSIH_TEXLDD */
    "texldl",                           /* WINED3DSIH_TEXLDL */
    "texm3x2depth",                     /* WINED3DSIH_TEXM3x2DEPTH */
    "texm3x2pad",                       /* WINED3DSIH_TEXM3x2PAD */
    "texm3x2tex",                       /* WINED3DSIH_TEXM3x2TEX */
    "texm3x3",                          /* WINED3DSIH_TEXM3x3 */
    "texm3x3diff",                      /* WINED3DSIH_TEXM3x3DIFF */
    "texm3x3pad",                       /* WINED3DSIH_TEXM3x3PAD */
    "texm3x3spec",                      /* WINED3DSIH_TEXM3x3SPEC */
    "texm3x3tex",                       /* WINED3DSIH_TEXM3x3TEX */
    "texm3x3vspec",                     /* WINED3DSIH_TEXM3x3VSPEC */
    "texreg2ar",                        /* WINED3DSIH_TEXREG2AR */
    "texreg2gb",                        /* WINED3DSIH_TEXREG2GB */
    "texreg2rgb",                       /* WINED3DSIH_TEXREG2RGB */
    "ubfe",                             /* WINED3DSIH_UBFE */
    "udiv",                             /* WINED3DSIH_UDIV */
    "uge",                              /* WINED3DSIH_UGE */
    "ult",                              /* WINED3DSIH_ULT */
    "umax",                             /* WINED3DSIH_UMAX */
    "umin",                             /* WINED3DSIH_UMIN */
    "umul",                             /* WINED3DSIH_UMUL */
    "ushr",                             /* WINED3DSIH_USHR */
    "utof",                             /* WINED3DSIH_UTOF */
    "xor",                              /* WINED3DSIH_XOR */
};
/* Semantic name strings, indexed by an enum wined3d_decl_usage value (see
 * shader_semantic_name_from_usage()). The entry order has to follow the enum
 * named in each trailing comment. */
static const char * const semantic_names[] =
{
    "SV_POSITION",  /* WINED3D_DECL_USAGE_POSITION */
    "BLENDWEIGHT",  /* WINED3D_DECL_USAGE_BLEND_WEIGHT */
    "BLENDINDICES", /* WINED3D_DECL_USAGE_BLEND_INDICES */
    "NORMAL",       /* WINED3D_DECL_USAGE_NORMAL */
    "PSIZE",        /* WINED3D_DECL_USAGE_PSIZE */
    "TEXCOORD",     /* WINED3D_DECL_USAGE_TEXCOORD */
    "TANGENT",      /* WINED3D_DECL_USAGE_TANGENT */
    "BINORMAL",     /* WINED3D_DECL_USAGE_BINORMAL */
    "TESSFACTOR",   /* WINED3D_DECL_USAGE_TESS_FACTOR */
    "POSITIONT",    /* WINED3D_DECL_USAGE_POSITIONT */
    "COLOR",        /* WINED3D_DECL_USAGE_COLOR */
    "FOG",          /* WINED3D_DECL_USAGE_FOG */
    "DEPTH",        /* WINED3D_DECL_USAGE_DEPTH */
    "SAMPLE",       /* WINED3D_DECL_USAGE_SAMPLE */
};
301 const char *debug_d3dshaderinstructionhandler(enum WINED3D_SHADER_INSTRUCTION_HANDLER handler_idx)
303 if (handler_idx >= ARRAY_SIZE(shader_opcode_names))
304 return wine_dbg_sprintf("UNRECOGNIZED(%#x)", handler_idx);
306 return shader_opcode_names[handler_idx];
309 static const char *shader_semantic_name_from_usage(enum wined3d_decl_usage usage)
311 if (usage >= ARRAY_SIZE(semantic_names))
313 FIXME("Unrecognized usage %#x.\n", usage);
314 return "UNRECOGNIZED";
317 return semantic_names[usage];
320 static enum wined3d_decl_usage shader_usage_from_semantic_name(const char *name)
322 unsigned int i;
324 for (i = 0; i < ARRAY_SIZE(semantic_names); ++i)
326 if (!strcmp(name, semantic_names[i]))
327 return i;
330 return ~0U;
333 static enum wined3d_sysval_semantic shader_sysval_semantic_from_usage(enum wined3d_decl_usage usage)
335 switch (usage)
337 case WINED3D_DECL_USAGE_POSITION:
338 return WINED3D_SV_POSITION;
339 default:
340 return 0;
344 BOOL shader_match_semantic(const char *semantic_name, enum wined3d_decl_usage usage)
346 return !strcmp(semantic_name, shader_semantic_name_from_usage(usage));
349 static void shader_signature_from_semantic(struct wined3d_shader_signature_element *e,
350 const struct wined3d_shader_semantic *s)
352 e->semantic_name = shader_semantic_name_from_usage(s->usage);
353 e->semantic_idx = s->usage_idx;
354 e->stream_idx = 0;
355 e->sysval_semantic = shader_sysval_semantic_from_usage(s->usage);
356 e->component_type = WINED3D_TYPE_FLOAT;
357 e->register_idx = s->reg.reg.idx[0].offset;
358 e->mask = s->reg.write_mask;
361 static void shader_signature_from_usage(struct wined3d_shader_signature_element *e,
362 enum wined3d_decl_usage usage, UINT usage_idx, UINT reg_idx, DWORD write_mask)
364 e->semantic_name = shader_semantic_name_from_usage(usage);
365 e->semantic_idx = usage_idx;
366 e->stream_idx = 0;
367 e->sysval_semantic = shader_sysval_semantic_from_usage(usage);
368 e->component_type = WINED3D_TYPE_FLOAT;
369 e->register_idx = reg_idx;
370 e->mask = write_mask;
373 static const struct wined3d_shader_frontend *shader_select_frontend(enum vkd3d_shader_source_type source_type)
375 switch (source_type)
377 case VKD3D_SHADER_SOURCE_D3D_BYTECODE:
378 return &sm1_shader_frontend;
380 case VKD3D_SHADER_SOURCE_DXBC_TPF:
381 return &sm4_shader_frontend;
383 default:
384 WARN("Invalid source type %#x specified.\n", source_type);
385 return NULL;
389 void string_buffer_clear(struct wined3d_string_buffer *buffer)
391 buffer->buffer[0] = '\0';
392 buffer->content_size = 0;
395 BOOL string_buffer_init(struct wined3d_string_buffer *buffer)
397 buffer->buffer_size = 32;
398 if (!(buffer->buffer = heap_alloc(buffer->buffer_size)))
400 ERR("Failed to allocate shader buffer memory.\n");
401 return FALSE;
404 string_buffer_clear(buffer);
405 return TRUE;
408 void string_buffer_free(struct wined3d_string_buffer *buffer)
410 heap_free(buffer->buffer);
413 BOOL string_buffer_resize(struct wined3d_string_buffer *buffer, int rc)
415 char *new_buffer;
416 unsigned int new_buffer_size = buffer->buffer_size * 2;
418 while (rc > 0 && (unsigned int)rc >= new_buffer_size - buffer->content_size)
419 new_buffer_size *= 2;
420 if (!(new_buffer = heap_realloc(buffer->buffer, new_buffer_size)))
422 ERR("Failed to grow buffer.\n");
423 buffer->buffer[buffer->content_size] = '\0';
424 return FALSE;
426 buffer->buffer = new_buffer;
427 buffer->buffer_size = new_buffer_size;
428 return TRUE;
431 int shader_vaddline(struct wined3d_string_buffer *buffer, const char *format, va_list args)
433 unsigned int rem;
434 int rc;
436 rem = buffer->buffer_size - buffer->content_size;
437 rc = vsnprintf(&buffer->buffer[buffer->content_size], rem, format, args);
438 if (rc < 0 /* C89 */ || (unsigned int)rc >= rem /* C99 */)
439 return rc;
441 buffer->content_size += rc;
442 return 0;
/* Append formatted text to buffer, growing the buffer and retrying until the
 * text fits. Returns 0 on success and -1 when the buffer cannot be grown. */
int shader_addline(struct wined3d_string_buffer *buffer, const char *format, ...)
{
    va_list args;
    int ret;

    do
    {
        /* The argument list has to be restarted for every attempt. */
        va_start(args, format);
        ret = shader_vaddline(buffer, format, args);
        va_end(args);
        if (!ret)
            return 0;
    } while (string_buffer_resize(buffer, ret));

    return -1;
}
462 struct wined3d_string_buffer *string_buffer_get(struct wined3d_string_buffer_list *list)
464 struct wined3d_string_buffer *buffer;
466 if (list_empty(&list->list))
468 buffer = heap_alloc(sizeof(*buffer));
469 if (!buffer || !string_buffer_init(buffer))
471 ERR("Couldn't allocate buffer for temporary string.\n");
472 heap_free(buffer);
473 return NULL;
476 else
478 buffer = LIST_ENTRY(list_head(&list->list), struct wined3d_string_buffer, entry);
479 list_remove(&buffer->entry);
481 string_buffer_clear(buffer);
482 return buffer;
/* Replace the content of buffer with formatted text. A NULL buffer is
 * accepted and reported as success (0); otherwise the result is that of
 * shader_vaddline(). */
static int string_buffer_vsprintf(struct wined3d_string_buffer *buffer, const char *format, va_list args)
{
    if (buffer)
    {
        string_buffer_clear(buffer);
        return shader_vaddline(buffer, format, args);
    }

    return 0;
}
/* Replace the content of buffer with formatted text, growing the buffer and
 * retrying until the text fits. Gives up silently when the buffer cannot be
 * grown. */
void string_buffer_sprintf(struct wined3d_string_buffer *buffer, const char *format, ...)
{
    va_list args;
    int ret;

    do
    {
        /* The argument list has to be restarted for every attempt. */
        va_start(args, format);
        ret = string_buffer_vsprintf(buffer, format, args);
        va_end(args);
    } while (ret && string_buffer_resize(buffer, ret));
}
510 void string_buffer_release(struct wined3d_string_buffer_list *list, struct wined3d_string_buffer *buffer)
512 if (!buffer)
513 return;
514 list_add_head(&list->list, &buffer->entry);
517 void string_buffer_list_init(struct wined3d_string_buffer_list *list)
519 list_init(&list->list);
522 void string_buffer_list_cleanup(struct wined3d_string_buffer_list *list)
524 struct wined3d_string_buffer *buffer, *buffer_next;
526 LIST_FOR_EACH_ENTRY_SAFE(buffer, buffer_next, &list->list, struct wined3d_string_buffer, entry)
528 string_buffer_free(buffer);
529 heap_free(buffer);
531 list_init(&list->list);
534 static void shader_delete_constant_list(struct list *clist)
536 struct wined3d_shader_lconst *constant, *constant_next;
538 LIST_FOR_EACH_ENTRY_SAFE(constant, constant_next, clist, struct wined3d_shader_lconst, entry)
539 heap_free(constant);
540 list_init(clist);
543 static void shader_set_limits(struct wined3d_shader *shader)
545 static const struct limits_entry
547 unsigned int min_version;
548 unsigned int max_version;
549 struct wined3d_shader_limits limits;
551 vs_limits[] =
553 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
554 {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 1), { 0, 0, 256, 0, 12, 0}},
555 {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 1), { 0, 16, 256, 16, 12, 0}},
556 /* DX10 cards on Windows advertise a D3D9 constant limit of 256
557 * even though they are capable of supporting much more (GL
558 * drivers advertise 1024). d3d9.dll and d3d8.dll clamp the
559 * wined3d-advertised maximum. Clamp the constant limit for <= 3.0
560 * shaders to 256. */
561 {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), { 4, 16, 256, 16, 12, 0}},
562 {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), {16, 0, 0, 0, 16, 0}},
563 {WINED3D_SHADER_VERSION(4, 1), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 0}},
566 hs_limits[] =
568 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packet_input */
569 {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 32}},
571 ds_limits[] =
573 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packet_input */
574 {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 32}},
576 gs_limits[] =
578 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
579 {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), {16, 0, 0, 0, 32, 16}},
580 {WINED3D_SHADER_VERSION(4, 1), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 32}},
583 ps_limits[] =
585 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
586 {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 3), { 4, 0, 8, 0, 0, 0}},
587 {WINED3D_SHADER_VERSION(1, 4), WINED3D_SHADER_VERSION(1, 4), { 6, 0, 8, 0, 0, 0}},
588 {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 0), {16, 0, 32, 0, 0, 0}},
589 {WINED3D_SHADER_VERSION(2, 1), WINED3D_SHADER_VERSION(2, 1), {16, 16, 32, 16, 0, 0}},
590 {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), {16, 16, 224, 16, 0, 10}},
591 {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 0, 32}},
594 cs_limits[] =
596 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
597 {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 0, 0}},
599 const struct limits_entry *limits_array;
600 DWORD shader_version = WINED3D_SHADER_VERSION(shader->reg_maps.shader_version.major,
601 shader->reg_maps.shader_version.minor);
602 int i = 0;
604 switch (shader->reg_maps.shader_version.type)
606 default:
607 FIXME("Unexpected shader type %u found.\n", shader->reg_maps.shader_version.type);
608 /* Fall-through. */
609 case WINED3D_SHADER_TYPE_VERTEX:
610 limits_array = vs_limits;
611 break;
612 case WINED3D_SHADER_TYPE_HULL:
613 limits_array = hs_limits;
614 break;
615 case WINED3D_SHADER_TYPE_DOMAIN:
616 limits_array = ds_limits;
617 break;
618 case WINED3D_SHADER_TYPE_GEOMETRY:
619 limits_array = gs_limits;
620 break;
621 case WINED3D_SHADER_TYPE_PIXEL:
622 limits_array = ps_limits;
623 break;
624 case WINED3D_SHADER_TYPE_COMPUTE:
625 limits_array = cs_limits;
626 break;
629 while (limits_array[i].min_version && limits_array[i].min_version <= shader_version)
631 if (shader_version <= limits_array[i].max_version)
633 shader->limits = &limits_array[i].limits;
634 break;
636 ++i;
638 if (!shader->limits)
640 FIXME("Unexpected shader version \"%u.%u\".\n",
641 shader->reg_maps.shader_version.major,
642 shader->reg_maps.shader_version.minor);
643 shader->limits = &limits_array[max(0, i - 1)].limits;
647 static BOOL shader_record_register_usage(struct wined3d_shader *shader, struct wined3d_shader_reg_maps *reg_maps,
648 const struct wined3d_shader_register *reg, enum wined3d_shader_type shader_type, unsigned int constf_size)
650 switch (reg->type)
652 case WINED3DSPR_TEXTURE: /* WINED3DSPR_ADDR */
653 if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
654 reg_maps->texcoord |= 1u << reg->idx[0].offset;
655 else
656 reg_maps->address |= 1u << reg->idx[0].offset;
657 break;
659 case WINED3DSPR_TEMP:
660 reg_maps->temporary |= 1u << reg->idx[0].offset;
661 break;
663 case WINED3DSPR_INPUT:
664 if (reg->idx[0].rel_addr)
665 reg_maps->input_rel_addressing = 1;
666 if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
668 /* If relative addressing is used, we must assume that all
669 * registers are used. Even if it is a construct like v3[aL],
670 * we can't assume that v0, v1 and v2 aren't read because aL
671 * can be negative. */
672 if (reg->idx[0].rel_addr)
673 shader->u.ps.input_reg_used = ~0u;
674 else
675 shader->u.ps.input_reg_used |= 1u << reg->idx[0].offset;
677 else
679 reg_maps->input_registers |= 1u << reg->idx[0].offset;
681 break;
683 case WINED3DSPR_RASTOUT:
684 if (reg->idx[0].offset == 1)
685 reg_maps->fog = 1;
686 if (reg->idx[0].offset == 2)
687 reg_maps->point_size = 1;
688 break;
690 case WINED3DSPR_MISCTYPE:
691 if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
693 if (!reg->idx[0].offset)
694 reg_maps->vpos = 1;
695 else if (reg->idx[0].offset == 1)
696 reg_maps->usesfacing = 1;
698 break;
700 case WINED3DSPR_CONST:
701 if (reg->idx[0].rel_addr)
703 if (reg->idx[0].offset < reg_maps->min_rel_offset)
704 reg_maps->min_rel_offset = reg->idx[0].offset;
705 if (reg->idx[0].offset > reg_maps->max_rel_offset)
706 reg_maps->max_rel_offset = reg->idx[0].offset;
707 reg_maps->usesrelconstF = TRUE;
709 else
711 if (reg->idx[0].offset >= min(shader->limits->constant_float, constf_size))
713 WARN("Shader using float constant %u which is not supported.\n", reg->idx[0].offset);
714 return FALSE;
716 else
718 wined3d_insert_bits(reg_maps->constf, reg->idx[0].offset, 1, 0x1);
721 break;
723 case WINED3DSPR_CONSTINT:
724 if (reg->idx[0].offset >= shader->limits->constant_int)
726 WARN("Shader using integer constant %u which is not supported.\n", reg->idx[0].offset);
727 return FALSE;
729 else
731 reg_maps->integer_constants |= (1u << reg->idx[0].offset);
733 break;
735 case WINED3DSPR_CONSTBOOL:
736 if (reg->idx[0].offset >= shader->limits->constant_bool)
738 WARN("Shader using bool constant %u which is not supported.\n", reg->idx[0].offset);
739 return FALSE;
741 else
743 reg_maps->boolean_constants |= (1u << reg->idx[0].offset);
745 break;
747 case WINED3DSPR_COLOROUT:
748 reg_maps->rt_mask |= (1u << reg->idx[0].offset);
749 break;
751 case WINED3DSPR_OUTCONTROLPOINT:
752 reg_maps->vocp = 1;
753 break;
755 case WINED3DSPR_SAMPLEMASK:
756 reg_maps->sample_mask = 1;
757 break;
759 default:
760 TRACE("Not recording register of type %#x and [%#x][%#x].\n",
761 reg->type, reg->idx[0].offset, reg->idx[1].offset);
762 break;
764 return TRUE;
767 static void shader_record_sample(struct wined3d_shader_reg_maps *reg_maps,
768 unsigned int resource_idx, unsigned int sampler_idx, unsigned int bind_idx)
770 struct wined3d_shader_sampler_map_entry *entries, *entry;
771 struct wined3d_shader_sampler_map *map;
772 unsigned int i;
774 map = &reg_maps->sampler_map;
775 entries = map->entries;
776 for (i = 0; i < map->count; ++i)
778 if (entries[i].resource_idx == resource_idx && entries[i].sampler_idx == sampler_idx)
779 return;
782 if (!map->size)
784 if (!(entries = heap_calloc(4, sizeof(*entries))))
786 ERR("Failed to allocate sampler map entries.\n");
787 return;
789 map->size = 4;
790 map->entries = entries;
792 else if (map->count == map->size)
794 size_t new_size = map->size * 2;
796 if (sizeof(*entries) * new_size <= sizeof(*entries) * map->size
797 || !(entries = heap_realloc(entries, sizeof(*entries) * new_size)))
799 ERR("Failed to resize sampler map entries.\n");
800 return;
802 map->size = new_size;
803 map->entries = entries;
806 entry = &entries[map->count++];
807 entry->resource_idx = resource_idx;
808 entry->sampler_idx = sampler_idx;
809 entry->bind_idx = bind_idx;
812 static unsigned int get_instr_extra_regcount(enum WINED3D_SHADER_INSTRUCTION_HANDLER instr, unsigned int param)
814 switch (instr)
816 case WINED3DSIH_M4x4:
817 case WINED3DSIH_M3x4:
818 return param == 1 ? 3 : 0;
820 case WINED3DSIH_M4x3:
821 case WINED3DSIH_M3x3:
822 return param == 1 ? 2 : 0;
824 case WINED3DSIH_M3x2:
825 return param == 1 ? 1 : 0;
827 default:
828 return 0;
832 static HRESULT shader_reg_maps_add_tgsm(struct wined3d_shader_reg_maps *reg_maps,
833 unsigned int register_idx, unsigned int size, unsigned int stride)
835 struct wined3d_shader_tgsm *tgsm;
837 if (register_idx >= MAX_TGSM_REGISTERS)
839 ERR("Invalid TGSM register index %u.\n", register_idx);
840 return S_OK;
842 if (reg_maps->shader_version.type != WINED3D_SHADER_TYPE_COMPUTE)
844 FIXME("TGSM declarations are allowed only in compute shaders.\n");
845 return S_OK;
848 if (!wined3d_array_reserve((void **)&reg_maps->tgsm, &reg_maps->tgsm_capacity,
849 register_idx + 1, sizeof(*reg_maps->tgsm)))
850 return E_OUTOFMEMORY;
852 reg_maps->tgsm_count = max(register_idx + 1, reg_maps->tgsm_count);
853 tgsm = &reg_maps->tgsm[register_idx];
854 tgsm->size = size;
855 tgsm->stride = stride;
856 return S_OK;
859 static HRESULT shader_record_shader_phase(struct wined3d_shader *shader,
860 struct wined3d_shader_phase **current_phase, const struct wined3d_shader_instruction *ins,
861 const DWORD *current_instruction_ptr, const DWORD *previous_instruction_ptr)
863 struct wined3d_shader_phase *phase;
865 if ((phase = *current_phase))
867 phase->end = previous_instruction_ptr;
868 *current_phase = NULL;
871 if (shader->reg_maps.shader_version.type != WINED3D_SHADER_TYPE_HULL)
873 ERR("Unexpected shader type %s.\n", debug_shader_type(shader->reg_maps.shader_version.type));
874 return E_FAIL;
877 switch (ins->handler_idx)
879 case WINED3DSIH_HS_CONTROL_POINT_PHASE:
880 if (shader->u.hs.phases.control_point)
882 FIXME("Multiple control point phases.\n");
883 heap_free(shader->u.hs.phases.control_point);
885 if (!(shader->u.hs.phases.control_point = heap_alloc_zero(sizeof(*shader->u.hs.phases.control_point))))
886 return E_OUTOFMEMORY;
887 phase = shader->u.hs.phases.control_point;
888 break;
889 case WINED3DSIH_HS_FORK_PHASE:
890 if (!wined3d_array_reserve((void **)&shader->u.hs.phases.fork,
891 &shader->u.hs.phases.fork_size, shader->u.hs.phases.fork_count + 1,
892 sizeof(*shader->u.hs.phases.fork)))
893 return E_OUTOFMEMORY;
894 phase = &shader->u.hs.phases.fork[shader->u.hs.phases.fork_count++];
895 break;
896 case WINED3DSIH_HS_JOIN_PHASE:
897 if (!wined3d_array_reserve((void **)&shader->u.hs.phases.join,
898 &shader->u.hs.phases.join_size, shader->u.hs.phases.join_count + 1,
899 sizeof(*shader->u.hs.phases.join)))
900 return E_OUTOFMEMORY;
901 phase = &shader->u.hs.phases.join[shader->u.hs.phases.join_count++];
902 break;
903 default:
904 ERR("Unexpected opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
905 return E_FAIL;
908 phase->start = current_instruction_ptr;
909 *current_phase = phase;
911 return WINED3D_OK;
914 static HRESULT shader_calculate_clip_or_cull_distance_mask(
915 const struct wined3d_shader_signature_element *e, unsigned int *mask)
917 /* Clip and cull distances are packed in 4 component registers. 0 and 1 are
918 * the only allowed semantic indices.
920 if (e->semantic_idx >= WINED3D_MAX_CLIP_DISTANCES / 4)
922 *mask = 0;
923 WARN("Invalid clip/cull distance index %u.\n", e->semantic_idx);
924 return WINED3DERR_INVALIDCALL;
927 *mask = (e->mask & WINED3DSP_WRITEMASK_ALL) << (4 * e->semantic_idx);
928 return WINED3D_OK;
931 static void wined3d_insert_interpolation_mode(uint32_t *packed_interpolation_mode,
932 unsigned int register_idx, enum wined3d_shader_interpolation_mode mode)
934 if (mode > WINED3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE)
935 FIXME("Unexpected interpolation mode %#x.\n", mode);
937 wined3d_insert_bits(packed_interpolation_mode,
938 register_idx * WINED3D_PACKED_INTERPOLATION_BIT_COUNT, WINED3D_PACKED_INTERPOLATION_BIT_COUNT, mode);
941 static HRESULT shader_scan_output_signature(struct wined3d_shader *shader)
943 const struct wined3d_shader_signature *output_signature = &shader->output_signature;
944 struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
945 unsigned int i;
946 HRESULT hr;
948 for (i = 0; i < output_signature->element_count; ++i)
950 const struct wined3d_shader_signature_element *e = &output_signature->elements[i];
951 unsigned int mask;
953 reg_maps->output_registers |= 1u << e->register_idx;
954 if (e->sysval_semantic == WINED3D_SV_CLIP_DISTANCE)
956 if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(e, &mask)))
957 return hr;
958 reg_maps->clip_distance_mask |= mask;
960 else if (e->sysval_semantic == WINED3D_SV_CULL_DISTANCE)
962 if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(e, &mask)))
963 return hr;
964 reg_maps->cull_distance_mask |= mask;
966 else if (e->sysval_semantic == WINED3D_SV_VIEWPORT_ARRAY_INDEX)
968 reg_maps->viewport_array = 1;
972 return WINED3D_OK;
975 /* Note that this does not count the loop register as an address register. */
/* Scan the shader byte code once and record in shader->reg_maps, and in the
 * shader type specific data in shader->u, which registers, resources and
 * features the shader uses.
 *
 * shader       The shader to scan. Its reg_maps are zeroed and rebuilt. When
 *              the shader has no input or output signature, one is built
 *              from the registers recorded during the scan.
 * constf_size  Upper bound, together with shader->limits->constant_float, on
 *              the size of the reg_maps->constf map; also passed on to
 *              shader_record_register_usage().
 *
 * Returns WINED3D_OK on success, E_OUTOFMEMORY when an allocation fails,
 * WINED3DERR_INVALIDCALL for byte code that is rejected, or the failure code
 * of a helper (shader_reg_maps_add_tgsm(), shader_record_shader_phase(),
 * shader_scan_output_signature()). */
976 static HRESULT shader_get_registers_used(struct wined3d_shader *shader, DWORD constf_size)
978 struct wined3d_shader_signature_element input_signature_elements[max(MAX_ATTRIBS, MAX_REG_INPUT)];
979 struct wined3d_shader_signature_element output_signature_elements[MAX_REG_OUTPUT];
980 struct wined3d_shader_signature *output_signature = &shader->output_signature;
981 struct wined3d_shader_signature *input_signature = &shader->input_signature;
982 struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
983 const struct wined3d_shader_frontend *fe = shader->frontend;
984 unsigned int cur_loop_depth = 0, max_loop_depth = 0;
985 struct wined3d_shader_version shader_version;
986 struct wined3d_shader_phase *phase = NULL;
987 const DWORD *ptr, *prev_ins, *current_ins;
988 void *fe_data = shader->frontend_data;
989 unsigned int i;
990 HRESULT hr;
992 memset(reg_maps, 0, sizeof(*reg_maps));
993 memset(input_signature_elements, 0, sizeof(input_signature_elements));
994 memset(output_signature_elements, 0, sizeof(output_signature_elements));
995 reg_maps->min_rel_offset = ~0U;
996 list_init(&reg_maps->indexable_temps);
998 fe->shader_read_header(fe_data, &ptr, &shader_version);
999 prev_ins = current_ins = ptr;
1000 reg_maps->shader_version = shader_version;
1002 shader_set_limits(shader);
/* The map gets one entry per 32 float constants, rounded up; presumably each
 * entry is a 32-bit word holding one bit per constant - TODO confirm. */
1004 if (!(reg_maps->constf = heap_calloc(((min(shader->limits->constant_float, constf_size) + 31) / 32),
1005 sizeof(*reg_maps->constf))))
1007 ERR("Failed to allocate constant map memory.\n");
1008 return E_OUTOFMEMORY;
/* Visit instructions until the frontend reports the end of the shader. */
1011 while (!fe->shader_is_end(fe_data, &ptr))
1013 struct wined3d_shader_instruction ins;
1015 current_ins = ptr;
1016 /* Fetch opcode. */
1017 fe->shader_read_instruction(fe_data, &ptr, &ins);
1019 /* Unhandled opcode, and its parameters. */
1020 if (ins.handler_idx == WINED3DSIH_TABLE_SIZE)
1022 WARN("Encountered unrecognised or invalid instruction.\n");
1023 return WINED3DERR_INVALIDCALL;
1026 /* Handle declarations. */
1027 if (ins.handler_idx == WINED3DSIH_DCL
1028 || ins.handler_idx == WINED3DSIH_DCL_UAV_TYPED)
1030 struct wined3d_shader_semantic *semantic = &ins.declaration.semantic;
1031 unsigned int reg_idx = semantic->reg.reg.idx[0].offset;
1033 switch (semantic->reg.reg.type)
1035 /* Mark input registers used. */
1036 case WINED3DSPR_INPUT:
1037 if (reg_idx >= MAX_REG_INPUT)
1039 ERR("Invalid input register index %u.\n", reg_idx);
1040 break;
1042 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL && shader_version.major == 3
1043 && semantic->usage == WINED3D_DECL_USAGE_POSITION && !semantic->usage_idx)
1044 return WINED3DERR_INVALIDCALL;
1045 reg_maps->input_registers |= 1u << reg_idx;
1046 shader_signature_from_semantic(&input_signature_elements[reg_idx], semantic);
1047 break;
1049 /* Vertex shader: mark 3.0 output registers used, save token. */
1050 case WINED3DSPR_OUTPUT:
1051 if (reg_idx >= MAX_REG_OUTPUT)
1053 ERR("Invalid output register index %u.\n", reg_idx);
1054 break;
1056 reg_maps->output_registers |= 1u << reg_idx;
1057 shader_signature_from_semantic(&output_signature_elements[reg_idx], semantic);
1058 if (semantic->usage == WINED3D_DECL_USAGE_FOG)
1059 reg_maps->fog = 1;
1060 if (semantic->usage == WINED3D_DECL_USAGE_PSIZE)
1061 reg_maps->point_size = 1;
1062 break;
1064 case WINED3DSPR_SAMPLER:
1065 shader_record_sample(reg_maps, reg_idx, reg_idx, reg_idx);
/* No break above: a sampler declaration falls through and also records the
 * resource information below. */
1066 case WINED3DSPR_RESOURCE:
1067 if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1069 ERR("Invalid resource index %u.\n", reg_idx);
1070 break;
1072 reg_maps->resource_info[reg_idx].type = semantic->resource_type;
1073 reg_maps->resource_info[reg_idx].data_type = semantic->resource_data_type;
1074 wined3d_bitmap_set(reg_maps->resource_map, reg_idx);
1075 break;
1077 case WINED3DSPR_UAV:
1078 if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1080 ERR("Invalid UAV resource index %u.\n", reg_idx);
1081 break;
1083 reg_maps->uav_resource_info[reg_idx].type = semantic->resource_type;
1084 reg_maps->uav_resource_info[reg_idx].data_type = semantic->resource_data_type;
1085 if (ins.flags)
1086 FIXME("Ignoring typed UAV flags %#x.\n", ins.flags);
1087 break;
1089 default:
1090 TRACE("Not recording DCL register type %#x.\n", semantic->reg.reg.type);
1091 break;
1094 else if (ins.handler_idx == WINED3DSIH_DCL_CONSTANT_BUFFER)
1096 struct wined3d_shader_register *reg = &ins.declaration.src.reg;
1097 if (reg->idx[0].offset >= WINED3D_MAX_CBS)
1099 ERR("Invalid CB index %u.\n", reg->idx[0].offset);
1101 else
1103 reg_maps->cb_sizes[reg->idx[0].offset] = reg->idx[1].offset;
1104 wined3d_bitmap_set(&reg_maps->cb_map, reg->idx[0].offset);
1107 else if (ins.handler_idx == WINED3DSIH_DCL_GLOBAL_FLAGS)
1109 if (ins.flags & WINED3DSGF_FORCE_EARLY_DEPTH_STENCIL)
1111 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1112 shader->u.ps.force_early_depth_stencil = TRUE;
1113 else
1114 FIXME("Invalid instruction %#x for shader type %#x.\n",
1115 ins.handler_idx, shader_version.type);
1117 else
1119 WARN("Ignoring global flags %#x.\n", ins.flags);
1122 else if (ins.handler_idx == WINED3DSIH_DCL_GS_INSTANCES)
1124 if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1125 shader->u.gs.instance_count = ins.declaration.count;
1126 else
1127 FIXME("Invalid instruction %#x for shader type %#x.\n",
1128 ins.handler_idx, shader_version.type);
1130 else if (ins.handler_idx == WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT
1131 || ins.handler_idx == WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT)
1133 if (phase)
1134 phase->instance_count = ins.declaration.count;
1135 else
1136 FIXME("Instruction %s outside of shader phase.\n",
1137 debug_d3dshaderinstructionhandler(ins.handler_idx));
1139 else if (ins.handler_idx == WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER)
1141 if (reg_maps->icb)
1142 FIXME("Multiple immediate constant buffers.\n");
1143 reg_maps->icb = ins.declaration.icb;
1145 else if (ins.handler_idx == WINED3DSIH_DCL_INDEXABLE_TEMP)
1147 if (phase)
1149 FIXME("Indexable temporary registers not supported.\n");
1151 else
1153 struct wined3d_shader_indexable_temp *reg;
1155 if (!(reg = heap_alloc(sizeof(*reg))))
1156 return E_OUTOFMEMORY;
1158 *reg = ins.declaration.indexable_temp;
1159 list_add_tail(&reg_maps->indexable_temps, &reg->entry);
1162 else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PRIMITIVE)
1164 if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1165 shader->u.gs.input_type = ins.declaration.primitive_type.type;
1166 else
1167 FIXME("Invalid instruction %#x for shader type %#x.\n",
1168 ins.handler_idx, shader_version.type);
1170 else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS)
1172 unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1173 if (reg_idx >= MAX_REG_INPUT)
1175 ERR("Invalid register index %u.\n", reg_idx);
/* NOTE(review): this break is not inside a switch, so it leaves the
 * instruction loop and ends the scan early. The same pattern recurs in the
 * other "Invalid ... index" checks of this if/else chain - confirm that
 * stopping the scan, rather than skipping the instruction, is intended. */
1176 break;
1178 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1179 wined3d_insert_interpolation_mode(shader->u.ps.interpolation_mode, reg_idx, ins.flags);
1180 else
1181 FIXME("Invalid instruction %#x for shader type %#x.\n",
1182 ins.handler_idx, shader_version.type);
1184 else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT)
1186 if (ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUT
1187 || ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUTGE
1188 || ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUTLE)
1190 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1191 shader->u.ps.depth_output = ins.declaration.dst.reg.type;
1192 else
1193 FIXME("Invalid instruction %#x for shader type %#x.\n",
1194 ins.handler_idx, shader_version.type);
1197 else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT)
1199 if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1200 shader->u.hs.output_vertex_count = ins.declaration.count;
1201 else
1202 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1204 else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT_TOPOLOGY)
1206 if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1207 shader->u.gs.output_type = ins.declaration.primitive_type.type;
1208 else
1209 FIXME("Invalid instruction %#x for shader type %#x.\n",
1210 ins.handler_idx, shader_version.type);
1212 else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_RAW)
1214 unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1215 if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1217 ERR("Invalid resource index %u.\n", reg_idx);
1218 break;
1220 reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1221 reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1222 reg_maps->resource_info[reg_idx].flags = WINED3D_VIEW_BUFFER_RAW;
1223 wined3d_bitmap_set(reg_maps->resource_map, reg_idx);
1225 else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_STRUCTURED)
1227 unsigned int reg_idx = ins.declaration.structured_resource.reg.reg.idx[0].offset;
1228 if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1230 ERR("Invalid resource index %u.\n", reg_idx);
1231 break;
1233 reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1234 reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1235 reg_maps->resource_info[reg_idx].flags = 0;
1236 reg_maps->resource_info[reg_idx].stride = ins.declaration.structured_resource.byte_stride / 4;
1237 wined3d_bitmap_set(reg_maps->resource_map, reg_idx);
1239 else if (ins.handler_idx == WINED3DSIH_DCL_SAMPLER)
1241 if (ins.flags & WINED3DSI_SAMPLER_COMPARISON_MODE)
1242 reg_maps->sampler_comparison_mode |= (1u << ins.declaration.dst.reg.idx[0].offset);
1244 else if (ins.handler_idx == WINED3DSIH_DCL_TEMPS)
1246 if (phase)
1247 phase->temporary_count = ins.declaration.count;
1248 else
1249 reg_maps->temporary_count = ins.declaration.count;
1251 else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_DOMAIN)
1253 if (shader_version.type == WINED3D_SHADER_TYPE_DOMAIN)
1254 shader->u.ds.tessellator_domain = ins.declaration.tessellator_domain;
1255 else if (shader_version.type != WINED3D_SHADER_TYPE_HULL)
1256 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1258 else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE)
1260 if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1261 shader->u.hs.tessellator_output_primitive = ins.declaration.tessellator_output_primitive;
1262 else
1263 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1265 else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_PARTITIONING)
1267 if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1268 shader->u.hs.tessellator_partitioning = ins.declaration.tessellator_partitioning;
1269 else
1270 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1272 else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_RAW)
1274 if (FAILED(hr = shader_reg_maps_add_tgsm(reg_maps, ins.declaration.tgsm_raw.reg.reg.idx[0].offset,
1275 ins.declaration.tgsm_raw.byte_count / 4, 0)))
1276 return hr;
1278 else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_STRUCTURED)
1280 unsigned int stride = ins.declaration.tgsm_structured.byte_stride / 4;
1281 unsigned int size = stride * ins.declaration.tgsm_structured.structure_count;
1282 if (FAILED(hr = shader_reg_maps_add_tgsm(reg_maps,
1283 ins.declaration.tgsm_structured.reg.reg.idx[0].offset, size, stride)))
1284 return hr;
1286 else if (ins.handler_idx == WINED3DSIH_DCL_THREAD_GROUP)
1288 if (shader_version.type == WINED3D_SHADER_TYPE_COMPUTE)
1290 shader->u.cs.thread_group_size = ins.declaration.thread_group_size;
1292 else
1294 FIXME("Invalid instruction %#x for shader type %#x.\n",
1295 ins.handler_idx, shader_version.type);
1298 else if (ins.handler_idx == WINED3DSIH_DCL_UAV_RAW)
1300 unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1301 if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1303 ERR("Invalid UAV resource index %u.\n", reg_idx);
1304 break;
1306 if (ins.flags)
1307 FIXME("Ignoring raw UAV flags %#x.\n", ins.flags);
1308 reg_maps->uav_resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1309 reg_maps->uav_resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1310 reg_maps->uav_resource_info[reg_idx].flags = WINED3D_VIEW_BUFFER_RAW;
1312 else if (ins.handler_idx == WINED3DSIH_DCL_UAV_STRUCTURED)
1314 unsigned int reg_idx = ins.declaration.structured_resource.reg.reg.idx[0].offset;
1315 if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1317 ERR("Invalid UAV resource index %u.\n", reg_idx);
1318 break;
1320 if (ins.flags)
1321 FIXME("Ignoring structured UAV flags %#x.\n", ins.flags);
1322 reg_maps->uav_resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1323 reg_maps->uav_resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1324 reg_maps->uav_resource_info[reg_idx].flags = 0;
1325 reg_maps->uav_resource_info[reg_idx].stride = ins.declaration.structured_resource.byte_stride / 4;
1327 else if (ins.handler_idx == WINED3DSIH_DCL_VERTICES_OUT)
1329 if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1330 shader->u.gs.vertices_out = ins.declaration.count;
1331 else
1332 FIXME("Invalid instruction %#x for shader type %#x.\n",
1333 ins.handler_idx, shader_version.type);
1335 else if (ins.handler_idx == WINED3DSIH_DEF)
1337 struct wined3d_shader_lconst *lconst;
1338 float *value;
1340 if (!(lconst = heap_alloc(sizeof(*lconst))))
1341 return E_OUTOFMEMORY;
1343 lconst->idx = ins.dst[0].reg.idx[0].offset;
1344 memcpy(lconst->value, ins.src[0].reg.u.immconst_data, 4 * sizeof(DWORD));
1345 value = (float *)lconst->value;
1347 /* In pixel shader 1.X shaders, the constants are clamped between [-1;1] */
1348 if (shader_version.major == 1 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1350 if (value[0] < -1.0f) value[0] = -1.0f;
1351 else if (value[0] > 1.0f) value[0] = 1.0f;
1352 if (value[1] < -1.0f) value[1] = -1.0f;
1353 else if (value[1] > 1.0f) value[1] = 1.0f;
1354 if (value[2] < -1.0f) value[2] = -1.0f;
1355 else if (value[2] > 1.0f) value[2] = 1.0f;
1356 if (value[3] < -1.0f) value[3] = -1.0f;
1357 else if (value[3] > 1.0f) value[3] = 1.0f;
1360 list_add_head(&shader->constantsF, &lconst->entry);
1362 if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
1363 || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
1365 shader->lconst_inf_or_nan = TRUE;
1368 else if (ins.handler_idx == WINED3DSIH_DEFI)
1370 struct wined3d_shader_lconst *lconst;
1372 if (!(lconst = heap_alloc(sizeof(*lconst))))
1373 return E_OUTOFMEMORY;
1375 lconst->idx = ins.dst[0].reg.idx[0].offset;
1376 memcpy(lconst->value, ins.src[0].reg.u.immconst_data, 4 * sizeof(DWORD));
1378 list_add_head(&shader->constantsI, &lconst->entry);
1379 reg_maps->local_int_consts |= (1u << lconst->idx);
1381 else if (ins.handler_idx == WINED3DSIH_DEFB)
1383 struct wined3d_shader_lconst *lconst;
1385 if (!(lconst = heap_alloc(sizeof(*lconst))))
1386 return E_OUTOFMEMORY;
1388 lconst->idx = ins.dst[0].reg.idx[0].offset;
1389 memcpy(lconst->value, ins.src[0].reg.u.immconst_data, sizeof(DWORD));
1391 list_add_head(&shader->constantsB, &lconst->entry);
1392 reg_maps->local_bool_consts |= (1u << lconst->idx);
1394 /* Handle shader phases. */
1395 else if (ins.handler_idx == WINED3DSIH_HS_CONTROL_POINT_PHASE
1396 || ins.handler_idx == WINED3DSIH_HS_FORK_PHASE
1397 || ins.handler_idx == WINED3DSIH_HS_JOIN_PHASE)
1399 if (FAILED(hr = shader_record_shader_phase(shader, &phase, &ins, current_ins, prev_ins)))
1400 return hr;
1402 /* For subroutine prototypes. */
1403 else if (ins.handler_idx == WINED3DSIH_LABEL)
1405 reg_maps->labels |= 1u << ins.src[0].reg.idx[0].offset;
1407 /* Set texture, address, temporary registers. */
1408 else
1410 BOOL color0_mov = FALSE;
1411 unsigned int i;
1413 /* This will loop over all the registers and try to
1414 * make a bitmask of the ones we're interested in.
1416 * Relative addressing tokens are ignored, but that's
1417 * okay, since we'll catch any address registers when
1418 * they are initialized (required by spec). */
1419 for (i = 0; i < ins.dst_count; ++i)
1421 if (!shader_record_register_usage(shader, reg_maps, &ins.dst[i].reg,
1422 shader_version.type, constf_size))
1423 return WINED3DERR_INVALIDCALL;
1425 if (shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1427 UINT idx = ins.dst[i].reg.idx[0].offset;
1429 switch (ins.dst[i].reg.type)
1431 case WINED3DSPR_RASTOUT:
1432 if (shader_version.major >= 3)
1433 break;
1434 switch (idx)
1436 case 0: /* oPos */
1437 reg_maps->output_registers |= 1u << 10;
1438 shader_signature_from_usage(&output_signature_elements[10],
1439 WINED3D_DECL_USAGE_POSITION, 0, 10, WINED3DSP_WRITEMASK_ALL);
1440 break;
1442 case 1: /* oFog */
1443 reg_maps->output_registers |= 1u << 11;
1444 shader_signature_from_usage(&output_signature_elements[11],
1445 WINED3D_DECL_USAGE_FOG, 0, 11, WINED3DSP_WRITEMASK_0);
1446 break;
1448 case 2: /* oPts */
1449 reg_maps->output_registers |= 1u << 11;
1450 shader_signature_from_usage(&output_signature_elements[11],
1451 WINED3D_DECL_USAGE_PSIZE, 0, 11, WINED3DSP_WRITEMASK_1);
1452 break;
1454 break;
1456 case WINED3DSPR_ATTROUT:
1457 if (shader_version.major >= 3)
1458 break;
1459 if (idx < 2)
1461 idx += 8;
1462 if (reg_maps->output_registers & (1u << idx))
1464 output_signature_elements[idx].mask |= ins.dst[i].write_mask;
1466 else
1468 reg_maps->output_registers |= 1u << idx;
1469 shader_signature_from_usage(&output_signature_elements[idx],
1470 WINED3D_DECL_USAGE_COLOR, idx - 8, idx, ins.dst[i].write_mask);
1473 break;
1475 case WINED3DSPR_TEXCRDOUT: /* WINED3DSPR_OUTPUT */
1476 if (shader_version.major >= 3)
1478 if (idx >= ARRAY_SIZE(reg_maps->u.output_registers_mask))
1480 WARN("Invalid output register index %u.\n", idx);
1481 break;
1483 reg_maps->u.output_registers_mask[idx] |= ins.dst[i].write_mask;
1484 break;
1486 if (idx >= ARRAY_SIZE(reg_maps->u.texcoord_mask))
1488 WARN("Invalid texcoord index %u.\n", idx);
1489 break;
1491 reg_maps->u.texcoord_mask[idx] |= ins.dst[i].write_mask;
1492 if (reg_maps->output_registers & (1u << idx))
1494 output_signature_elements[idx].mask |= ins.dst[i].write_mask;
1496 else
1498 reg_maps->output_registers |= 1u << idx;
1499 shader_signature_from_usage(&output_signature_elements[idx],
1500 WINED3D_DECL_USAGE_TEXCOORD, idx, idx, ins.dst[i].write_mask);
1502 break;
1504 default:
1505 break;
1509 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1511 if (ins.dst[i].reg.type == WINED3DSPR_COLOROUT && !ins.dst[i].reg.idx[0].offset)
1513 /* Many 2.0 and 3.0 pixel shaders end with a MOV from a temp register to
1514 * COLOROUT 0. If we know this in advance, the ARB shader backend can skip
1515 * the mov and perform the sRGB write correction from the source register.
1517 * However, if the mov is only partial, we can't do this, and if the write
1518 * comes from an instruction other than MOV it is hard to do as well. If
1519 * COLOROUT 0 is overwritten partially later, the marker is dropped again. */
1520 shader->u.ps.color0_mov = FALSE;
1521 if (ins.handler_idx == WINED3DSIH_MOV
1522 && ins.dst[i].write_mask == WINED3DSP_WRITEMASK_ALL)
1524 /* Used later when the source register is read. */
1525 color0_mov = TRUE;
1528 /* Also drop the MOV marker if the source register is overwritten prior to the shader
1529 * end
1531 else if (ins.dst[i].reg.type == WINED3DSPR_TEMP
1532 && ins.dst[i].reg.idx[0].offset == shader->u.ps.color0_reg)
1534 shader->u.ps.color0_mov = FALSE;
1538 /* Declare 1.x samplers implicitly, based on the destination reg. number. */
1539 if (shader_version.major == 1
1540 && (ins.handler_idx == WINED3DSIH_TEX
1541 || ins.handler_idx == WINED3DSIH_TEXBEM
1542 || ins.handler_idx == WINED3DSIH_TEXBEML
1543 || ins.handler_idx == WINED3DSIH_TEXDP3TEX
1544 || ins.handler_idx == WINED3DSIH_TEXM3x2TEX
1545 || ins.handler_idx == WINED3DSIH_TEXM3x3SPEC
1546 || ins.handler_idx == WINED3DSIH_TEXM3x3TEX
1547 || ins.handler_idx == WINED3DSIH_TEXM3x3VSPEC
1548 || ins.handler_idx == WINED3DSIH_TEXREG2AR
1549 || ins.handler_idx == WINED3DSIH_TEXREG2GB
1550 || ins.handler_idx == WINED3DSIH_TEXREG2RGB))
1552 unsigned int reg_idx = ins.dst[i].reg.idx[0].offset;
1554 if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1556 WARN("Invalid 1.x sampler index %u.\n", reg_idx);
1557 continue;
1560 TRACE("Setting fake 2D resource for 1.x pixelshader.\n");
1561 reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_TEXTURE_2D;
1562 reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_FLOAT;
1563 shader_record_sample(reg_maps, reg_idx, reg_idx, reg_idx);
1564 wined3d_bitmap_set(reg_maps->resource_map, reg_idx);
1566 /* texbem is only valid with < 1.4 pixel shaders */
1567 if (ins.handler_idx == WINED3DSIH_TEXBEM
1568 || ins.handler_idx == WINED3DSIH_TEXBEML)
1570 reg_maps->bumpmat |= 1u << reg_idx;
1571 if (ins.handler_idx == WINED3DSIH_TEXBEML)
1573 reg_maps->luminanceparams |= 1u << reg_idx;
1577 else if (ins.handler_idx == WINED3DSIH_BEM)
1579 reg_maps->bumpmat |= 1u << ins.dst[i].reg.idx[0].offset;
1583 if (ins.handler_idx == WINED3DSIH_IMM_ATOMIC_ALLOC || ins.handler_idx == WINED3DSIH_IMM_ATOMIC_CONSUME)
1585 unsigned int reg_idx = ins.src[0].reg.idx[0].offset;
1586 if (reg_idx >= MAX_UNORDERED_ACCESS_VIEWS)
1588 ERR("Invalid UAV index %u.\n", reg_idx);
1589 break;
1591 reg_maps->uav_counter_mask |= (1u << reg_idx);
1593 else if ((WINED3DSIH_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_ATOMIC_XOR)
1594 || (WINED3DSIH_IMM_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_IMM_ATOMIC_XOR)
1595 || (ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_UAV)
1596 || ins.handler_idx == WINED3DSIH_LD_UAV_TYPED
1597 || (ins.handler_idx == WINED3DSIH_LD_RAW && ins.src[1].reg.type == WINED3DSPR_UAV)
1598 || (ins.handler_idx == WINED3DSIH_LD_STRUCTURED && ins.src[2].reg.type == WINED3DSPR_UAV))
1600 const struct wined3d_shader_register *reg;
1602 if (ins.handler_idx == WINED3DSIH_LD_UAV_TYPED || ins.handler_idx == WINED3DSIH_LD_RAW)
1603 reg = &ins.src[1].reg;
1604 else if (ins.handler_idx == WINED3DSIH_LD_STRUCTURED)
1605 reg = &ins.src[2].reg;
1606 else if (WINED3DSIH_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_ATOMIC_XOR)
1607 reg = &ins.dst[0].reg;
1608 else if (ins.handler_idx == WINED3DSIH_BUFINFO)
1609 reg = &ins.src[0].reg;
1610 else
1611 reg = &ins.dst[1].reg;
1613 if (reg->type == WINED3DSPR_UAV)
1615 if (reg->idx[0].offset >= MAX_UNORDERED_ACCESS_VIEWS)
1617 ERR("Invalid UAV index %u.\n", reg->idx[0].offset);
1618 break;
1620 reg_maps->uav_read_mask |= (1u << reg->idx[0].offset);
1623 else if (ins.handler_idx == WINED3DSIH_NRM)
1625 reg_maps->usesnrm = 1;
1627 else if (ins.handler_idx == WINED3DSIH_DSY
1628 || ins.handler_idx == WINED3DSIH_DSY_COARSE
1629 || ins.handler_idx == WINED3DSIH_DSY_FINE)
1631 reg_maps->usesdsy = 1;
1633 else if (ins.handler_idx == WINED3DSIH_DSX
1634 || ins.handler_idx == WINED3DSIH_DSX_COARSE
1635 || ins.handler_idx == WINED3DSIH_DSX_FINE)
1637 reg_maps->usesdsx = 1;
1639 else if (ins.handler_idx == WINED3DSIH_TEXLDD) reg_maps->usestexldd = 1;
1640 else if (ins.handler_idx == WINED3DSIH_TEXLDL) reg_maps->usestexldl = 1;
1641 else if (ins.handler_idx == WINED3DSIH_MOVA) reg_maps->usesmova = 1;
1642 else if (ins.handler_idx == WINED3DSIH_IFC) reg_maps->usesifc = 1;
1643 else if (ins.handler_idx == WINED3DSIH_CALL) reg_maps->usescall = 1;
1644 else if (ins.handler_idx == WINED3DSIH_POW) reg_maps->usespow = 1;
1645 else if (ins.handler_idx == WINED3DSIH_LOOP
1646 || ins.handler_idx == WINED3DSIH_REP)
1648 ++cur_loop_depth;
1649 if (cur_loop_depth > max_loop_depth)
1650 max_loop_depth = cur_loop_depth;
1652 else if (ins.handler_idx == WINED3DSIH_ENDLOOP
1653 || ins.handler_idx == WINED3DSIH_ENDREP)
1655 --cur_loop_depth;
1657 else if (ins.handler_idx == WINED3DSIH_GATHER4
1658 || ins.handler_idx == WINED3DSIH_GATHER4_C
1659 || ins.handler_idx == WINED3DSIH_SAMPLE
1660 || ins.handler_idx == WINED3DSIH_SAMPLE_B
1661 || ins.handler_idx == WINED3DSIH_SAMPLE_C
1662 || ins.handler_idx == WINED3DSIH_SAMPLE_C_LZ
1663 || ins.handler_idx == WINED3DSIH_SAMPLE_GRAD
1664 || ins.handler_idx == WINED3DSIH_SAMPLE_LOD)
1666 shader_record_sample(reg_maps, ins.src[1].reg.idx[0].offset,
1667 ins.src[2].reg.idx[0].offset, reg_maps->sampler_map.count);
1669 else if (ins.handler_idx == WINED3DSIH_GATHER4_PO
1670 || ins.handler_idx == WINED3DSIH_GATHER4_PO_C)
1672 shader_record_sample(reg_maps, ins.src[2].reg.idx[0].offset,
1673 ins.src[3].reg.idx[0].offset, reg_maps->sampler_map.count);
1675 else if ((ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE)
1676 || (ins.handler_idx == WINED3DSIH_SAMPLE_INFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE))
1678 shader_record_sample(reg_maps, ins.src[0].reg.idx[0].offset,
1679 WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1681 else if (ins.handler_idx == WINED3DSIH_LD
1682 || ins.handler_idx == WINED3DSIH_LD2DMS
1683 || (ins.handler_idx == WINED3DSIH_LD_RAW && ins.src[1].reg.type == WINED3DSPR_RESOURCE)
1684 || (ins.handler_idx == WINED3DSIH_RESINFO && ins.src[1].reg.type == WINED3DSPR_RESOURCE))
1686 shader_record_sample(reg_maps, ins.src[1].reg.idx[0].offset,
1687 WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1689 else if (ins.handler_idx == WINED3DSIH_LD_STRUCTURED
1690 && ins.src[2].reg.type == WINED3DSPR_RESOURCE)
1692 shader_record_sample(reg_maps, ins.src[2].reg.idx[0].offset,
1693 WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1696 if (ins.predicate)
1697 if (!shader_record_register_usage(shader, reg_maps, &ins.predicate->reg,
1698 shader_version.type, constf_size))
1699 return WINED3DERR_INVALIDCALL;
1701 for (i = 0; i < ins.src_count; ++i)
1703 unsigned int count = get_instr_extra_regcount(ins.handler_idx, i);
1704 struct wined3d_shader_register reg = ins.src[i].reg;
1706 if (!shader_record_register_usage(shader, reg_maps, &ins.src[i].reg,
1707 shader_version.type, constf_size))
1708 return WINED3DERR_INVALIDCALL;
1709 while (count)
1711 ++reg.idx[0].offset;
1712 if (!shader_record_register_usage(shader, reg_maps, &reg,
1713 shader_version.type, constf_size))
1714 return WINED3DERR_INVALIDCALL;
1715 --count;
1718 if (color0_mov)
1720 if (ins.src[i].reg.type == WINED3DSPR_TEMP
1721 && ins.src[i].swizzle == WINED3DSP_NOSWIZZLE)
1723 shader->u.ps.color0_mov = TRUE;
1724 shader->u.ps.color0_reg = ins.src[i].reg.idx[0].offset;
1730 prev_ins = current_ins;
1732 reg_maps->loop_depth = max_loop_depth;
/* A phase that is still open when the scan stops ends at the last
 * instruction recorded in prev_ins. */
1734 if (phase)
1736 phase->end = prev_ins;
1737 phase = NULL;
1740 /* PS before 2.0 don't have explicit color outputs. Instead the value of
1741 * R0 is written to the render target. */
1742 if (shader_version.major < 2 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1743 reg_maps->rt_mask |= (1u << 0);
/* With an existing input signature, check vertex shader register indices,
 * pick up the pixel shader position and front-face inputs, and mark every
 * signature register as used. */
1745 if (input_signature->elements)
1747 for (i = 0; i < input_signature->element_count; ++i)
1749 if (shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1751 if (input_signature->elements[i].register_idx >= ARRAY_SIZE(shader->u.vs.attributes))
1753 WARN("Invalid input signature register index %u.\n", input_signature->elements[i].register_idx);
1754 return WINED3DERR_INVALIDCALL;
1757 else if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1759 if (input_signature->elements[i].sysval_semantic == WINED3D_SV_POSITION)
1760 reg_maps->vpos = 1;
1761 else if (input_signature->elements[i].sysval_semantic == WINED3D_SV_IS_FRONT_FACE)
1762 reg_maps->usesfacing = 1;
1764 reg_maps->input_registers |= 1u << input_signature->elements[i].register_idx;
/* Without an input signature, build one from the input registers recorded
 * during the scan, one element per set bit in input_registers. */
1767 else if (!input_signature->elements && reg_maps->input_registers)
1769 unsigned int count = wined3d_popcount(reg_maps->input_registers);
1770 struct wined3d_shader_signature_element *e;
1771 unsigned int i;
1773 if (!(input_signature->elements = heap_calloc(count, sizeof(*input_signature->elements))))
1774 return E_OUTOFMEMORY;
1775 input_signature->element_count = count;
1777 e = input_signature->elements;
1778 for (i = 0; i < ARRAY_SIZE(input_signature_elements); ++i)
1780 if (!(reg_maps->input_registers & (1u << i)))
1781 continue;
1782 input_signature_elements[i].register_idx = i;
1783 *e++ = input_signature_elements[i];
/* An existing output signature is scanned for its register usage; otherwise
 * one is built from the output registers recorded during the scan. */
1787 if (output_signature->elements)
1789 if (FAILED(hr = shader_scan_output_signature(shader)))
1790 return hr;
1792 else if (reg_maps->output_registers)
1794 unsigned int count = wined3d_popcount(reg_maps->output_registers);
1795 struct wined3d_shader_signature_element *e;
1797 if (!(output_signature->elements = heap_calloc(count, sizeof(*output_signature->elements))))
1798 return E_OUTOFMEMORY;
1799 output_signature->element_count = count;
1801 e = output_signature->elements;
1802 for (i = 0; i < ARRAY_SIZE(output_signature_elements); ++i)
1804 if (!(reg_maps->output_registers & (1u << i)))
1805 continue;
1806 *e++ = output_signature_elements[i];
1810 return WINED3D_OK;
1813 static void shader_cleanup_reg_maps(struct wined3d_shader_reg_maps *reg_maps)
1815 struct wined3d_shader_indexable_temp *reg, *reg_next;
1817 heap_free(reg_maps->constf);
1818 heap_free(reg_maps->sampler_map.entries);
1820 LIST_FOR_EACH_ENTRY_SAFE(reg, reg_next, &reg_maps->indexable_temps, struct wined3d_shader_indexable_temp, entry)
1821 heap_free(reg);
1822 list_init(&reg_maps->indexable_temps);
1824 heap_free(reg_maps->tgsm);
1827 unsigned int shader_find_free_input_register(const struct wined3d_shader_reg_maps *reg_maps, unsigned int max)
1829 DWORD map = 1u << max;
1830 map |= map - 1;
1831 map &= reg_maps->shader_version.major < 3 ? ~reg_maps->texcoord : ~reg_maps->input_registers;
1833 return wined3d_log2i(map);
1836 /* Shared code in order to generate the bulk of the shader string. */
1837 HRESULT shader_generate_code(const struct wined3d_shader *shader, struct wined3d_string_buffer *buffer,
1838 const struct wined3d_shader_reg_maps *reg_maps, void *backend_ctx,
1839 const DWORD *start, const DWORD *end)
1841 struct wined3d_device *device = shader->device;
1842 const struct wined3d_shader_frontend *fe = shader->frontend;
1843 void *fe_data = shader->frontend_data;
1844 struct wined3d_shader_version shader_version;
1845 struct wined3d_shader_parser_state state;
1846 struct wined3d_shader_instruction ins;
1847 struct wined3d_shader_tex_mx tex_mx;
1848 struct wined3d_shader_context ctx;
1849 const DWORD *ptr;
1851 /* Initialize current parsing state. */
1852 tex_mx.current_row = 0;
1853 state.current_loop_depth = 0;
1854 state.current_loop_reg = 0;
1855 state.in_subroutine = FALSE;
1857 ctx.shader = shader;
1858 ctx.reg_maps = reg_maps;
1859 ctx.buffer = buffer;
1860 ctx.tex_mx = &tex_mx;
1861 ctx.state = &state;
1862 ctx.backend_data = backend_ctx;
1863 ins.ctx = &ctx;
1865 fe->shader_read_header(fe_data, &ptr, &shader_version);
1866 if (start)
1867 ptr = start;
1869 while (!fe->shader_is_end(fe_data, &ptr) && ptr != end)
1871 /* Read opcode. */
1872 fe->shader_read_instruction(fe_data, &ptr, &ins);
1874 /* Unknown opcode and its parameters. */
1875 if (ins.handler_idx == WINED3DSIH_TABLE_SIZE)
1877 WARN("Encountered unrecognised or invalid instruction.\n");
1878 return WINED3DERR_INVALIDCALL;
1881 if (ins.predicate)
1882 FIXME("Predicates not implemented.\n");
1884 /* Call appropriate function for output target */
1885 device->shader_backend->shader_handle_instruction(&ins);
1888 return WINED3D_OK;
1891 static void shader_cleanup(struct wined3d_shader *shader)
1893 if (shader->reg_maps.shader_version.type == WINED3D_SHADER_TYPE_HULL)
1895 heap_free(shader->u.hs.phases.control_point);
1896 heap_free(shader->u.hs.phases.fork);
1897 heap_free(shader->u.hs.phases.join);
1900 heap_free(shader->patch_constant_signature.elements);
1901 heap_free(shader->output_signature.elements);
1902 heap_free(shader->input_signature.elements);
1903 shader->device->shader_backend->shader_destroy(shader);
1904 shader_cleanup_reg_maps(&shader->reg_maps);
1905 heap_free(shader->byte_code);
1906 shader_delete_constant_list(&shader->constantsF);
1907 shader_delete_constant_list(&shader->constantsB);
1908 shader_delete_constant_list(&shader->constantsI);
1909 list_remove(&shader->shader_list_entry);
1911 if (shader->frontend && shader->frontend_data)
1912 shader->frontend->shader_free(shader->frontend_data);
1915 struct shader_none_priv
1917 const struct wined3d_vertex_pipe_ops *vertex_pipe;
1918 const struct wined3d_fragment_pipe_ops *fragment_pipe;
1919 BOOL ffp_proj_control;
1922 static void shader_none_handle_instruction(const struct wined3d_shader_instruction *ins) {}
1923 static void shader_none_precompile(void *shader_priv, struct wined3d_shader *shader) {}
1924 static void shader_none_select_compute(void *shader_priv, struct wined3d_context *context,
1925 const struct wined3d_state *state) {}
1926 static void shader_none_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count) {}
1927 static void shader_none_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count) {}
1928 static void shader_none_load_constants(void *shader_priv, struct wined3d_context *context,
1929 const struct wined3d_state *state) {}
1930 static void shader_none_destroy(struct wined3d_shader *shader) {}
1931 static void shader_none_free_context_data(struct wined3d_context *context) {}
1932 static void shader_none_init_context_state(struct wined3d_context *context) {}
1934 /* Context activation is done by the caller. */
1935 static void shader_none_select(void *shader_priv, struct wined3d_context *context,
1936 const struct wined3d_state *state)
1938 struct shader_none_priv *priv = shader_priv;
1940 priv->vertex_pipe->vp_enable(context, !use_vs(state));
1941 priv->fragment_pipe->fp_enable(context, !use_ps(state));
1944 /* Context activation is done by the caller. */
1945 static void shader_none_disable(void *shader_priv, struct wined3d_context *context)
1947 struct shader_none_priv *priv = shader_priv;
1949 priv->vertex_pipe->vp_enable(context, FALSE);
1950 priv->fragment_pipe->fp_enable(context, FALSE);
1952 context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL)
1953 | (1u << WINED3D_SHADER_TYPE_VERTEX)
1954 | (1u << WINED3D_SHADER_TYPE_GEOMETRY)
1955 | (1u << WINED3D_SHADER_TYPE_HULL)
1956 | (1u << WINED3D_SHADER_TYPE_DOMAIN)
1957 | (1u << WINED3D_SHADER_TYPE_COMPUTE);
1960 static HRESULT shader_none_alloc(struct wined3d_device *device, const struct wined3d_vertex_pipe_ops *vertex_pipe,
1961 const struct wined3d_fragment_pipe_ops *fragment_pipe)
1963 struct fragment_caps fragment_caps;
1964 void *vertex_priv, *fragment_priv;
1965 struct shader_none_priv *priv;
1967 if (!(priv = heap_alloc(sizeof(*priv))))
1968 return E_OUTOFMEMORY;
1970 if (!(vertex_priv = vertex_pipe->vp_alloc(&none_shader_backend, priv)))
1972 ERR("Failed to initialize vertex pipe.\n");
1973 heap_free(priv);
1974 return E_FAIL;
1977 if (!(fragment_priv = fragment_pipe->alloc_private(&none_shader_backend, priv)))
1979 ERR("Failed to initialize fragment pipe.\n");
1980 vertex_pipe->vp_free(device, NULL);
1981 heap_free(priv);
1982 return E_FAIL;
1985 priv->vertex_pipe = vertex_pipe;
1986 priv->fragment_pipe = fragment_pipe;
1987 fragment_pipe->get_caps(device->adapter, &fragment_caps);
1988 priv->ffp_proj_control = fragment_caps.wined3d_caps & WINED3D_FRAGMENT_CAP_PROJ_CONTROL;
1990 device->vertex_priv = vertex_priv;
1991 device->fragment_priv = fragment_priv;
1992 device->shader_priv = priv;
1994 return WINED3D_OK;
1997 static void shader_none_free(struct wined3d_device *device, struct wined3d_context *context)
1999 struct shader_none_priv *priv = device->shader_priv;
2001 priv->fragment_pipe->free_private(device, context);
2002 priv->vertex_pipe->vp_free(device, context);
2003 heap_free(priv);
2006 static BOOL shader_none_allocate_context_data(struct wined3d_context *context)
2008 return TRUE;
2011 static void shader_none_get_caps(const struct wined3d_adapter *adapter, struct shader_caps *caps)
2013 /* Set the shader caps to 0 for the none shader backend */
2014 memset(caps, 0, sizeof(*caps));
2017 static BOOL shader_none_color_fixup_supported(struct color_fixup_desc fixup)
2019 /* We "support" every possible fixup, since we don't support any shader
2020 * model, and will never have to actually sample a texture. */
2021 return TRUE;
2024 static BOOL shader_none_has_ffp_proj_control(void *shader_priv)
2026 struct shader_none_priv *priv = shader_priv;
2028 return priv->ffp_proj_control;
2031 static uint64_t shader_none_shader_compile(struct wined3d_context *context, const struct wined3d_shader_desc *shader_desc,
2032 enum wined3d_shader_type shader_type)
2034 return 0;
2037 const struct wined3d_shader_backend_ops none_shader_backend =
2039 shader_none_handle_instruction,
2040 shader_none_precompile,
2041 shader_none_select,
2042 shader_none_select_compute,
2043 shader_none_disable,
2044 shader_none_update_float_vertex_constants,
2045 shader_none_update_float_pixel_constants,
2046 shader_none_load_constants,
2047 shader_none_destroy,
2048 shader_none_alloc,
2049 shader_none_free,
2050 shader_none_allocate_context_data,
2051 shader_none_free_context_data,
2052 shader_none_init_context_state,
2053 shader_none_get_caps,
2054 shader_none_color_fixup_supported,
2055 shader_none_has_ffp_proj_control,
2056 shader_none_shader_compile,
2059 static unsigned int shader_max_version_from_feature_level(enum wined3d_feature_level level)
2061 switch (level)
2063 case WINED3D_FEATURE_LEVEL_11_1:
2064 case WINED3D_FEATURE_LEVEL_11:
2065 return 5;
2066 case WINED3D_FEATURE_LEVEL_10_1:
2067 case WINED3D_FEATURE_LEVEL_10:
2068 return 4;
2069 case WINED3D_FEATURE_LEVEL_9_3:
2070 return 3;
2071 case WINED3D_FEATURE_LEVEL_9_2:
2072 case WINED3D_FEATURE_LEVEL_9_1:
2073 return 2;
2074 default:
2075 return 1;
2079 static HRESULT shader_set_function(struct wined3d_shader *shader, struct wined3d_device *device,
2080 enum wined3d_shader_type type, unsigned int float_const_count)
2082 const struct wined3d_d3d_info *d3d_info = &shader->device->adapter->d3d_info;
2083 struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
2084 const struct wined3d_shader_version *version = &reg_maps->shader_version;
2085 const struct wined3d_shader_frontend *fe;
2086 unsigned int backend_version;
2087 HRESULT hr;
2089 TRACE("shader %p, device %p, type %s, float_const_count %u.\n",
2090 shader, device, debug_shader_type(type), float_const_count);
2092 fe = shader->frontend;
2093 if (!(shader->frontend_data = fe->shader_init(shader->function,
2094 shader->functionLength, &shader->output_signature)))
2096 FIXME("Failed to initialize frontend.\n");
2097 return WINED3DERR_INVALIDCALL;
2100 if (FAILED(hr = shader_get_registers_used(shader, float_const_count)))
2101 return hr;
2103 if (version->type != type)
2105 WARN("Wrong shader type %s.\n", debug_shader_type(reg_maps->shader_version.type));
2106 return WINED3DERR_INVALIDCALL;
2108 if (version->major > shader_max_version_from_feature_level(device->cs->c.state->feature_level))
2110 WARN("Shader version %u not supported by this device.\n", version->major);
2111 return WINED3DERR_INVALIDCALL;
2113 switch (type)
2115 case WINED3D_SHADER_TYPE_VERTEX:
2116 backend_version = d3d_info->limits.vs_version;
2117 break;
2118 case WINED3D_SHADER_TYPE_HULL:
2119 backend_version = d3d_info->limits.hs_version;
2120 break;
2121 case WINED3D_SHADER_TYPE_DOMAIN:
2122 backend_version = d3d_info->limits.ds_version;
2123 break;
2124 case WINED3D_SHADER_TYPE_GEOMETRY:
2125 backend_version = d3d_info->limits.gs_version;
2126 break;
2127 case WINED3D_SHADER_TYPE_PIXEL:
2128 backend_version = d3d_info->limits.ps_version;
2129 break;
2130 case WINED3D_SHADER_TYPE_COMPUTE:
2131 backend_version = d3d_info->limits.cs_version;
2132 break;
2133 default:
2134 FIXME("No backend version-checking for this shader type.\n");
2135 backend_version = 0;
2137 if (version->major > backend_version)
2139 WARN("Shader version %u.%u not supported by the current shader backend.\n",
2140 version->major, version->minor);
2141 return WINED3DERR_INVALIDCALL;
2144 shader->load_local_constsF = shader->lconst_inf_or_nan;
2146 return WINED3D_OK;
2149 ULONG CDECL wined3d_shader_incref(struct wined3d_shader *shader)
2151 unsigned int refcount = InterlockedIncrement(&shader->ref);
2153 TRACE("%p increasing refcount to %u.\n", shader, refcount);
2155 return refcount;
2158 static void wined3d_shader_init_object(void *object)
2160 struct wined3d_shader *shader = object;
2161 struct wined3d_device *device = shader->device;
2163 TRACE("shader %p.\n", shader);
2165 list_add_head(&device->shaders, &shader->shader_list_entry);
2167 device->shader_backend->shader_precompile(device->shader_priv, shader);
/* Destruction callback: release the shader's resources, then the shader
 * object itself. "object" is the struct wined3d_shader. */
static void wined3d_shader_destroy_object(void *object)
{
    TRACE("object %p.\n", object);

    shader_cleanup(object);
    heap_free(object);
}
2178 ULONG CDECL wined3d_shader_decref(struct wined3d_shader *shader)
2180 unsigned int refcount = InterlockedDecrement(&shader->ref);
2182 TRACE("%p decreasing refcount to %u.\n", shader, refcount);
2184 if (!refcount)
2186 wined3d_mutex_lock();
2187 shader->parent_ops->wined3d_object_destroyed(shader->parent);
2188 wined3d_cs_destroy_object(shader->device->cs, wined3d_shader_destroy_object, shader);
2189 wined3d_mutex_unlock();
2192 return refcount;
2195 void * CDECL wined3d_shader_get_parent(const struct wined3d_shader *shader)
2197 TRACE("shader %p.\n", shader);
2199 return shader->parent;
2202 HRESULT CDECL wined3d_shader_get_byte_code(const struct wined3d_shader *shader,
2203 void *byte_code, UINT *byte_code_size)
2205 TRACE("shader %p, byte_code %p, byte_code_size %p.\n", shader, byte_code, byte_code_size);
2207 if (!byte_code)
2209 *byte_code_size = shader->byte_code_size;
2210 return WINED3D_OK;
2213 if (*byte_code_size < shader->byte_code_size)
2215 /* MSDN claims (for d3d8 at least) that if *byte_code_size is smaller
2216 * than the required size we should write the required size and
2217 * return D3DERR_MOREDATA. That's not actually true. */
2218 return WINED3DERR_INVALIDCALL;
2221 memcpy(byte_code, shader->byte_code, shader->byte_code_size);
2223 return WINED3D_OK;
2226 /* Set local constants for d3d8 shaders. */
2227 HRESULT CDECL wined3d_shader_set_local_constants_float(struct wined3d_shader *shader,
2228 UINT start_idx, const float *src_data, UINT count)
2230 UINT end_idx = start_idx + count;
2231 UINT i;
2233 TRACE("shader %p, start_idx %u, src_data %p, count %u.\n", shader, start_idx, src_data, count);
2235 if (end_idx > shader->limits->constant_float)
2237 WARN("end_idx %u > float constants limit %u.\n",
2238 end_idx, shader->limits->constant_float);
2239 end_idx = shader->limits->constant_float;
2242 for (i = start_idx; i < end_idx; ++i)
2244 struct wined3d_shader_lconst *lconst;
2245 float *value;
2247 if (!(lconst = heap_alloc(sizeof(*lconst))))
2248 return E_OUTOFMEMORY;
2250 lconst->idx = i;
2251 value = (float *)lconst->value;
2252 memcpy(value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
2253 list_add_head(&shader->constantsF, &lconst->entry);
2255 if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
2256 || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
2258 shader->lconst_inf_or_nan = TRUE;
2262 return WINED3D_OK;
2265 static void init_interpolation_compile_args(uint32_t *interpolation_args,
2266 const struct wined3d_shader *pixel_shader, const struct wined3d_d3d_info *d3d_info)
2268 if (!d3d_info->shader_output_interpolation || !pixel_shader
2269 || pixel_shader->reg_maps.shader_version.major < 4)
2271 memset(interpolation_args, 0, sizeof(pixel_shader->u.ps.interpolation_mode));
2272 return;
2275 memcpy(interpolation_args, pixel_shader->u.ps.interpolation_mode,
2276 sizeof(pixel_shader->u.ps.interpolation_mode));
2279 void find_vs_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
2280 struct vs_compile_args *args, const struct wined3d_context *context)
2282 const struct wined3d_shader *geometry_shader = state->shader[WINED3D_SHADER_TYPE_GEOMETRY];
2283 const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
2284 const struct wined3d_shader *hull_shader = state->shader[WINED3D_SHADER_TYPE_HULL];
2285 const struct wined3d_d3d_info *d3d_info = context->d3d_info;
2286 WORD swizzle_map = context->stream_info.swizzle_map;
2288 args->fog_src = state->render_states[WINED3D_RS_FOGTABLEMODE]
2289 == WINED3D_FOG_NONE ? VS_FOG_COORD : VS_FOG_Z;
2290 args->clip_enabled = state->render_states[WINED3D_RS_CLIPPING]
2291 && state->render_states[WINED3D_RS_CLIPPLANEENABLE];
2292 args->point_size = state->primitive_type == WINED3D_PT_POINTLIST;
2293 args->per_vertex_point_size = shader->reg_maps.point_size;
2294 args->next_shader_type = hull_shader ? WINED3D_SHADER_TYPE_HULL
2295 : geometry_shader ? WINED3D_SHADER_TYPE_GEOMETRY : WINED3D_SHADER_TYPE_PIXEL;
2296 if (shader->reg_maps.shader_version.major >= 4)
2297 args->next_shader_input_count = hull_shader ? hull_shader->limits->packed_input
2298 : geometry_shader ? geometry_shader->limits->packed_input
2299 : pixel_shader ? pixel_shader->limits->packed_input : 0;
2300 else
2301 args->next_shader_input_count = 0;
2302 args->swizzle_map = swizzle_map;
2303 if (d3d_info->emulated_flatshading)
2304 args->flatshading = state->render_states[WINED3D_RS_SHADEMODE] == WINED3D_SHADE_FLAT;
2305 else
2306 args->flatshading = 0;
2308 init_interpolation_compile_args(args->interpolation_mode,
2309 args->next_shader_type == WINED3D_SHADER_TYPE_PIXEL ? pixel_shader : NULL, d3d_info);
2312 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2)
2314 if (usage_idx1 != usage_idx2)
2315 return FALSE;
2316 if (usage1 == usage2)
2317 return TRUE;
2318 if (usage1 == WINED3D_DECL_USAGE_POSITION && usage2 == WINED3D_DECL_USAGE_POSITIONT)
2319 return TRUE;
2320 if (usage2 == WINED3D_DECL_USAGE_POSITION && usage1 == WINED3D_DECL_USAGE_POSITIONT)
2321 return TRUE;
2323 return FALSE;
2326 bool vshader_get_input(const struct wined3d_shader *shader,
2327 uint8_t usage_req, uint8_t usage_idx_req, unsigned int *regnum)
2329 uint32_t map = shader->reg_maps.input_registers & 0xffff;
2330 unsigned int i;
2332 while (map)
2334 i = wined3d_bit_scan(&map);
2335 if (match_usage(shader->u.vs.attributes[i].usage,
2336 shader->u.vs.attributes[i].usage_idx, usage_req, usage_idx_req))
2338 *regnum = i;
2339 return true;
2343 return false;
2346 static void shader_trace(const void *code, size_t size, enum vkd3d_shader_source_type source_type)
2348 struct vkd3d_shader_compile_info info;
2349 struct vkd3d_shader_code d3d_asm;
2350 const char *ptr, *end, *line;
2351 char *messages;
2352 int ret;
2354 static const struct vkd3d_shader_compile_option compile_options[] =
2356 {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_6},
2359 info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO;
2360 info.next = NULL;
2361 info.source.code = code;
2362 info.source.size = size;
2363 info.source_type = source_type;
2364 info.target_type = VKD3D_SHADER_TARGET_D3D_ASM;
2365 info.options = compile_options;
2366 info.option_count = ARRAY_SIZE(compile_options);
2367 info.log_level = VKD3D_SHADER_LOG_WARNING;
2368 info.source_name = NULL;
2370 ret = vkd3d_shader_compile(&info, &d3d_asm, &messages);
2371 if (messages && *messages && FIXME_ON(d3d_shader))
2373 FIXME("Shader log:\n");
2374 ptr = messages;
2375 end = ptr + strlen(ptr);
2376 while ((line = wined3d_get_line(&ptr, end)))
2378 FIXME(" %.*s", (int)(ptr - line), line);
2380 FIXME("\n");
2382 vkd3d_shader_free_messages(messages);
2384 if (ret < 0)
2386 ERR("Failed to disassemble, ret %d.\n", ret);
2387 return;
2390 ptr = d3d_asm.code;
2391 end = ptr + d3d_asm.size;
2392 while ((line = wined3d_get_line(&ptr, end)))
2394 TRACE(" %.*s", (int)(ptr - line), line);
2396 TRACE("\n");
2398 vkd3d_shader_free_shader_code(&d3d_asm);
2401 static HRESULT shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
2402 const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
2404 enum vkd3d_shader_source_type source_type;
2405 HRESULT hr;
2407 TRACE("byte_code %p, byte_code_size %#lx.\n", desc->byte_code, (long)desc->byte_code_size);
2409 if (!desc->byte_code)
2410 return WINED3DERR_INVALIDCALL;
2412 shader->ref = 1;
2413 shader->device = device;
2414 shader->parent = parent;
2415 shader->parent_ops = parent_ops;
2417 list_init(&shader->linked_programs);
2418 list_init(&shader->constantsF);
2419 list_init(&shader->constantsB);
2420 list_init(&shader->constantsI);
2421 shader->lconst_inf_or_nan = FALSE;
2422 list_init(&shader->reg_maps.indexable_temps);
2423 list_init(&shader->shader_list_entry);
2425 if (desc->byte_code_size == ~(size_t)0)
2427 struct wined3d_shader_version shader_version;
2428 const struct wined3d_shader_frontend *fe;
2429 struct wined3d_shader_instruction ins;
2430 const DWORD *ptr;
2431 void *fe_data;
2433 source_type = VKD3D_SHADER_SOURCE_D3D_BYTECODE;
2434 if (!(shader->frontend = shader_select_frontend(source_type)))
2436 FIXME("Unable to find frontend for shader.\n");
2437 hr = WINED3DERR_INVALIDCALL;
2438 goto fail;
2441 fe = shader->frontend;
2442 if (!(fe_data = fe->shader_init(desc->byte_code, desc->byte_code_size, &shader->output_signature)))
2444 WARN("Failed to initialise frontend data.\n");
2445 hr = WINED3DERR_INVALIDCALL;
2446 goto fail;
2449 fe->shader_read_header(fe_data, &ptr, &shader_version);
2450 while (!fe->shader_is_end(fe_data, &ptr))
2451 fe->shader_read_instruction(fe_data, &ptr, &ins);
2453 fe->shader_free(fe_data);
2455 shader->byte_code_size = (ptr - desc->byte_code) * sizeof(*ptr);
2457 if (!(shader->byte_code = heap_alloc(shader->byte_code_size)))
2459 hr = E_OUTOFMEMORY;
2460 goto fail;
2462 memcpy(shader->byte_code, desc->byte_code, shader->byte_code_size);
2464 shader->function = shader->byte_code;
2465 shader->functionLength = shader->byte_code_size;
2467 else
2469 unsigned int max_version;
2471 if (!(shader->byte_code = heap_alloc(desc->byte_code_size)))
2473 hr = E_OUTOFMEMORY;
2474 goto fail;
2476 memcpy(shader->byte_code, desc->byte_code, desc->byte_code_size);
2477 shader->byte_code_size = desc->byte_code_size;
2479 max_version = shader_max_version_from_feature_level(device->cs->c.state->feature_level);
2480 if (FAILED(hr = shader_extract_from_dxbc(shader, max_version, &source_type)))
2481 goto fail;
2483 if (!(shader->frontend = shader_select_frontend(source_type)))
2485 FIXME("Unable to find frontend for shader.\n");
2486 hr = WINED3DERR_INVALIDCALL;
2487 goto fail;
2491 if (TRACE_ON(d3d_shader))
2493 if (source_type == VKD3D_SHADER_SOURCE_D3D_BYTECODE)
2494 shader_trace(shader->function, shader->functionLength, source_type);
2495 else
2496 shader_trace(shader->byte_code, shader->byte_code_size, source_type);
2500 return WINED3D_OK;
2502 fail:
2503 shader_cleanup(shader);
2504 return hr;
2507 static HRESULT vertex_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
2508 const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
2510 struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
2511 unsigned int i;
2512 HRESULT hr;
2514 if (FAILED(hr = shader_init(shader, device, desc, parent, parent_ops)))
2515 return hr;
2517 if (FAILED(hr = shader_set_function(shader, device,
2518 WINED3D_SHADER_TYPE_VERTEX, device->adapter->d3d_info.limits.vs_uniform_count)))
2520 shader_cleanup(shader);
2521 return hr;
2524 for (i = 0; i < shader->input_signature.element_count; ++i)
2526 const struct wined3d_shader_signature_element *input = &shader->input_signature.elements[i];
2528 if (!(reg_maps->input_registers & (1u << input->register_idx)) || !input->semantic_name)
2529 continue;
2531 shader->u.vs.attributes[input->register_idx].usage =
2532 shader_usage_from_semantic_name(input->semantic_name);
2533 shader->u.vs.attributes[input->register_idx].usage_idx = input->semantic_idx;
2536 if (reg_maps->usesrelconstF && !list_empty(&shader->constantsF))
2537 shader->load_local_constsF = TRUE;
2539 return WINED3D_OK;
2542 static struct wined3d_shader_signature_element *shader_find_signature_element(const struct wined3d_shader_signature *s,
2543 unsigned int stream_idx, const char *semantic_name, unsigned int semantic_idx)
2545 struct wined3d_shader_signature_element *e = s->elements;
2546 unsigned int i;
2548 for (i = 0; i < s->element_count; ++i)
2550 if (e[i].stream_idx == stream_idx
2551 && !stricmp(e[i].semantic_name, semantic_name)
2552 && e[i].semantic_idx == semantic_idx)
2553 return &e[i];
2556 return NULL;
2559 BOOL shader_get_stream_output_register_info(const struct wined3d_shader *shader,
2560 const struct wined3d_stream_output_element *so_element, unsigned int *register_idx, unsigned int *component_idx)
2562 const struct wined3d_shader_signature_element *output;
2563 unsigned int idx;
2565 if (!(output = shader_find_signature_element(&shader->output_signature,
2566 so_element->stream_idx, so_element->semantic_name, so_element->semantic_idx)))
2567 return FALSE;
2569 for (idx = 0; idx < 4; ++idx)
2571 if (output->mask & (1u << idx))
2572 break;
2574 idx += so_element->component_idx;
2576 *register_idx = output->register_idx;
2577 *component_idx = idx;
2578 return TRUE;
2581 static HRESULT geometry_shader_init_so_desc(struct wined3d_geometry_shader *gs, struct wined3d_device *device,
2582 const struct wined3d_stream_output_desc *so_desc)
2584 struct wined3d_so_desc_entry *s;
2585 struct wine_rb_entry *entry;
2586 unsigned int i;
2587 size_t size;
2588 char *name;
2590 if ((entry = wine_rb_get(&device->so_descs, so_desc)))
2592 gs->so_desc = &WINE_RB_ENTRY_VALUE(entry, struct wined3d_so_desc_entry, entry)->desc;
2593 return WINED3D_OK;
2596 size = FIELD_OFFSET(struct wined3d_so_desc_entry, elements[so_desc->element_count]);
2597 for (i = 0; i < so_desc->element_count; ++i)
2599 const char *n = so_desc->elements[i].semantic_name;
2601 if (n)
2602 size += strlen(n) + 1;
2604 if (!(s = heap_alloc(size)))
2605 return E_OUTOFMEMORY;
2607 s->desc = *so_desc;
2609 memcpy(s->elements, so_desc->elements, so_desc->element_count * sizeof(*s->elements));
2610 s->desc.elements = s->elements;
2612 name = (char *)&s->elements[s->desc.element_count];
2613 for (i = 0; i < so_desc->element_count; ++i)
2615 struct wined3d_stream_output_element *e = &s->elements[i];
2617 if (!e->semantic_name)
2618 continue;
2620 size = strlen(e->semantic_name) + 1;
2621 memcpy(name, e->semantic_name, size);
2622 e->semantic_name = name;
2623 name += size;
2626 if (wine_rb_put(&device->so_descs, &s->desc, &s->entry) == -1)
2628 heap_free(s);
2629 return E_FAIL;
2631 gs->so_desc = &s->desc;
2633 return WINED3D_OK;
2636 static HRESULT geometry_shader_init_stream_output(struct wined3d_shader *shader,
2637 const struct wined3d_stream_output_desc *so_desc)
2639 const struct wined3d_shader_frontend *fe = shader->frontend;
2640 const struct wined3d_shader_signature_element *output;
2641 unsigned int i, component_idx, register_idx, mask;
2642 struct wined3d_shader_version shader_version;
2643 const DWORD *ptr;
2644 void *fe_data;
2645 HRESULT hr;
2647 if (!so_desc)
2648 return WINED3D_OK;
2650 if (!(fe_data = fe->shader_init(shader->function, shader->functionLength, &shader->output_signature)))
2652 WARN("Failed to initialise frontend data.\n");
2653 return WINED3DERR_INVALIDCALL;
2655 fe->shader_read_header(fe_data, &ptr, &shader_version);
2656 fe->shader_free(fe_data);
2658 switch (shader_version.type)
2660 case WINED3D_SHADER_TYPE_VERTEX:
2661 case WINED3D_SHADER_TYPE_DOMAIN:
2662 shader->function = NULL;
2663 shader->functionLength = 0;
2664 break;
2665 case WINED3D_SHADER_TYPE_GEOMETRY:
2666 break;
2667 default:
2668 WARN("Wrong shader type %s.\n", debug_shader_type(shader_version.type));
2669 return E_INVALIDARG;
2672 if (!shader->function)
2674 shader->reg_maps.shader_version = shader_version;
2675 shader->reg_maps.shader_version.type = WINED3D_SHADER_TYPE_GEOMETRY;
2676 shader_set_limits(shader);
2677 if (FAILED(hr = shader_scan_output_signature(shader)))
2678 return hr;
2681 for (i = 0; i < so_desc->element_count; ++i)
2683 const struct wined3d_stream_output_element *e = &so_desc->elements[i];
2685 if (!e->semantic_name)
2686 continue;
2687 if (!(output = shader_find_signature_element(&shader->output_signature,
2688 e->stream_idx, e->semantic_name, e->semantic_idx))
2689 || !shader_get_stream_output_register_info(shader, e, &register_idx, &component_idx))
2691 WARN("Failed to find output signature element for stream output entry.\n");
2692 return E_INVALIDARG;
2695 mask = wined3d_mask_from_size(e->component_count) << component_idx;
2696 if ((output->mask & 0xff & mask) != mask)
2698 WARN("Invalid component range %u-%u (mask %#x), output mask %#x.\n",
2699 component_idx, e->component_count, mask, output->mask & 0xff);
2700 return E_INVALIDARG;
2704 if (FAILED(hr = geometry_shader_init_so_desc(&shader->u.gs, shader->device, so_desc)))
2706 WARN("Failed to initialise stream output description, hr %#lx.\n", hr);
2707 return hr;
2710 return WINED3D_OK;
2713 static HRESULT geometry_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
2714 const struct wined3d_shader_desc *desc, const struct wined3d_stream_output_desc *so_desc,
2715 void *parent, const struct wined3d_parent_ops *parent_ops)
2717 HRESULT hr;
2719 if (FAILED(hr = shader_init(shader, device, desc, parent, parent_ops)))
2720 return hr;
2722 if (FAILED(hr = geometry_shader_init_stream_output(shader, so_desc)))
2723 goto fail;
2725 if (shader->function
2726 && FAILED(hr = shader_set_function(shader, device, WINED3D_SHADER_TYPE_GEOMETRY, 0)))
2727 goto fail;
2729 return WINED3D_OK;
2731 fail:
2732 shader_cleanup(shader);
2733 return hr;
2736 void find_ds_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
2737 struct ds_compile_args *args, const struct wined3d_context *context)
2739 const struct wined3d_shader *geometry_shader = state->shader[WINED3D_SHADER_TYPE_GEOMETRY];
2740 const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
2741 const struct wined3d_shader *hull_shader = state->shader[WINED3D_SHADER_TYPE_HULL];
2743 args->tessellator_output_primitive = hull_shader->u.hs.tessellator_output_primitive;
2744 args->tessellator_partitioning = hull_shader->u.hs.tessellator_partitioning;
2746 args->output_count = geometry_shader ? geometry_shader->limits->packed_input
2747 : pixel_shader ? pixel_shader->limits->packed_input : shader->limits->packed_output;
2748 args->next_shader_type = geometry_shader ? WINED3D_SHADER_TYPE_GEOMETRY : WINED3D_SHADER_TYPE_PIXEL;
2750 args->render_offscreen = context->render_offscreen;
2752 init_interpolation_compile_args(args->interpolation_mode,
2753 args->next_shader_type == WINED3D_SHADER_TYPE_PIXEL ? pixel_shader : NULL, context->d3d_info);
2755 args->padding = 0;
2758 void find_gs_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
2759 struct gs_compile_args *args, const struct wined3d_context *context)
2761 const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
2763 args->output_count = pixel_shader ? pixel_shader->limits->packed_input : shader->limits->packed_output;
2765 if (!(args->primitive_type = shader->u.gs.input_type))
2766 args->primitive_type = state->primitive_type;
2768 init_interpolation_compile_args(args->interpolation_mode, pixel_shader, context->d3d_info);
2771 void find_ps_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
2772 BOOL position_transformed, struct ps_compile_args *args, const struct wined3d_context *context)
2774 const struct wined3d_d3d_info *d3d_info = context->d3d_info;
2775 struct wined3d_texture *texture;
2776 unsigned int i;
2778 memset(args, 0, sizeof(*args)); /* FIXME: Make sure all bits are set. */
2779 if (!d3d_info->srgb_write_control && needs_srgb_write(d3d_info, state, &state->fb))
2781 static unsigned int warned = 0;
2783 args->srgb_correction = 1;
2784 if (state->blend_state && state->blend_state->desc.rt[0].enable && !warned++)
2785 WARN("Blending into a sRGB render target with no GL_ARB_framebuffer_sRGB "
2786 "support, expect rendering artifacts.\n");
2789 if (shader->reg_maps.shader_version.major == 1
2790 && shader->reg_maps.shader_version.minor <= 3)
2792 for (i = 0; i < shader->limits->sampler; ++i)
2794 uint32_t flags = state->texture_states[i][WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS];
2796 if (flags & WINED3D_TTFF_PROJECTED)
2798 uint32_t tex_transform = flags & ~WINED3D_TTFF_PROJECTED;
2800 if (!state->shader[WINED3D_SHADER_TYPE_VERTEX])
2802 enum wined3d_shader_resource_type resource_type = shader->reg_maps.resource_info[i].type;
2803 unsigned int j;
2804 unsigned int index = state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX];
2805 uint32_t max_valid = WINED3D_TTFF_COUNT4;
2807 for (j = 0; j < state->vertex_declaration->element_count; ++j)
2809 struct wined3d_vertex_declaration_element *element =
2810 &state->vertex_declaration->elements[j];
2812 if (element->usage == WINED3D_DECL_USAGE_TEXCOORD
2813 && element->usage_idx == index)
2815 max_valid = element->format->component_count;
2816 break;
2819 if (!tex_transform || tex_transform > max_valid)
2821 WARN("Fixing up projected texture transform flags from %#x to %#x.\n",
2822 tex_transform, max_valid);
2823 tex_transform = max_valid;
2825 if ((resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_1D && tex_transform > WINED3D_TTFF_COUNT1)
2826 || (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2D
2827 && tex_transform > WINED3D_TTFF_COUNT2)
2828 || (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D
2829 && tex_transform > WINED3D_TTFF_COUNT3))
2830 tex_transform |= WINED3D_PSARGS_PROJECTED;
2831 else
2833 WARN("Application requested projected texture with unsuitable texture coordinates.\n");
2834 WARN("(texture unit %u, transform flags %#x, sampler type %u).\n",
2835 i, tex_transform, resource_type);
2838 else
2839 tex_transform = WINED3D_TTFF_COUNT4 | WINED3D_PSARGS_PROJECTED;
2841 args->tex_transform |= tex_transform << i * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
2845 if (shader->reg_maps.shader_version.major == 1
2846 && shader->reg_maps.shader_version.minor <= 4)
2848 for (i = 0; i < shader->limits->sampler; ++i)
2850 if (!shader->reg_maps.resource_info[i].type)
2851 continue;
2853 /* Treat unbound textures as 2D. The dummy texture will provide
2854 * the proper sample value. The tex_types bitmap defaults to
2855 * 2D because of the memset. */
2856 if (!(texture = state->textures[i]))
2857 continue;
2859 switch (wined3d_texture_gl(texture)->target)
2861 /* RECT textures are distinguished from 2D textures via np2_fixup */
2862 default:
2863 break;
2865 case GL_TEXTURE_3D:
2866 args->tex_types |= WINED3D_SHADER_TEX_3D << i * WINED3D_PSARGS_TEXTYPE_SHIFT;
2867 break;
2869 case GL_TEXTURE_CUBE_MAP_ARB:
2870 args->tex_types |= WINED3D_SHADER_TEX_CUBE << i * WINED3D_PSARGS_TEXTYPE_SHIFT;
2871 break;
2875 else if (shader->reg_maps.shader_version.major <= 3)
2877 for (i = 0; i < shader->limits->sampler; ++i)
2879 enum wined3d_shader_resource_type resource_type;
2880 enum wined3d_shader_tex_types tex_type;
2882 if (!(resource_type = shader->reg_maps.resource_info[i].type))
2883 continue;
2885 switch (resource_type)
2887 case WINED3D_SHADER_RESOURCE_TEXTURE_3D:
2888 tex_type = WINED3D_SHADER_TEX_3D;
2889 break;
2890 case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE:
2891 tex_type = WINED3D_SHADER_TEX_CUBE;
2892 break;
2893 default:
2894 tex_type = WINED3D_SHADER_TEX_2D;
2895 break;
2898 if ((texture = state->textures[i]))
2900 if (texture->resource.type == WINED3D_RTYPE_TEXTURE_2D
2901 && resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D
2902 && !(texture->resource.usage & WINED3DUSAGE_LEGACY_CUBEMAP))
2903 tex_type = WINED3D_SHADER_TEX_2D;
2904 else if (texture->resource.type == WINED3D_RTYPE_TEXTURE_3D
2905 && resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2D)
2906 tex_type = WINED3D_SHADER_TEX_3D;
2908 args->tex_types |= tex_type << i * WINED3D_PSARGS_TEXTYPE_SHIFT;
2912 if (shader->reg_maps.shader_version.major >= 4)
2914 /* In SM4+ we use dcl_sampler in order to determine if we should use shadow sampler. */
2915 args->shadow = 0;
2916 for (i = 0 ; i < WINED3D_MAX_FRAGMENT_SAMPLERS; ++i)
2917 args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
2918 args->np2_fixup = 0;
2920 else
2922 for (i = 0; i < WINED3D_MAX_FRAGMENT_SAMPLERS; ++i)
2924 if (!shader->reg_maps.resource_info[i].type)
2925 continue;
2927 texture = state->textures[i];
2928 if (!texture)
2930 args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
2931 continue;
2933 if (can_use_texture_swizzle(d3d_info, texture->resource.format))
2934 args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
2935 else
2936 args->color_fixup[i] = texture->resource.format->color_fixup;
2938 if (texture->resource.format_caps & WINED3D_FORMAT_CAP_SHADOW)
2939 args->shadow |= 1u << i;
2941 /* Flag samplers that need NP2 texcoord fixup. */
2942 if (!(texture->flags & WINED3D_TEXTURE_POW2_MAT_IDENT))
2943 args->np2_fixup |= (1u << i);
2947 if (shader->reg_maps.shader_version.major >= 3)
2949 if (position_transformed)
2950 args->vp_mode = WINED3D_VP_MODE_NONE;
2951 else if (use_vs(state))
2952 args->vp_mode = WINED3D_VP_MODE_SHADER;
2953 else
2954 args->vp_mode = WINED3D_VP_MODE_FF;
2955 args->fog = WINED3D_FFP_PS_FOG_OFF;
2957 else
2959 args->vp_mode = WINED3D_VP_MODE_SHADER;
2960 if (state->render_states[WINED3D_RS_FOGENABLE])
2962 switch (state->render_states[WINED3D_RS_FOGTABLEMODE])
2964 case WINED3D_FOG_NONE:
2965 if (position_transformed || use_vs(state))
2967 args->fog = WINED3D_FFP_PS_FOG_LINEAR;
2968 break;
2971 switch (state->render_states[WINED3D_RS_FOGVERTEXMODE])
2973 case WINED3D_FOG_NONE: /* Fall through. */
2974 case WINED3D_FOG_LINEAR: args->fog = WINED3D_FFP_PS_FOG_LINEAR; break;
2975 case WINED3D_FOG_EXP: args->fog = WINED3D_FFP_PS_FOG_EXP; break;
2976 case WINED3D_FOG_EXP2: args->fog = WINED3D_FFP_PS_FOG_EXP2; break;
2978 break;
2980 case WINED3D_FOG_LINEAR: args->fog = WINED3D_FFP_PS_FOG_LINEAR; break;
2981 case WINED3D_FOG_EXP: args->fog = WINED3D_FFP_PS_FOG_EXP; break;
2982 case WINED3D_FOG_EXP2: args->fog = WINED3D_FFP_PS_FOG_EXP2; break;
2985 else
2987 args->fog = WINED3D_FFP_PS_FOG_OFF;
2991 if (!d3d_info->full_ffp_varyings)
2993 const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX];
2995 args->texcoords_initialized = 0;
2996 for (i = 0; i < WINED3D_MAX_TEXTURES; ++i)
2998 if (vs)
3000 if (state->shader[WINED3D_SHADER_TYPE_VERTEX]->reg_maps.output_registers & (1u << i))
3001 args->texcoords_initialized |= 1u << i;
3003 else
3005 const struct wined3d_stream_info *si = &context->stream_info;
3006 unsigned int coord_idx = state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX];
3008 if ((state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX] >> WINED3D_FFP_TCI_SHIFT)
3009 & WINED3D_FFP_TCI_MASK
3010 || (coord_idx < WINED3D_MAX_TEXTURES && (si->use_map & (1u << (WINED3D_FFP_TEXCOORD0 + coord_idx)))))
3011 args->texcoords_initialized |= 1u << i;
3015 else
3017 args->texcoords_initialized = wined3d_mask_from_size(WINED3D_MAX_TEXTURES);
3020 args->pointsprite = state->render_states[WINED3D_RS_POINTSPRITEENABLE]
3021 && state->primitive_type == WINED3D_PT_POINTLIST;
3023 if (d3d_info->ffp_alpha_test)
3024 args->alpha_test_func = WINED3D_CMP_ALWAYS - 1;
3025 else
3026 args->alpha_test_func = (state->render_states[WINED3D_RS_ALPHATESTENABLE]
3027 ? wined3d_sanitize_cmp_func(state->render_states[WINED3D_RS_ALPHAFUNC])
3028 : WINED3D_CMP_ALWAYS) - 1;
3030 if (d3d_info->emulated_flatshading)
3031 args->flatshading = state->render_states[WINED3D_RS_SHADEMODE] == WINED3D_SHADE_FLAT;
3033 args->y_correction = (shader->reg_maps.vpos && d3d_info->frag_coord_correction)
3034 || (shader->reg_maps.usesdsy && wined3d_settings.offscreen_rendering_mode != ORM_FBO)
3035 ? !context->render_offscreen : 0;
3037 for (i = 0; i < ARRAY_SIZE(state->fb.render_targets); ++i)
3039 struct wined3d_rendertarget_view *rtv = state->fb.render_targets[i];
3040 if (rtv && rtv->format->id == WINED3DFMT_A8_UNORM && !is_identity_fixup(rtv->format->color_fixup))
3041 args->rt_alpha_swizzle |= 1u << i;
3044 args->dual_source_blend = state->blend_state && state->blend_state->dual_source;
3047 static HRESULT pixel_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
3048 const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
3050 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3051 unsigned int i, highest_reg_used = 0, num_regs_used = 0;
3052 HRESULT hr;
3054 if (FAILED(hr = shader_init(shader, device, desc, parent, parent_ops)))
3055 return hr;
3057 if (FAILED(hr = shader_set_function(shader, device,
3058 WINED3D_SHADER_TYPE_PIXEL, device->adapter->d3d_info.limits.ps_uniform_count)))
3060 shader_cleanup(shader);
3061 return hr;
3064 for (i = 0; i < MAX_REG_INPUT; ++i)
3066 if (shader->u.ps.input_reg_used & (1u << i))
3068 ++num_regs_used;
3069 highest_reg_used = i;
3073 /* Don't do any register mapping magic if it is not needed, or if we can't
3074 * achieve anything anyway */
3075 if (highest_reg_used < (gl_info->limits.glsl_varyings / 4)
3076 || num_regs_used > (gl_info->limits.glsl_varyings / 4)
3077 || shader->reg_maps.shader_version.major >= 4)
3079 if (num_regs_used > (gl_info->limits.glsl_varyings / 4))
3081 /* This happens with relative addressing. The input mapper function
3082 * warns about this if the higher registers are declared too, so
3083 * don't write a FIXME here */
3084 WARN("More varying registers used than supported\n");
3087 for (i = 0; i < MAX_REG_INPUT; ++i)
3089 shader->u.ps.input_reg_map[i] = i;
3092 shader->u.ps.declared_in_count = highest_reg_used + 1;
3094 else
3096 shader->u.ps.declared_in_count = 0;
3097 for (i = 0; i < MAX_REG_INPUT; ++i)
3099 if (shader->u.ps.input_reg_used & (1u << i))
3100 shader->u.ps.input_reg_map[i] = shader->u.ps.declared_in_count++;
3101 else shader->u.ps.input_reg_map[i] = ~0U;
3105 return WINED3D_OK;
3108 enum wined3d_shader_resource_type pixelshader_get_resource_type(const struct wined3d_shader_reg_maps *reg_maps,
3109 unsigned int resource_idx, DWORD tex_types)
3111 static enum wined3d_shader_resource_type shader_resource_type_from_shader_tex_types[] =
3113 WINED3D_SHADER_RESOURCE_TEXTURE_2D, /* WINED3D_SHADER_TEX_2D */
3114 WINED3D_SHADER_RESOURCE_TEXTURE_3D, /* WINED3D_SHADER_TEX_3D */
3115 WINED3D_SHADER_RESOURCE_TEXTURE_CUBE, /* WINED3D_SHADER_TEX_CUBE */
3118 unsigned int idx;
3120 if (reg_maps->shader_version.major > 3)
3121 return reg_maps->resource_info[resource_idx].type;
3123 if (!reg_maps->resource_info[resource_idx].type)
3124 return 0;
3126 idx = (tex_types >> resource_idx * WINED3D_PSARGS_TEXTYPE_SHIFT) & WINED3D_PSARGS_TEXTYPE_MASK;
3127 assert(idx < ARRAY_SIZE(shader_resource_type_from_shader_tex_types));
3128 return shader_resource_type_from_shader_tex_types[idx];
3131 HRESULT CDECL wined3d_shader_create_cs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3132 void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3134 struct wined3d_shader *object;
3135 HRESULT hr;
3137 TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
3138 device, desc, parent, parent_ops, shader);
3140 if (!(object = heap_alloc_zero(sizeof(*object))))
3141 return E_OUTOFMEMORY;
3143 if (FAILED(hr = shader_init(object, device, desc, parent, parent_ops)))
3145 WARN("Failed to initialize compute shader, hr %#lx.\n", hr);
3146 heap_free(object);
3147 return hr;
3150 if (FAILED(hr = shader_set_function(object, device, WINED3D_SHADER_TYPE_COMPUTE, 0)))
3152 shader_cleanup(object);
3153 heap_free(object);
3154 return hr;
3157 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3159 TRACE("Created compute shader %p.\n", object);
3160 *shader = object;
3162 return WINED3D_OK;
3165 HRESULT CDECL wined3d_shader_create_ds(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3166 void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3168 struct wined3d_shader *object;
3169 HRESULT hr;
3171 TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
3172 device, desc, parent, parent_ops, shader);
3174 if (!(object = heap_alloc_zero(sizeof(*object))))
3175 return E_OUTOFMEMORY;
3177 if (FAILED(hr = shader_init(object, device, desc, parent, parent_ops)))
3179 WARN("Failed to initialize domain shader, hr %#lx.\n", hr);
3180 heap_free(object);
3181 return hr;
3184 if (FAILED(hr = shader_set_function(object, device, WINED3D_SHADER_TYPE_DOMAIN, 0)))
3186 shader_cleanup(object);
3187 heap_free(object);
3188 return hr;
3191 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3193 TRACE("Created domain shader %p.\n", object);
3194 *shader = object;
3196 return WINED3D_OK;
3199 HRESULT CDECL wined3d_shader_create_gs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3200 const struct wined3d_stream_output_desc *so_desc, void *parent,
3201 const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3203 struct wined3d_shader *object;
3204 HRESULT hr;
3206 TRACE("device %p, desc %p, so_desc %p, parent %p, parent_ops %p, shader %p.\n",
3207 device, desc, so_desc, parent, parent_ops, shader);
3209 if (!(object = heap_alloc_zero(sizeof(*object))))
3210 return E_OUTOFMEMORY;
3212 if (FAILED(hr = geometry_shader_init(object, device, desc, so_desc, parent, parent_ops)))
3214 WARN("Failed to initialize geometry shader, hr %#lx.\n", hr);
3215 heap_free(object);
3216 return hr;
3219 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3221 TRACE("Created geometry shader %p.\n", object);
3222 *shader = object;
3224 return WINED3D_OK;
3227 HRESULT CDECL wined3d_shader_create_hs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3228 void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3230 struct wined3d_shader *object;
3231 HRESULT hr;
3233 TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
3234 device, desc, parent, parent_ops, shader);
3236 if (!(object = heap_alloc_zero(sizeof(*object))))
3237 return E_OUTOFMEMORY;
3239 if (FAILED(hr = shader_init(object, device, desc, parent, parent_ops)))
3241 WARN("Failed to initialize hull shader, hr %#lx.\n", hr);
3242 heap_free(object);
3243 return hr;
3246 if (FAILED(hr = shader_set_function(object, device, WINED3D_SHADER_TYPE_HULL, 0)))
3248 shader_cleanup(object);
3249 heap_free(object);
3250 return hr;
3253 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3255 TRACE("Created hull shader %p.\n", object);
3256 *shader = object;
3258 return WINED3D_OK;
3261 HRESULT CDECL wined3d_shader_create_ps(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3262 void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3264 struct wined3d_shader *object;
3265 HRESULT hr;
3267 TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
3268 device, desc, parent, parent_ops, shader);
3270 if (!(object = heap_alloc_zero(sizeof(*object))))
3271 return E_OUTOFMEMORY;
3273 if (FAILED(hr = pixel_shader_init(object, device, desc, parent, parent_ops)))
3275 WARN("Failed to initialize pixel shader, hr %#lx.\n", hr);
3276 heap_free(object);
3277 return hr;
3280 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3282 TRACE("Created pixel shader %p.\n", object);
3283 *shader = object;
3285 return WINED3D_OK;
3288 HRESULT CDECL wined3d_shader_create_vs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3289 void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3291 struct wined3d_shader *object;
3292 HRESULT hr;
3294 TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
3295 device, desc, parent, parent_ops, shader);
3297 if (!(object = heap_alloc_zero(sizeof(*object))))
3298 return E_OUTOFMEMORY;
3300 if (FAILED(hr = vertex_shader_init(object, device, desc, parent, parent_ops)))
3302 WARN("Failed to initialize vertex shader, hr %#lx.\n", hr);
3303 heap_free(object);
3304 return hr;
3307 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3309 TRACE("Created vertex shader %p.\n", object);
3310 *shader = object;
3312 return WINED3D_OK;