opengl32: Correctly interpret glMapBuffer() access in wow64 mapping.
[wine.git] / dlls / wined3d / shader.c
blob2728929045fba4fe69132102a297c9ca617a8505
1 /*
2 * Copyright 2002-2003 Jason Edmeades
3 * Copyright 2002-2003 Raphael Junqueira
4 * Copyright 2004 Christian Costa
5 * Copyright 2005 Oliver Stieber
6 * Copyright 2006 Ivan Gyurdiev
7 * Copyright 2007-2008, 2013 Stefan Dösinger for CodeWeavers
8 * Copyright 2009-2011 Henri Verbeet for CodeWeavers
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
25 #include <stdio.h>
26 #include <string.h>
28 #include "wined3d_private.h"
30 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/* Constant vectors used for sRGB conversion. The per-row comments give the
 * role of each component. NOTE(review): the values look like the standard
 * linear-to-sRGB encoding constants (exponent 1/2.4, scale 1.055, offset
 * 0.055, linear slope 12.92, threshold 0.0031308) - confirm against users. */
32 const struct wined3d_vec4 wined3d_srgb_const[] =
34 /* pow, mul_high, sub_high, mul_low */
35 {0.41666f, 1.055f, 0.055f, 12.92f},
36 /* cmp */
37 {0.0031308f, 0.0f, 0.0f, 0.0f},
/* Printable mnemonics for shader instructions. The table is indexed directly
 * by an enum WINED3D_SHADER_INSTRUCTION_HANDLER value (see
 * debug_d3dshaderinstructionhandler()), so the order of the entries has to
 * match the order of the enumerators; the comment in front of each string
 * names the enumerator the entry is meant to correspond to. */
40 static const char * const shader_opcode_names[] =
42 /* WINED3DSIH_ABS */ "abs",
43 /* WINED3DSIH_ADD */ "add",
44 /* WINED3DSIH_AND */ "and",
45 /* WINED3DSIH_ATOMIC_AND */ "atomic_and",
46 /* WINED3DSIH_ATOMIC_CMP_STORE */ "atomic_cmp_store",
47 /* WINED3DSIH_ATOMIC_IADD */ "atomic_iadd",
48 /* WINED3DSIH_ATOMIC_IMAX */ "atomic_imax",
49 /* WINED3DSIH_ATOMIC_IMIN */ "atomic_imin",
50 /* WINED3DSIH_ATOMIC_OR */ "atomic_or",
51 /* WINED3DSIH_ATOMIC_UMAX */ "atomic_umax",
52 /* WINED3DSIH_ATOMIC_UMIN */ "atomic_umin",
53 /* WINED3DSIH_ATOMIC_XOR */ "atomic_xor",
54 /* WINED3DSIH_BEM */ "bem",
55 /* WINED3DSIH_BFI */ "bfi",
56 /* WINED3DSIH_BFREV */ "bfrev",
57 /* WINED3DSIH_BREAK */ "break",
58 /* WINED3DSIH_BREAKC */ "breakc",
59 /* WINED3DSIH_BREAKP */ "breakp",
60 /* WINED3DSIH_BUFINFO */ "bufinfo",
61 /* WINED3DSIH_CALL */ "call",
62 /* WINED3DSIH_CALLNZ */ "callnz",
63 /* WINED3DSIH_CASE */ "case",
64 /* WINED3DSIH_CMP */ "cmp",
65 /* WINED3DSIH_CND */ "cnd",
66 /* WINED3DSIH_CONTINUE */ "continue",
67 /* WINED3DSIH_CONTINUEP */ "continuec",
68 /* WINED3DSIH_COUNTBITS */ "countbits",
69 /* WINED3DSIH_CRS */ "crs",
70 /* WINED3DSIH_CUT */ "cut",
71 /* WINED3DSIH_CUT_STREAM */ "cut_stream",
72 /* WINED3DSIH_DCL */ "dcl",
73 /* WINED3DSIH_DCL_CONSTANT_BUFFER */ "dcl_constantBuffer",
74 /* WINED3DSIH_DCL_FUNCTION_BODY */ "dcl_function_body",
75 /* WINED3DSIH_DCL_FUNCTION_TABLE */ "dcl_function_table",
76 /* WINED3DSIH_DCL_GLOBAL_FLAGS */ "dcl_globalFlags",
77 /* WINED3DSIH_DCL_GS_INSTANCES */ "dcl_gs_instances",
78 /* WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT */ "dcl_hs_fork_phase_instance_count",
79 /* WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */ "dcl_hs_join_phase_instance_count",
80 /* WINED3DSIH_DCL_HS_MAX_TESSFACTOR */ "dcl_hs_max_tessfactor",
81 /* WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER */ "dcl_immediateConstantBuffer",
82 /* WINED3DSIH_DCL_INDEX_RANGE */ "dcl_index_range",
83 /* WINED3DSIH_DCL_INDEXABLE_TEMP */ "dcl_indexableTemp",
84 /* WINED3DSIH_DCL_INPUT */ "dcl_input",
85 /* WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT */ "dcl_input_control_point_count",
86 /* WINED3DSIH_DCL_INPUT_PRIMITIVE */ "dcl_inputPrimitive",
87 /* WINED3DSIH_DCL_INPUT_PS */ "dcl_input_ps",
88 /* WINED3DSIH_DCL_INPUT_PS_SGV */ "dcl_input_ps_sgv",
89 /* WINED3DSIH_DCL_INPUT_PS_SIV */ "dcl_input_ps_siv",
90 /* WINED3DSIH_DCL_INPUT_SGV */ "dcl_input_sgv",
91 /* WINED3DSIH_DCL_INPUT_SIV */ "dcl_input_siv",
92 /* WINED3DSIH_DCL_INTERFACE */ "dcl_interface",
93 /* WINED3DSIH_DCL_OUTPUT */ "dcl_output",
94 /* WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT */ "dcl_output_control_point_count",
95 /* WINED3DSIH_DCL_OUTPUT_SIV */ "dcl_output_siv",
96 /* WINED3DSIH_DCL_OUTPUT_TOPOLOGY */ "dcl_outputTopology",
97 /* WINED3DSIH_DCL_RESOURCE_RAW */ "dcl_resource_raw",
98 /* WINED3DSIH_DCL_RESOURCE_STRUCTURED */ "dcl_resource_structured",
99 /* WINED3DSIH_DCL_SAMPLER */ "dcl_sampler",
100 /* WINED3DSIH_DCL_STREAM */ "dcl_stream",
101 /* WINED3DSIH_DCL_TEMPS */ "dcl_temps",
102 /* WINED3DSIH_DCL_TESSELLATOR_DOMAIN */ "dcl_tessellator_domain",
103 /* WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE */ "dcl_tessellator_output_primitive",
104 /* WINED3DSIH_DCL_TESSELLATOR_PARTITIONING */ "dcl_tessellator_partitioning",
105 /* WINED3DSIH_DCL_TGSM_RAW */ "dcl_tgsm_raw",
106 /* WINED3DSIH_DCL_TGSM_STRUCTURED */ "dcl_tgsm_structured",
107 /* WINED3DSIH_DCL_THREAD_GROUP */ "dcl_thread_group",
108 /* WINED3DSIH_DCL_UAV_RAW */ "dcl_uav_raw",
109 /* WINED3DSIH_DCL_UAV_STRUCTURED */ "dcl_uav_structured",
110 /* WINED3DSIH_DCL_UAV_TYPED */ "dcl_uav_typed",
111 /* WINED3DSIH_DCL_VERTICES_OUT */ "dcl_maxOutputVertexCount",
112 /* WINED3DSIH_DEF */ "def",
113 /* WINED3DSIH_DEFAULT */ "default",
114 /* WINED3DSIH_DEFB */ "defb",
115 /* WINED3DSIH_DEFI */ "defi",
116 /* WINED3DSIH_DIV */ "div",
117 /* WINED3DSIH_DP2 */ "dp2",
118 /* WINED3DSIH_DP2ADD */ "dp2add",
119 /* WINED3DSIH_DP3 */ "dp3",
120 /* WINED3DSIH_DP4 */ "dp4",
121 /* WINED3DSIH_DST */ "dst",
122 /* WINED3DSIH_DSX */ "dsx",
123 /* WINED3DSIH_DSX_COARSE */ "deriv_rtx_coarse",
124 /* WINED3DSIH_DSX_FINE */ "deriv_rtx_fine",
125 /* WINED3DSIH_DSY */ "dsy",
126 /* WINED3DSIH_DSY_COARSE */ "deriv_rty_coarse",
127 /* WINED3DSIH_DSY_FINE */ "deriv_rty_fine",
128 /* WINED3DSIH_ELSE */ "else",
129 /* WINED3DSIH_EMIT */ "emit",
130 /* WINED3DSIH_EMIT_STREAM */ "emit_stream",
131 /* WINED3DSIH_ENDIF */ "endif",
132 /* WINED3DSIH_ENDLOOP */ "endloop",
133 /* WINED3DSIH_ENDREP */ "endrep",
134 /* WINED3DSIH_ENDSWITCH */ "endswitch",
135 /* WINED3DSIH_EQ */ "eq",
136 /* WINED3DSIH_EVAL_CENTROID */ "eval_centroid",
137 /* WINED3DSIH_EVAL_SAMPLE_INDEX */ "eval_sample_index",
138 /* WINED3DSIH_EXP */ "exp",
139 /* WINED3DSIH_EXPP */ "expp",
140 /* WINED3DSIH_F16TOF32 */ "f16tof32",
141 /* WINED3DSIH_F32TOF16 */ "f32tof16",
142 /* WINED3DSIH_FCALL */ "fcall",
143 /* WINED3DSIH_FIRSTBIT_HI */ "firstbit_hi",
144 /* WINED3DSIH_FIRSTBIT_LO */ "firstbit_lo",
145 /* WINED3DSIH_FIRSTBIT_SHI */ "firstbit_shi",
146 /* WINED3DSIH_FRC */ "frc",
147 /* WINED3DSIH_FTOI */ "ftoi",
148 /* WINED3DSIH_FTOU */ "ftou",
149 /* WINED3DSIH_GATHER4 */ "gather4",
150 /* WINED3DSIH_GATHER4_C */ "gather4_c",
151 /* WINED3DSIH_GATHER4_PO */ "gather4_po",
152 /* WINED3DSIH_GATHER4_PO_C */ "gather4_po_c",
153 /* WINED3DSIH_GE */ "ge",
154 /* WINED3DSIH_HS_CONTROL_POINT_PHASE */ "hs_control_point_phase",
155 /* WINED3DSIH_HS_DECLS */ "hs_decls",
156 /* WINED3DSIH_HS_FORK_PHASE */ "hs_fork_phase",
157 /* WINED3DSIH_HS_JOIN_PHASE */ "hs_join_phase",
158 /* WINED3DSIH_IADD */ "iadd",
159 /* WINED3DSIH_IBFE */ "ibfe",
160 /* WINED3DSIH_IEQ */ "ieq",
161 /* WINED3DSIH_IF */ "if",
162 /* WINED3DSIH_IFC */ "ifc",
163 /* WINED3DSIH_IGE */ "ige",
164 /* WINED3DSIH_ILT */ "ilt",
165 /* WINED3DSIH_IMAD */ "imad",
166 /* WINED3DSIH_IMAX */ "imax",
167 /* WINED3DSIH_IMIN */ "imin",
168 /* WINED3DSIH_IMM_ATOMIC_ALLOC */ "imm_atomic_alloc",
169 /* WINED3DSIH_IMM_ATOMIC_AND */ "imm_atomic_and",
170 /* WINED3DSIH_IMM_ATOMIC_CMP_EXCH */ "imm_atomic_cmp_exch",
171 /* WINED3DSIH_IMM_ATOMIC_CONSUME */ "imm_atomic_consume",
172 /* WINED3DSIH_IMM_ATOMIC_EXCH */ "imm_atomic_exch",
173 /* WINED3DSIH_IMM_ATOMIC_IADD */ "imm_atomic_iadd",
174 /* WINED3DSIH_IMM_ATOMIC_IMAX */ "imm_atomic_imax",
175 /* WINED3DSIH_IMM_ATOMIC_IMIN */ "imm_atomic_imin",
176 /* WINED3DSIH_IMM_ATOMIC_OR */ "imm_atomic_or",
177 /* WINED3DSIH_IMM_ATOMIC_UMAX */ "imm_atomic_umax",
178 /* WINED3DSIH_IMM_ATOMIC_UMIN */ "imm_atomic_umin",
179 /* WINED3DSIH_IMM_ATOMIC_XOR */ "imm_atomic_xor",
180 /* WINED3DSIH_IMUL */ "imul",
181 /* WINED3DSIH_INE */ "ine",
182 /* WINED3DSIH_INEG */ "ineg",
183 /* WINED3DSIH_ISHL */ "ishl",
184 /* WINED3DSIH_ISHR */ "ishr",
185 /* WINED3DSIH_ITOF */ "itof",
186 /* WINED3DSIH_LABEL */ "label",
187 /* WINED3DSIH_LD */ "ld",
188 /* WINED3DSIH_LD2DMS */ "ld2dms",
189 /* WINED3DSIH_LD_RAW */ "ld_raw",
190 /* WINED3DSIH_LD_STRUCTURED */ "ld_structured",
191 /* WINED3DSIH_LD_UAV_TYPED */ "ld_uav_typed",
192 /* WINED3DSIH_LIT */ "lit",
193 /* WINED3DSIH_LOD */ "lod",
194 /* WINED3DSIH_LOG */ "log",
195 /* WINED3DSIH_LOGP */ "logp",
196 /* WINED3DSIH_LOOP */ "loop",
197 /* WINED3DSIH_LRP */ "lrp",
198 /* WINED3DSIH_LT */ "lt",
199 /* WINED3DSIH_M3x2 */ "m3x2",
200 /* WINED3DSIH_M3x3 */ "m3x3",
201 /* WINED3DSIH_M3x4 */ "m3x4",
202 /* WINED3DSIH_M4x3 */ "m4x3",
203 /* WINED3DSIH_M4x4 */ "m4x4",
204 /* WINED3DSIH_MAD */ "mad",
205 /* WINED3DSIH_MAX */ "max",
206 /* WINED3DSIH_MIN */ "min",
207 /* WINED3DSIH_MOV */ "mov",
208 /* WINED3DSIH_MOVA */ "mova",
209 /* WINED3DSIH_MOVC */ "movc",
210 /* WINED3DSIH_MUL */ "mul",
211 /* WINED3DSIH_NE */ "ne",
212 /* WINED3DSIH_NOP */ "nop",
213 /* WINED3DSIH_NOT */ "not",
214 /* WINED3DSIH_NRM */ "nrm",
215 /* WINED3DSIH_OR */ "or",
216 /* WINED3DSIH_PHASE */ "phase",
217 /* WINED3DSIH_POW */ "pow",
218 /* WINED3DSIH_RCP */ "rcp",
219 /* WINED3DSIH_REP */ "rep",
220 /* WINED3DSIH_RESINFO */ "resinfo",
221 /* WINED3DSIH_RET */ "ret",
222 /* WINED3DSIH_RETP */ "retp",
223 /* WINED3DSIH_ROUND_NE */ "round_ne",
224 /* WINED3DSIH_ROUND_NI */ "round_ni",
225 /* WINED3DSIH_ROUND_PI */ "round_pi",
226 /* WINED3DSIH_ROUND_Z */ "round_z",
227 /* WINED3DSIH_RSQ */ "rsq",
228 /* WINED3DSIH_SAMPLE */ "sample",
229 /* WINED3DSIH_SAMPLE_B */ "sample_b",
230 /* WINED3DSIH_SAMPLE_C */ "sample_c",
231 /* WINED3DSIH_SAMPLE_C_LZ */ "sample_c_lz",
232 /* WINED3DSIH_SAMPLE_GRAD */ "sample_d",
233 /* WINED3DSIH_SAMPLE_INFO */ "sample_info",
234 /* WINED3DSIH_SAMPLE_LOD */ "sample_l",
235 /* WINED3DSIH_SAMPLE_POS */ "sample_pos",
236 /* WINED3DSIH_SETP */ "setp",
237 /* WINED3DSIH_SGE */ "sge",
238 /* WINED3DSIH_SGN */ "sgn",
239 /* WINED3DSIH_SINCOS */ "sincos",
240 /* WINED3DSIH_SLT */ "slt",
241 /* WINED3DSIH_SQRT */ "sqrt",
242 /* WINED3DSIH_STORE_RAW */ "store_raw",
243 /* WINED3DSIH_STORE_STRUCTURED */ "store_structured",
244 /* WINED3DSIH_STORE_UAV_TYPED */ "store_uav_typed",
245 /* WINED3DSIH_SUB */ "sub",
246 /* WINED3DSIH_SWAPC */ "swapc",
247 /* WINED3DSIH_SWITCH */ "switch",
248 /* WINED3DSIH_SYNC */ "sync",
249 /* WINED3DSIH_TEX */ "texld",
250 /* WINED3DSIH_TEXBEM */ "texbem",
251 /* WINED3DSIH_TEXBEML */ "texbeml",
252 /* WINED3DSIH_TEXCOORD */ "texcrd",
253 /* WINED3DSIH_TEXDEPTH */ "texdepth",
254 /* WINED3DSIH_TEXDP3 */ "texdp3",
255 /* WINED3DSIH_TEXDP3TEX */ "texdp3tex",
256 /* WINED3DSIH_TEXKILL */ "texkill",
257 /* WINED3DSIH_TEXLDD */ "texldd",
258 /* WINED3DSIH_TEXLDL */ "texldl",
259 /* WINED3DSIH_TEXM3x2DEPTH */ "texm3x2depth",
260 /* WINED3DSIH_TEXM3x2PAD */ "texm3x2pad",
261 /* WINED3DSIH_TEXM3x2TEX */ "texm3x2tex",
262 /* WINED3DSIH_TEXM3x3 */ "texm3x3",
263 /* WINED3DSIH_TEXM3x3DIFF */ "texm3x3diff",
264 /* WINED3DSIH_TEXM3x3PAD */ "texm3x3pad",
265 /* WINED3DSIH_TEXM3x3SPEC */ "texm3x3spec",
266 /* WINED3DSIH_TEXM3x3TEX */ "texm3x3tex",
267 /* WINED3DSIH_TEXM3x3VSPEC */ "texm3x3vspec",
268 /* WINED3DSIH_TEXREG2AR */ "texreg2ar",
269 /* WINED3DSIH_TEXREG2GB */ "texreg2gb",
270 /* WINED3DSIH_TEXREG2RGB */ "texreg2rgb",
271 /* WINED3DSIH_UBFE */ "ubfe",
272 /* WINED3DSIH_UDIV */ "udiv",
273 /* WINED3DSIH_UGE */ "uge",
274 /* WINED3DSIH_ULT */ "ult",
275 /* WINED3DSIH_UMAX */ "umax",
276 /* WINED3DSIH_UMIN */ "umin",
277 /* WINED3DSIH_UMUL */ "umul",
278 /* WINED3DSIH_USHR */ "ushr",
279 /* WINED3DSIH_UTOF */ "utof",
280 /* WINED3DSIH_XOR */ "xor",
/* Semantic name strings, indexed by enum wined3d_decl_usage (see
 * shader_semantic_name_from_usage()). The entry order has to match the order
 * of the enumerators named in the per-entry comments. The same table is
 * searched linearly by shader_usage_from_semantic_name() for the reverse
 * mapping. */
283 static const char * const semantic_names[] =
285 /* WINED3D_DECL_USAGE_POSITION */ "SV_POSITION",
286 /* WINED3D_DECL_USAGE_BLEND_WEIGHT */ "BLENDWEIGHT",
287 /* WINED3D_DECL_USAGE_BLEND_INDICES */ "BLENDINDICES",
288 /* WINED3D_DECL_USAGE_NORMAL */ "NORMAL",
289 /* WINED3D_DECL_USAGE_PSIZE */ "PSIZE",
290 /* WINED3D_DECL_USAGE_TEXCOORD */ "TEXCOORD",
291 /* WINED3D_DECL_USAGE_TANGENT */ "TANGENT",
292 /* WINED3D_DECL_USAGE_BINORMAL */ "BINORMAL",
293 /* WINED3D_DECL_USAGE_TESS_FACTOR */ "TESSFACTOR",
294 /* WINED3D_DECL_USAGE_POSITIONT */ "POSITIONT",
295 /* WINED3D_DECL_USAGE_COLOR */ "COLOR",
296 /* WINED3D_DECL_USAGE_FOG */ "FOG",
297 /* WINED3D_DECL_USAGE_DEPTH */ "DEPTH",
298 /* WINED3D_DECL_USAGE_SAMPLE */ "SAMPLE",
301 const char *debug_d3dshaderinstructionhandler(enum WINED3D_SHADER_INSTRUCTION_HANDLER handler_idx)
303 if (handler_idx >= ARRAY_SIZE(shader_opcode_names))
304 return wine_dbg_sprintf("UNRECOGNIZED(%#x)", handler_idx);
306 return shader_opcode_names[handler_idx];
309 static const char *shader_semantic_name_from_usage(enum wined3d_decl_usage usage)
311 if (usage >= ARRAY_SIZE(semantic_names))
313 FIXME("Unrecognized usage %#x.\n", usage);
314 return "UNRECOGNIZED";
317 return semantic_names[usage];
320 static enum wined3d_decl_usage shader_usage_from_semantic_name(const char *name)
322 unsigned int i;
324 for (i = 0; i < ARRAY_SIZE(semantic_names); ++i)
326 if (!strcmp(name, semantic_names[i]))
327 return i;
330 return ~0U;
333 static enum wined3d_sysval_semantic shader_sysval_semantic_from_usage(enum wined3d_decl_usage usage)
335 switch (usage)
337 case WINED3D_DECL_USAGE_POSITION:
338 return WINED3D_SV_POSITION;
339 default:
340 return 0;
344 BOOL shader_match_semantic(const char *semantic_name, enum wined3d_decl_usage usage)
346 return !strcmp(semantic_name, shader_semantic_name_from_usage(usage));
349 static void shader_signature_from_semantic(struct wined3d_shader_signature_element *e,
350 const struct wined3d_shader_semantic *s)
352 e->semantic_name = shader_semantic_name_from_usage(s->usage);
353 e->semantic_idx = s->usage_idx;
354 e->stream_idx = 0;
355 e->sysval_semantic = shader_sysval_semantic_from_usage(s->usage);
356 e->component_type = WINED3D_TYPE_FLOAT;
357 e->register_idx = s->reg.reg.idx[0].offset;
358 e->mask = s->reg.write_mask;
361 static void shader_signature_from_usage(struct wined3d_shader_signature_element *e,
362 enum wined3d_decl_usage usage, UINT usage_idx, UINT reg_idx, DWORD write_mask)
364 e->semantic_name = shader_semantic_name_from_usage(usage);
365 e->semantic_idx = usage_idx;
366 e->stream_idx = 0;
367 e->sysval_semantic = shader_sysval_semantic_from_usage(usage);
368 e->component_type = WINED3D_TYPE_FLOAT;
369 e->register_idx = reg_idx;
370 e->mask = write_mask;
373 static const struct wined3d_shader_frontend *shader_select_frontend(enum vkd3d_shader_source_type source_type)
375 switch (source_type)
377 case VKD3D_SHADER_SOURCE_D3D_BYTECODE:
378 return &sm1_shader_frontend;
380 case VKD3D_SHADER_SOURCE_DXBC_TPF:
381 return &sm4_shader_frontend;
383 default:
384 WARN("Invalid source type %#x specified.\n", source_type);
385 return NULL;
389 void string_buffer_clear(struct wined3d_string_buffer *buffer)
391 buffer->buffer[0] = '\0';
392 buffer->content_size = 0;
395 BOOL string_buffer_init(struct wined3d_string_buffer *buffer)
397 buffer->buffer_size = 32;
398 if (!(buffer->buffer = heap_alloc(buffer->buffer_size)))
400 ERR("Failed to allocate shader buffer memory.\n");
401 return FALSE;
404 string_buffer_clear(buffer);
405 return TRUE;
408 void string_buffer_free(struct wined3d_string_buffer *buffer)
410 heap_free(buffer->buffer);
413 BOOL string_buffer_resize(struct wined3d_string_buffer *buffer, int rc)
415 char *new_buffer;
416 unsigned int new_buffer_size = buffer->buffer_size * 2;
418 while (rc > 0 && (unsigned int)rc >= new_buffer_size - buffer->content_size)
419 new_buffer_size *= 2;
420 if (!(new_buffer = heap_realloc(buffer->buffer, new_buffer_size)))
422 ERR("Failed to grow buffer.\n");
423 buffer->buffer[buffer->content_size] = '\0';
424 return FALSE;
426 buffer->buffer = new_buffer;
427 buffer->buffer_size = new_buffer_size;
428 return TRUE;
431 int shader_vaddline(struct wined3d_string_buffer *buffer, const char *format, va_list args)
433 unsigned int rem;
434 int rc;
436 rem = buffer->buffer_size - buffer->content_size;
437 rc = vsnprintf(&buffer->buffer[buffer->content_size], rem, format, args);
438 if (rc < 0 /* C89 */ || (unsigned int)rc >= rem /* C99 */)
439 return rc;
441 buffer->content_size += rc;
442 return 0;
/* Append printf-style formatted text to a string buffer, growing the buffer
 * and retrying for as long as the text does not fit. Returns 0 on success and
 * -1 when the buffer could not be grown. */
int shader_addline(struct wined3d_string_buffer *buffer, const char *format, ...)
{
    va_list args;
    int ret;

    do
    {
        /* The argument list has to be restarted for every attempt. */
        va_start(args, format);
        ret = shader_vaddline(buffer, format, args);
        va_end(args);
        if (!ret)
            return ret;
    } while (string_buffer_resize(buffer, ret));

    return -1;
}
462 struct wined3d_string_buffer *string_buffer_get(struct wined3d_string_buffer_list *list)
464 struct wined3d_string_buffer *buffer;
466 if (list_empty(&list->list))
468 buffer = heap_alloc(sizeof(*buffer));
469 if (!buffer || !string_buffer_init(buffer))
471 ERR("Couldn't allocate buffer for temporary string.\n");
472 heap_free(buffer);
473 return NULL;
476 else
478 buffer = LIST_ENTRY(list_head(&list->list), struct wined3d_string_buffer, entry);
479 list_remove(&buffer->entry);
481 string_buffer_clear(buffer);
482 return buffer;
/* Replace the content of a string buffer with formatted text, without growing
 * the buffer. A NULL buffer is accepted and treated as success (0). Otherwise
 * the result of shader_vaddline() is returned. */
static int string_buffer_vsprintf(struct wined3d_string_buffer *buffer, const char *format, va_list args)
{
    if (buffer)
    {
        string_buffer_clear(buffer);
        return shader_vaddline(buffer, format, args);
    }

    return 0;
}
/* Replace the content of a string buffer with printf-style formatted text,
 * growing the buffer and retrying while the text does not fit. Gives up
 * silently when the buffer cannot be grown; a NULL buffer is a no-op. */
void string_buffer_sprintf(struct wined3d_string_buffer *buffer, const char *format, ...)
{
    va_list args;
    int ret;

    do
    {
        /* The argument list has to be restarted for every attempt. */
        va_start(args, format);
        ret = string_buffer_vsprintf(buffer, format, args);
        va_end(args);
    } while (ret && string_buffer_resize(buffer, ret));
}
510 void string_buffer_release(struct wined3d_string_buffer_list *list, struct wined3d_string_buffer *buffer)
512 if (!buffer)
513 return;
514 list_add_head(&list->list, &buffer->entry);
517 void string_buffer_list_init(struct wined3d_string_buffer_list *list)
519 list_init(&list->list);
522 void string_buffer_list_cleanup(struct wined3d_string_buffer_list *list)
524 struct wined3d_string_buffer *buffer, *buffer_next;
526 LIST_FOR_EACH_ENTRY_SAFE(buffer, buffer_next, &list->list, struct wined3d_string_buffer, entry)
528 string_buffer_free(buffer);
529 heap_free(buffer);
531 list_init(&list->list);
534 static void shader_delete_constant_list(struct list *clist)
536 struct wined3d_shader_lconst *constant, *constant_next;
538 LIST_FOR_EACH_ENTRY_SAFE(constant, constant_next, clist, struct wined3d_shader_lconst, entry)
539 heap_free(constant);
540 list_init(clist);
543 static void shader_set_limits(struct wined3d_shader *shader)
545 static const struct limits_entry
547 unsigned int min_version;
548 unsigned int max_version;
549 struct wined3d_shader_limits limits;
551 vs_limits[] =
553 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
554 {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 1), { 0, 0, 256, 0, 12, 0}},
555 {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 1), { 0, 16, 256, 16, 12, 0}},
556 /* DX10 cards on Windows advertise a D3D9 constant limit of 256
557 * even though they are capable of supporting much more (GL
558 * drivers advertise 1024). d3d9.dll and d3d8.dll clamp the
559 * wined3d-advertised maximum. Clamp the constant limit for <= 3.0
560 * shaders to 256. */
561 {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), { 4, 16, 256, 16, 12, 0}},
562 {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), {16, 0, 0, 0, 16, 0}},
563 {WINED3D_SHADER_VERSION(4, 1), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 0}},
566 hs_limits[] =
568 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packet_input */
569 {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 32}},
571 ds_limits[] =
573 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packet_input */
574 {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 32}},
576 gs_limits[] =
578 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
579 {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), {16, 0, 0, 0, 32, 16}},
580 {WINED3D_SHADER_VERSION(4, 1), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 32}},
583 ps_limits[] =
585 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
586 {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 3), { 4, 0, 8, 0, 0, 0}},
587 {WINED3D_SHADER_VERSION(1, 4), WINED3D_SHADER_VERSION(1, 4), { 6, 0, 8, 0, 0, 0}},
588 {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 0), {16, 0, 32, 0, 0, 0}},
589 {WINED3D_SHADER_VERSION(2, 1), WINED3D_SHADER_VERSION(2, 1), {16, 16, 32, 16, 0, 0}},
590 {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), {16, 16, 224, 16, 0, 10}},
591 {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 0, 32}},
594 cs_limits[] =
596 /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
597 {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 0, 0}},
599 const struct limits_entry *limits_array;
600 DWORD shader_version = WINED3D_SHADER_VERSION(shader->reg_maps.shader_version.major,
601 shader->reg_maps.shader_version.minor);
602 int i = 0;
604 switch (shader->reg_maps.shader_version.type)
606 default:
607 FIXME("Unexpected shader type %u found.\n", shader->reg_maps.shader_version.type);
608 /* Fall-through. */
609 case WINED3D_SHADER_TYPE_VERTEX:
610 limits_array = vs_limits;
611 break;
612 case WINED3D_SHADER_TYPE_HULL:
613 limits_array = hs_limits;
614 break;
615 case WINED3D_SHADER_TYPE_DOMAIN:
616 limits_array = ds_limits;
617 break;
618 case WINED3D_SHADER_TYPE_GEOMETRY:
619 limits_array = gs_limits;
620 break;
621 case WINED3D_SHADER_TYPE_PIXEL:
622 limits_array = ps_limits;
623 break;
624 case WINED3D_SHADER_TYPE_COMPUTE:
625 limits_array = cs_limits;
626 break;
629 while (limits_array[i].min_version && limits_array[i].min_version <= shader_version)
631 if (shader_version <= limits_array[i].max_version)
633 shader->limits = &limits_array[i].limits;
634 break;
636 ++i;
638 if (!shader->limits)
640 FIXME("Unexpected shader version \"%u.%u\".\n",
641 shader->reg_maps.shader_version.major,
642 shader->reg_maps.shader_version.minor);
643 shader->limits = &limits_array[max(0, i - 1)].limits;
647 static BOOL shader_record_register_usage(struct wined3d_shader *shader, struct wined3d_shader_reg_maps *reg_maps,
648 const struct wined3d_shader_register *reg, enum wined3d_shader_type shader_type, unsigned int constf_size)
650 switch (reg->type)
652 case WINED3DSPR_TEXTURE: /* WINED3DSPR_ADDR */
653 if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
654 reg_maps->texcoord |= 1u << reg->idx[0].offset;
655 else
656 reg_maps->address |= 1u << reg->idx[0].offset;
657 break;
659 case WINED3DSPR_TEMP:
660 reg_maps->temporary |= 1u << reg->idx[0].offset;
661 break;
663 case WINED3DSPR_INPUT:
664 if (reg->idx[0].rel_addr)
665 reg_maps->input_rel_addressing = 1;
666 if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
668 /* If relative addressing is used, we must assume that all
669 * registers are used. Even if it is a construct like v3[aL],
670 * we can't assume that v0, v1 and v2 aren't read because aL
671 * can be negative. */
672 if (reg->idx[0].rel_addr)
673 shader->u.ps.input_reg_used = ~0u;
674 else
675 shader->u.ps.input_reg_used |= 1u << reg->idx[0].offset;
677 else
679 reg_maps->input_registers |= 1u << reg->idx[0].offset;
681 break;
683 case WINED3DSPR_RASTOUT:
684 if (reg->idx[0].offset == 1)
685 reg_maps->fog = 1;
686 if (reg->idx[0].offset == 2)
687 reg_maps->point_size = 1;
688 break;
690 case WINED3DSPR_MISCTYPE:
691 if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
693 if (!reg->idx[0].offset)
694 reg_maps->vpos = 1;
695 else if (reg->idx[0].offset == 1)
696 reg_maps->usesfacing = 1;
698 break;
700 case WINED3DSPR_CONST:
701 if (reg->idx[0].rel_addr)
703 if (reg->idx[0].offset < reg_maps->min_rel_offset)
704 reg_maps->min_rel_offset = reg->idx[0].offset;
705 if (reg->idx[0].offset > reg_maps->max_rel_offset)
706 reg_maps->max_rel_offset = reg->idx[0].offset;
707 reg_maps->usesrelconstF = TRUE;
709 else
711 if (reg->idx[0].offset >= min(shader->limits->constant_float, constf_size))
713 WARN("Shader using float constant %u which is not supported.\n", reg->idx[0].offset);
714 return FALSE;
716 else
718 wined3d_insert_bits(reg_maps->constf, reg->idx[0].offset, 1, 0x1);
721 break;
723 case WINED3DSPR_CONSTINT:
724 if (reg->idx[0].offset >= shader->limits->constant_int)
726 WARN("Shader using integer constant %u which is not supported.\n", reg->idx[0].offset);
727 return FALSE;
729 else
731 reg_maps->integer_constants |= (1u << reg->idx[0].offset);
733 break;
735 case WINED3DSPR_CONSTBOOL:
736 if (reg->idx[0].offset >= shader->limits->constant_bool)
738 WARN("Shader using bool constant %u which is not supported.\n", reg->idx[0].offset);
739 return FALSE;
741 else
743 reg_maps->boolean_constants |= (1u << reg->idx[0].offset);
745 break;
747 case WINED3DSPR_COLOROUT:
748 reg_maps->rt_mask |= (1u << reg->idx[0].offset);
749 break;
751 case WINED3DSPR_OUTCONTROLPOINT:
752 reg_maps->vocp = 1;
753 break;
755 case WINED3DSPR_SAMPLEMASK:
756 reg_maps->sample_mask = 1;
757 break;
759 default:
760 TRACE("Not recording register of type %#x and [%#x][%#x].\n",
761 reg->type, reg->idx[0].offset, reg->idx[1].offset);
762 break;
764 return TRUE;
767 static void shader_record_sample(struct wined3d_shader_reg_maps *reg_maps,
768 unsigned int resource_idx, unsigned int sampler_idx, unsigned int bind_idx)
770 struct wined3d_shader_sampler_map_entry *entries, *entry;
771 struct wined3d_shader_sampler_map *map;
772 unsigned int i;
774 map = &reg_maps->sampler_map;
775 entries = map->entries;
776 for (i = 0; i < map->count; ++i)
778 if (entries[i].resource_idx == resource_idx && entries[i].sampler_idx == sampler_idx)
779 return;
782 if (!map->size)
784 if (!(entries = heap_calloc(4, sizeof(*entries))))
786 ERR("Failed to allocate sampler map entries.\n");
787 return;
789 map->size = 4;
790 map->entries = entries;
792 else if (map->count == map->size)
794 size_t new_size = map->size * 2;
796 if (sizeof(*entries) * new_size <= sizeof(*entries) * map->size
797 || !(entries = heap_realloc(entries, sizeof(*entries) * new_size)))
799 ERR("Failed to resize sampler map entries.\n");
800 return;
802 map->size = new_size;
803 map->entries = entries;
806 entry = &entries[map->count++];
807 entry->resource_idx = resource_idx;
808 entry->sampler_idx = sampler_idx;
809 entry->bind_idx = bind_idx;
812 static unsigned int get_instr_extra_regcount(enum WINED3D_SHADER_INSTRUCTION_HANDLER instr, unsigned int param)
814 switch (instr)
816 case WINED3DSIH_M4x4:
817 case WINED3DSIH_M3x4:
818 return param == 1 ? 3 : 0;
820 case WINED3DSIH_M4x3:
821 case WINED3DSIH_M3x3:
822 return param == 1 ? 2 : 0;
824 case WINED3DSIH_M3x2:
825 return param == 1 ? 1 : 0;
827 default:
828 return 0;
832 static HRESULT shader_reg_maps_add_tgsm(struct wined3d_shader_reg_maps *reg_maps,
833 unsigned int register_idx, unsigned int size, unsigned int stride)
835 struct wined3d_shader_tgsm *tgsm;
837 if (register_idx >= MAX_TGSM_REGISTERS)
839 ERR("Invalid TGSM register index %u.\n", register_idx);
840 return S_OK;
842 if (reg_maps->shader_version.type != WINED3D_SHADER_TYPE_COMPUTE)
844 FIXME("TGSM declarations are allowed only in compute shaders.\n");
845 return S_OK;
848 if (!wined3d_array_reserve((void **)&reg_maps->tgsm, &reg_maps->tgsm_capacity,
849 register_idx + 1, sizeof(*reg_maps->tgsm)))
850 return E_OUTOFMEMORY;
852 reg_maps->tgsm_count = max(register_idx + 1, reg_maps->tgsm_count);
853 tgsm = &reg_maps->tgsm[register_idx];
854 tgsm->size = size;
855 tgsm->stride = stride;
856 return S_OK;
859 static HRESULT shader_record_shader_phase(struct wined3d_shader *shader,
860 struct wined3d_shader_phase **current_phase, const struct wined3d_shader_instruction *ins,
861 const DWORD *current_instruction_ptr, const DWORD *previous_instruction_ptr)
863 struct wined3d_shader_phase *phase;
865 if ((phase = *current_phase))
867 phase->end = previous_instruction_ptr;
868 *current_phase = NULL;
871 if (shader->reg_maps.shader_version.type != WINED3D_SHADER_TYPE_HULL)
873 ERR("Unexpected shader type %s.\n", debug_shader_type(shader->reg_maps.shader_version.type));
874 return E_FAIL;
877 switch (ins->handler_idx)
879 case WINED3DSIH_HS_CONTROL_POINT_PHASE:
880 if (shader->u.hs.phases.control_point)
882 FIXME("Multiple control point phases.\n");
883 heap_free(shader->u.hs.phases.control_point);
885 if (!(shader->u.hs.phases.control_point = heap_alloc_zero(sizeof(*shader->u.hs.phases.control_point))))
886 return E_OUTOFMEMORY;
887 phase = shader->u.hs.phases.control_point;
888 break;
889 case WINED3DSIH_HS_FORK_PHASE:
890 if (!wined3d_array_reserve((void **)&shader->u.hs.phases.fork,
891 &shader->u.hs.phases.fork_size, shader->u.hs.phases.fork_count + 1,
892 sizeof(*shader->u.hs.phases.fork)))
893 return E_OUTOFMEMORY;
894 phase = &shader->u.hs.phases.fork[shader->u.hs.phases.fork_count++];
895 break;
896 case WINED3DSIH_HS_JOIN_PHASE:
897 if (!wined3d_array_reserve((void **)&shader->u.hs.phases.join,
898 &shader->u.hs.phases.join_size, shader->u.hs.phases.join_count + 1,
899 sizeof(*shader->u.hs.phases.join)))
900 return E_OUTOFMEMORY;
901 phase = &shader->u.hs.phases.join[shader->u.hs.phases.join_count++];
902 break;
903 default:
904 ERR("Unexpected opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
905 return E_FAIL;
908 phase->start = current_instruction_ptr;
909 *current_phase = phase;
911 return WINED3D_OK;
914 static HRESULT shader_calculate_clip_or_cull_distance_mask(
915 const struct wined3d_shader_signature_element *e, unsigned int *mask)
917 /* Clip and cull distances are packed in 4 component registers. 0 and 1 are
918 * the only allowed semantic indices.
920 if (e->semantic_idx >= WINED3D_MAX_CLIP_DISTANCES / 4)
922 *mask = 0;
923 WARN("Invalid clip/cull distance index %u.\n", e->semantic_idx);
924 return WINED3DERR_INVALIDCALL;
927 *mask = (e->mask & WINED3DSP_WRITEMASK_ALL) << (4 * e->semantic_idx);
928 return WINED3D_OK;
931 static void wined3d_insert_interpolation_mode(uint32_t *packed_interpolation_mode,
932 unsigned int register_idx, enum wined3d_shader_interpolation_mode mode)
934 if (mode > WINED3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE)
935 FIXME("Unexpected interpolation mode %#x.\n", mode);
937 wined3d_insert_bits(packed_interpolation_mode,
938 register_idx * WINED3D_PACKED_INTERPOLATION_BIT_COUNT, WINED3D_PACKED_INTERPOLATION_BIT_COUNT, mode);
941 static HRESULT shader_scan_output_signature(struct wined3d_shader *shader)
943 const struct wined3d_shader_signature *output_signature = &shader->output_signature;
944 struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
945 unsigned int i;
946 HRESULT hr;
948 for (i = 0; i < output_signature->element_count; ++i)
950 const struct wined3d_shader_signature_element *e = &output_signature->elements[i];
951 unsigned int mask;
953 reg_maps->output_registers |= 1u << e->register_idx;
954 if (e->sysval_semantic == WINED3D_SV_CLIP_DISTANCE)
956 if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(e, &mask)))
957 return hr;
958 reg_maps->clip_distance_mask |= mask;
960 else if (e->sysval_semantic == WINED3D_SV_CULL_DISTANCE)
962 if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(e, &mask)))
963 return hr;
964 reg_maps->cull_distance_mask |= mask;
966 else if (e->sysval_semantic == WINED3D_SV_VIEWPORT_ARRAY_INDEX)
968 reg_maps->viewport_array = 1;
972 return WINED3D_OK;
975 /* Note that this does not count the loop register as an address register. */
976 static HRESULT shader_get_registers_used(struct wined3d_shader *shader, DWORD constf_size)
978 struct wined3d_shader_signature_element input_signature_elements[max(MAX_ATTRIBS, MAX_REG_INPUT)];
979 struct wined3d_shader_signature_element output_signature_elements[MAX_REG_OUTPUT];
980 struct wined3d_shader_signature *output_signature = &shader->output_signature;
981 struct wined3d_shader_signature *input_signature = &shader->input_signature;
982 struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
983 const struct wined3d_shader_frontend *fe = shader->frontend;
984 unsigned int cur_loop_depth = 0, max_loop_depth = 0;
985 struct wined3d_shader_version shader_version;
986 struct wined3d_shader_phase *phase = NULL;
987 const DWORD *ptr, *prev_ins, *current_ins;
988 void *fe_data = shader->frontend_data;
989 unsigned int i;
990 HRESULT hr;
992 memset(reg_maps, 0, sizeof(*reg_maps));
993 memset(input_signature_elements, 0, sizeof(input_signature_elements));
994 memset(output_signature_elements, 0, sizeof(output_signature_elements));
995 reg_maps->min_rel_offset = ~0U;
996 list_init(&reg_maps->indexable_temps);
998 fe->shader_read_header(fe_data, &ptr, &shader_version);
999 prev_ins = current_ins = ptr;
1000 reg_maps->shader_version = shader_version;
1002 shader_set_limits(shader);
1004 if (!(reg_maps->constf = heap_calloc(((min(shader->limits->constant_float, constf_size) + 31) / 32),
1005 sizeof(*reg_maps->constf))))
1007 ERR("Failed to allocate constant map memory.\n");
1008 return E_OUTOFMEMORY;
1011 while (!fe->shader_is_end(fe_data, &ptr))
1013 struct wined3d_shader_instruction ins;
1015 current_ins = ptr;
1016 /* Fetch opcode. */
1017 fe->shader_read_instruction(fe_data, &ptr, &ins);
1019 /* Unhandled opcode, and its parameters. */
1020 if (ins.handler_idx == WINED3DSIH_TABLE_SIZE)
1022 WARN("Encountered unrecognised or invalid instruction.\n");
1023 return WINED3DERR_INVALIDCALL;
1026 /* Handle declarations. */
1027 if (ins.handler_idx == WINED3DSIH_DCL
1028 || ins.handler_idx == WINED3DSIH_DCL_UAV_TYPED)
1030 struct wined3d_shader_semantic *semantic = &ins.declaration.semantic;
1031 unsigned int reg_idx = semantic->reg.reg.idx[0].offset;
1033 switch (semantic->reg.reg.type)
1035 /* Mark input registers used. */
1036 case WINED3DSPR_INPUT:
1037 if (reg_idx >= MAX_REG_INPUT)
1039 ERR("Invalid input register index %u.\n", reg_idx);
1040 break;
1042 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL && shader_version.major == 3
1043 && semantic->usage == WINED3D_DECL_USAGE_POSITION && !semantic->usage_idx)
1044 return WINED3DERR_INVALIDCALL;
1045 reg_maps->input_registers |= 1u << reg_idx;
1046 shader_signature_from_semantic(&input_signature_elements[reg_idx], semantic);
1047 break;
1049 /* Vertex shader: mark 3.0 output registers used, save token. */
1050 case WINED3DSPR_OUTPUT:
1051 if (reg_idx >= MAX_REG_OUTPUT)
1053 ERR("Invalid output register index %u.\n", reg_idx);
1054 break;
1056 reg_maps->output_registers |= 1u << reg_idx;
1057 shader_signature_from_semantic(&output_signature_elements[reg_idx], semantic);
1058 if (semantic->usage == WINED3D_DECL_USAGE_FOG)
1059 reg_maps->fog = 1;
1060 if (semantic->usage == WINED3D_DECL_USAGE_PSIZE)
1061 reg_maps->point_size = 1;
1062 break;
1064 case WINED3DSPR_SAMPLER:
1065 shader_record_sample(reg_maps, reg_idx, reg_idx, reg_idx);
1066 case WINED3DSPR_RESOURCE:
1067 if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1069 ERR("Invalid resource index %u.\n", reg_idx);
1070 break;
1072 reg_maps->resource_info[reg_idx].type = semantic->resource_type;
1073 if (semantic->resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2DMS && semantic->sample_count == 1)
1074 reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_TEXTURE_2D;
1075 if (semantic->resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY && semantic->sample_count == 1)
1076 reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_TEXTURE_2DARRAY;
1077 reg_maps->resource_info[reg_idx].data_type = semantic->resource_data_type;
1078 wined3d_bitmap_set(reg_maps->resource_map, reg_idx);
1079 break;
1081 case WINED3DSPR_UAV:
1082 if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1084 ERR("Invalid UAV resource index %u.\n", reg_idx);
1085 break;
1087 reg_maps->uav_resource_info[reg_idx].type = semantic->resource_type;
1088 reg_maps->uav_resource_info[reg_idx].data_type = semantic->resource_data_type;
1089 if (ins.flags)
1090 FIXME("Ignoring typed UAV flags %#x.\n", ins.flags);
1091 break;
1093 default:
1094 TRACE("Not recording DCL register type %#x.\n", semantic->reg.reg.type);
1095 break;
1098 else if (ins.handler_idx == WINED3DSIH_DCL_CONSTANT_BUFFER)
1100 struct wined3d_shader_register *reg = &ins.declaration.src.reg;
1101 if (reg->idx[0].offset >= WINED3D_MAX_CBS)
1103 ERR("Invalid CB index %u.\n", reg->idx[0].offset);
1105 else
1107 reg_maps->cb_sizes[reg->idx[0].offset] = reg->idx[1].offset;
1108 wined3d_bitmap_set(&reg_maps->cb_map, reg->idx[0].offset);
1111 else if (ins.handler_idx == WINED3DSIH_DCL_GLOBAL_FLAGS)
1113 if (ins.flags & WINED3DSGF_FORCE_EARLY_DEPTH_STENCIL)
1115 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1116 shader->u.ps.force_early_depth_stencil = TRUE;
1117 else
1118 FIXME("Invalid instruction %#x for shader type %#x.\n",
1119 ins.handler_idx, shader_version.type);
1121 else
1123 WARN("Ignoring global flags %#x.\n", ins.flags);
1126 else if (ins.handler_idx == WINED3DSIH_DCL_GS_INSTANCES)
1128 if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1129 shader->u.gs.instance_count = ins.declaration.count;
1130 else
1131 FIXME("Invalid instruction %#x for shader type %#x.\n",
1132 ins.handler_idx, shader_version.type);
1134 else if (ins.handler_idx == WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT
1135 || ins.handler_idx == WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT)
1137 if (phase)
1138 phase->instance_count = ins.declaration.count;
1139 else
1140 FIXME("Instruction %s outside of shader phase.\n",
1141 debug_d3dshaderinstructionhandler(ins.handler_idx));
1143 else if (ins.handler_idx == WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER)
1145 if (reg_maps->icb)
1146 FIXME("Multiple immediate constant buffers.\n");
1147 reg_maps->icb = ins.declaration.icb;
1149 else if (ins.handler_idx == WINED3DSIH_DCL_INDEXABLE_TEMP)
1151 if (phase)
1153 FIXME("Indexable temporary registers not supported.\n");
1155 else
1157 struct wined3d_shader_indexable_temp *reg;
1159 if (!(reg = heap_alloc(sizeof(*reg))))
1160 return E_OUTOFMEMORY;
1162 *reg = ins.declaration.indexable_temp;
1163 list_add_tail(&reg_maps->indexable_temps, &reg->entry);
1166 else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PRIMITIVE)
1168 if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1169 shader->u.gs.input_type = ins.declaration.primitive_type.type;
1170 else
1171 FIXME("Invalid instruction %#x for shader type %#x.\n",
1172 ins.handler_idx, shader_version.type);
1174 else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS)
1176 unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1177 if (reg_idx >= MAX_REG_INPUT)
1179 ERR("Invalid register index %u.\n", reg_idx);
1180 break;
1182 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1183 wined3d_insert_interpolation_mode(shader->u.ps.interpolation_mode, reg_idx, ins.flags);
1184 else
1185 FIXME("Invalid instruction %#x for shader type %#x.\n",
1186 ins.handler_idx, shader_version.type);
1188 else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT)
1190 if (ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUT
1191 || ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUTGE
1192 || ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUTLE)
1194 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1195 shader->u.ps.depth_output = ins.declaration.dst.reg.type;
1196 else
1197 FIXME("Invalid instruction %#x for shader type %#x.\n",
1198 ins.handler_idx, shader_version.type);
1201 else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT)
1203 if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1204 shader->u.hs.output_vertex_count = ins.declaration.count;
1205 else
1206 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1208 else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT_TOPOLOGY)
1210 if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1211 shader->u.gs.output_type = ins.declaration.primitive_type.type;
1212 else
1213 FIXME("Invalid instruction %#x for shader type %#x.\n",
1214 ins.handler_idx, shader_version.type);
1216 else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_RAW)
1218 unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1219 if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1221 ERR("Invalid resource index %u.\n", reg_idx);
1222 break;
1224 reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1225 reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1226 reg_maps->resource_info[reg_idx].flags = WINED3D_VIEW_BUFFER_RAW;
1227 wined3d_bitmap_set(reg_maps->resource_map, reg_idx);
1229 else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_STRUCTURED)
1231 unsigned int reg_idx = ins.declaration.structured_resource.reg.reg.idx[0].offset;
1232 if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1234 ERR("Invalid resource index %u.\n", reg_idx);
1235 break;
1237 reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1238 reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1239 reg_maps->resource_info[reg_idx].flags = 0;
1240 reg_maps->resource_info[reg_idx].stride = ins.declaration.structured_resource.byte_stride / 4;
1241 wined3d_bitmap_set(reg_maps->resource_map, reg_idx);
1243 else if (ins.handler_idx == WINED3DSIH_DCL_SAMPLER)
1245 if (ins.flags & WINED3DSI_SAMPLER_COMPARISON_MODE)
1246 reg_maps->sampler_comparison_mode |= (1u << ins.declaration.dst.reg.idx[0].offset);
1248 else if (ins.handler_idx == WINED3DSIH_DCL_TEMPS)
1250 if (phase)
1251 phase->temporary_count = ins.declaration.count;
1252 else
1253 reg_maps->temporary_count = ins.declaration.count;
1255 else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_DOMAIN)
1257 if (shader_version.type == WINED3D_SHADER_TYPE_DOMAIN)
1258 shader->u.ds.tessellator_domain = ins.declaration.tessellator_domain;
1259 else if (shader_version.type != WINED3D_SHADER_TYPE_HULL)
1260 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1262 else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE)
1264 if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1265 shader->u.hs.tessellator_output_primitive = ins.declaration.tessellator_output_primitive;
1266 else
1267 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1269 else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_PARTITIONING)
1271 if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1272 shader->u.hs.tessellator_partitioning = ins.declaration.tessellator_partitioning;
1273 else
1274 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1276 else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_RAW)
1278 if (FAILED(hr = shader_reg_maps_add_tgsm(reg_maps, ins.declaration.tgsm_raw.reg.reg.idx[0].offset,
1279 ins.declaration.tgsm_raw.byte_count / 4, 0)))
1280 return hr;
1282 else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_STRUCTURED)
1284 unsigned int stride = ins.declaration.tgsm_structured.byte_stride / 4;
1285 unsigned int size = stride * ins.declaration.tgsm_structured.structure_count;
1286 if (FAILED(hr = shader_reg_maps_add_tgsm(reg_maps,
1287 ins.declaration.tgsm_structured.reg.reg.idx[0].offset, size, stride)))
1288 return hr;
1290 else if (ins.handler_idx == WINED3DSIH_DCL_THREAD_GROUP)
1292 if (shader_version.type == WINED3D_SHADER_TYPE_COMPUTE)
1294 shader->u.cs.thread_group_size = ins.declaration.thread_group_size;
1296 else
1298 FIXME("Invalid instruction %#x for shader type %#x.\n",
1299 ins.handler_idx, shader_version.type);
1302 else if (ins.handler_idx == WINED3DSIH_DCL_UAV_RAW)
1304 unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1305 if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1307 ERR("Invalid UAV resource index %u.\n", reg_idx);
1308 break;
1310 if (ins.flags)
1311 FIXME("Ignoring raw UAV flags %#x.\n", ins.flags);
1312 reg_maps->uav_resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1313 reg_maps->uav_resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1314 reg_maps->uav_resource_info[reg_idx].flags = WINED3D_VIEW_BUFFER_RAW;
1316 else if (ins.handler_idx == WINED3DSIH_DCL_UAV_STRUCTURED)
1318 unsigned int reg_idx = ins.declaration.structured_resource.reg.reg.idx[0].offset;
1319 if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1321 ERR("Invalid UAV resource index %u.\n", reg_idx);
1322 break;
1324 if (ins.flags)
1325 FIXME("Ignoring structured UAV flags %#x.\n", ins.flags);
1326 reg_maps->uav_resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1327 reg_maps->uav_resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1328 reg_maps->uav_resource_info[reg_idx].flags = 0;
1329 reg_maps->uav_resource_info[reg_idx].stride = ins.declaration.structured_resource.byte_stride / 4;
1331 else if (ins.handler_idx == WINED3DSIH_DCL_VERTICES_OUT)
1333 if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1334 shader->u.gs.vertices_out = ins.declaration.count;
1335 else
1336 FIXME("Invalid instruction %#x for shader type %#x.\n",
1337 ins.handler_idx, shader_version.type);
1339 else if (ins.handler_idx == WINED3DSIH_DEF)
1341 struct wined3d_shader_lconst *lconst;
1342 float *value;
1344 if (!(lconst = heap_alloc(sizeof(*lconst))))
1345 return E_OUTOFMEMORY;
1347 lconst->idx = ins.dst[0].reg.idx[0].offset;
1348 memcpy(lconst->value, ins.src[0].reg.u.immconst_data, 4 * sizeof(DWORD));
1349 value = (float *)lconst->value;
1351 /* In pixel shader 1.X shaders, the constants are clamped between [-1;1] */
1352 if (shader_version.major == 1 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1354 if (value[0] < -1.0f) value[0] = -1.0f;
1355 else if (value[0] > 1.0f) value[0] = 1.0f;
1356 if (value[1] < -1.0f) value[1] = -1.0f;
1357 else if (value[1] > 1.0f) value[1] = 1.0f;
1358 if (value[2] < -1.0f) value[2] = -1.0f;
1359 else if (value[2] > 1.0f) value[2] = 1.0f;
1360 if (value[3] < -1.0f) value[3] = -1.0f;
1361 else if (value[3] > 1.0f) value[3] = 1.0f;
1364 list_add_head(&shader->constantsF, &lconst->entry);
1366 if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
1367 || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
1369 shader->lconst_inf_or_nan = TRUE;
1372 else if (ins.handler_idx == WINED3DSIH_DEFI)
1374 struct wined3d_shader_lconst *lconst;
1376 if (!(lconst = heap_alloc(sizeof(*lconst))))
1377 return E_OUTOFMEMORY;
1379 lconst->idx = ins.dst[0].reg.idx[0].offset;
1380 memcpy(lconst->value, ins.src[0].reg.u.immconst_data, 4 * sizeof(DWORD));
1382 list_add_head(&shader->constantsI, &lconst->entry);
1383 reg_maps->local_int_consts |= (1u << lconst->idx);
1385 else if (ins.handler_idx == WINED3DSIH_DEFB)
1387 struct wined3d_shader_lconst *lconst;
1389 if (!(lconst = heap_alloc(sizeof(*lconst))))
1390 return E_OUTOFMEMORY;
1392 lconst->idx = ins.dst[0].reg.idx[0].offset;
1393 memcpy(lconst->value, ins.src[0].reg.u.immconst_data, sizeof(DWORD));
1395 list_add_head(&shader->constantsB, &lconst->entry);
1396 reg_maps->local_bool_consts |= (1u << lconst->idx);
1398 /* Handle shader phases. */
1399 else if (ins.handler_idx == WINED3DSIH_HS_CONTROL_POINT_PHASE
1400 || ins.handler_idx == WINED3DSIH_HS_FORK_PHASE
1401 || ins.handler_idx == WINED3DSIH_HS_JOIN_PHASE)
1403 if (FAILED(hr = shader_record_shader_phase(shader, &phase, &ins, current_ins, prev_ins)))
1404 return hr;
1406 /* For subroutine prototypes. */
1407 else if (ins.handler_idx == WINED3DSIH_LABEL)
1409 reg_maps->labels |= 1u << ins.src[0].reg.idx[0].offset;
1411 /* Set texture, address, temporary registers. */
1412 else
1414 BOOL color0_mov = FALSE;
1415 unsigned int i;
1417 /* This will loop over all the registers and try to
1418 * make a bitmask of the ones we're interested in.
1420 * Relative addressing tokens are ignored, but that's
1421 * okay, since we'll catch any address registers when
1422 * they are initialized (required by spec). */
1423 for (i = 0; i < ins.dst_count; ++i)
1425 if (!shader_record_register_usage(shader, reg_maps, &ins.dst[i].reg,
1426 shader_version.type, constf_size))
1427 return WINED3DERR_INVALIDCALL;
1429 if (shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1431 UINT idx = ins.dst[i].reg.idx[0].offset;
1433 switch (ins.dst[i].reg.type)
1435 case WINED3DSPR_RASTOUT:
1436 if (shader_version.major >= 3)
1437 break;
1438 switch (idx)
1440 case 0: /* oPos */
1441 reg_maps->output_registers |= 1u << 10;
1442 shader_signature_from_usage(&output_signature_elements[10],
1443 WINED3D_DECL_USAGE_POSITION, 0, 10, WINED3DSP_WRITEMASK_ALL);
1444 break;
1446 case 1: /* oFog */
1447 reg_maps->output_registers |= 1u << 11;
1448 shader_signature_from_usage(&output_signature_elements[11],
1449 WINED3D_DECL_USAGE_FOG, 0, 11, WINED3DSP_WRITEMASK_0);
1450 break;
1452 case 2: /* oPts */
1453 reg_maps->output_registers |= 1u << 11;
1454 shader_signature_from_usage(&output_signature_elements[11],
1455 WINED3D_DECL_USAGE_PSIZE, 0, 11, WINED3DSP_WRITEMASK_1);
1456 break;
1458 break;
1460 case WINED3DSPR_ATTROUT:
1461 if (shader_version.major >= 3)
1462 break;
1463 if (idx < 2)
1465 idx += 8;
1466 if (reg_maps->output_registers & (1u << idx))
1468 output_signature_elements[idx].mask |= ins.dst[i].write_mask;
1470 else
1472 reg_maps->output_registers |= 1u << idx;
1473 shader_signature_from_usage(&output_signature_elements[idx],
1474 WINED3D_DECL_USAGE_COLOR, idx - 8, idx, ins.dst[i].write_mask);
1477 break;
1479 case WINED3DSPR_TEXCRDOUT: /* WINED3DSPR_OUTPUT */
1480 if (shader_version.major >= 3)
1482 if (idx >= ARRAY_SIZE(reg_maps->u.output_registers_mask))
1484 WARN("Invalid output register index %u.\n", idx);
1485 break;
1487 reg_maps->u.output_registers_mask[idx] |= ins.dst[i].write_mask;
1488 break;
1490 if (idx >= ARRAY_SIZE(reg_maps->u.texcoord_mask))
1492 WARN("Invalid texcoord index %u.\n", idx);
1493 break;
1495 reg_maps->u.texcoord_mask[idx] |= ins.dst[i].write_mask;
1496 if (reg_maps->output_registers & (1u << idx))
1498 output_signature_elements[idx].mask |= ins.dst[i].write_mask;
1500 else
1502 reg_maps->output_registers |= 1u << idx;
1503 shader_signature_from_usage(&output_signature_elements[idx],
1504 WINED3D_DECL_USAGE_TEXCOORD, idx, idx, ins.dst[i].write_mask);
1506 break;
1508 default:
1509 break;
1513 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1515 if (ins.dst[i].reg.type == WINED3DSPR_COLOROUT && !ins.dst[i].reg.idx[0].offset)
1517 /* Many 2.0 and 3.0 pixel shaders end with a MOV from a temp register to
1518 * COLOROUT 0. If we know this in advance, the ARB shader backend can skip
1519 * the mov and perform the sRGB write correction from the source register.
1521 * However, if the mov is only partial, we can't do this, and if the write
1522 * comes from an instruction other than MOV it is hard to do as well. If
1523 * COLOROUT 0 is overwritten partially later, the marker is dropped again. */
1524 shader->u.ps.color0_mov = FALSE;
1525 if (ins.handler_idx == WINED3DSIH_MOV
1526 && ins.dst[i].write_mask == WINED3DSP_WRITEMASK_ALL)
1528 /* Used later when the source register is read. */
1529 color0_mov = TRUE;
1532 /* Also drop the MOV marker if the source register is overwritten prior to the shader
1533 * end
1535 else if (ins.dst[i].reg.type == WINED3DSPR_TEMP
1536 && ins.dst[i].reg.idx[0].offset == shader->u.ps.color0_reg)
1538 shader->u.ps.color0_mov = FALSE;
1542 /* Declare 1.x samplers implicitly, based on the destination reg. number. */
1543 if (shader_version.major == 1
1544 && (ins.handler_idx == WINED3DSIH_TEX
1545 || ins.handler_idx == WINED3DSIH_TEXBEM
1546 || ins.handler_idx == WINED3DSIH_TEXBEML
1547 || ins.handler_idx == WINED3DSIH_TEXDP3TEX
1548 || ins.handler_idx == WINED3DSIH_TEXM3x2TEX
1549 || ins.handler_idx == WINED3DSIH_TEXM3x3SPEC
1550 || ins.handler_idx == WINED3DSIH_TEXM3x3TEX
1551 || ins.handler_idx == WINED3DSIH_TEXM3x3VSPEC
1552 || ins.handler_idx == WINED3DSIH_TEXREG2AR
1553 || ins.handler_idx == WINED3DSIH_TEXREG2GB
1554 || ins.handler_idx == WINED3DSIH_TEXREG2RGB))
1556 unsigned int reg_idx = ins.dst[i].reg.idx[0].offset;
1558 if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1560 WARN("Invalid 1.x sampler index %u.\n", reg_idx);
1561 continue;
1564 TRACE("Setting fake 2D resource for 1.x pixelshader.\n");
1565 reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_TEXTURE_2D;
1566 reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_FLOAT;
1567 shader_record_sample(reg_maps, reg_idx, reg_idx, reg_idx);
1568 wined3d_bitmap_set(reg_maps->resource_map, reg_idx);
1570 /* texbem is only valid with < 1.4 pixel shaders */
1571 if (ins.handler_idx == WINED3DSIH_TEXBEM
1572 || ins.handler_idx == WINED3DSIH_TEXBEML)
1574 reg_maps->bumpmat |= 1u << reg_idx;
1575 if (ins.handler_idx == WINED3DSIH_TEXBEML)
1577 reg_maps->luminanceparams |= 1u << reg_idx;
1581 else if (ins.handler_idx == WINED3DSIH_BEM)
1583 reg_maps->bumpmat |= 1u << ins.dst[i].reg.idx[0].offset;
1587 if (ins.handler_idx == WINED3DSIH_IMM_ATOMIC_ALLOC || ins.handler_idx == WINED3DSIH_IMM_ATOMIC_CONSUME)
1589 unsigned int reg_idx = ins.src[0].reg.idx[0].offset;
1590 if (reg_idx >= MAX_UNORDERED_ACCESS_VIEWS)
1592 ERR("Invalid UAV index %u.\n", reg_idx);
1593 break;
1595 reg_maps->uav_counter_mask |= (1u << reg_idx);
1597 else if ((WINED3DSIH_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_ATOMIC_XOR)
1598 || (WINED3DSIH_IMM_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_IMM_ATOMIC_XOR)
1599 || (ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_UAV)
1600 || ins.handler_idx == WINED3DSIH_LD_UAV_TYPED
1601 || (ins.handler_idx == WINED3DSIH_LD_RAW && ins.src[1].reg.type == WINED3DSPR_UAV)
1602 || (ins.handler_idx == WINED3DSIH_LD_STRUCTURED && ins.src[2].reg.type == WINED3DSPR_UAV))
1604 const struct wined3d_shader_register *reg;
1606 if (ins.handler_idx == WINED3DSIH_LD_UAV_TYPED || ins.handler_idx == WINED3DSIH_LD_RAW)
1607 reg = &ins.src[1].reg;
1608 else if (ins.handler_idx == WINED3DSIH_LD_STRUCTURED)
1609 reg = &ins.src[2].reg;
1610 else if (WINED3DSIH_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_ATOMIC_XOR)
1611 reg = &ins.dst[0].reg;
1612 else if (ins.handler_idx == WINED3DSIH_BUFINFO)
1613 reg = &ins.src[0].reg;
1614 else
1615 reg = &ins.dst[1].reg;
1617 if (reg->type == WINED3DSPR_UAV)
1619 if (reg->idx[0].offset >= MAX_UNORDERED_ACCESS_VIEWS)
1621 ERR("Invalid UAV index %u.\n", reg->idx[0].offset);
1622 break;
1624 reg_maps->uav_read_mask |= (1u << reg->idx[0].offset);
1627 else if (ins.handler_idx == WINED3DSIH_NRM)
1629 reg_maps->usesnrm = 1;
1631 else if (ins.handler_idx == WINED3DSIH_DSY
1632 || ins.handler_idx == WINED3DSIH_DSY_COARSE
1633 || ins.handler_idx == WINED3DSIH_DSY_FINE)
1635 reg_maps->usesdsy = 1;
1637 else if (ins.handler_idx == WINED3DSIH_DSX
1638 || ins.handler_idx == WINED3DSIH_DSX_COARSE
1639 || ins.handler_idx == WINED3DSIH_DSX_FINE)
1641 reg_maps->usesdsx = 1;
1643 else if (ins.handler_idx == WINED3DSIH_TEXLDD) reg_maps->usestexldd = 1;
1644 else if (ins.handler_idx == WINED3DSIH_TEXLDL) reg_maps->usestexldl = 1;
1645 else if (ins.handler_idx == WINED3DSIH_MOVA) reg_maps->usesmova = 1;
1646 else if (ins.handler_idx == WINED3DSIH_IFC) reg_maps->usesifc = 1;
1647 else if (ins.handler_idx == WINED3DSIH_CALL) reg_maps->usescall = 1;
1648 else if (ins.handler_idx == WINED3DSIH_POW) reg_maps->usespow = 1;
1649 else if (ins.handler_idx == WINED3DSIH_LOOP
1650 || ins.handler_idx == WINED3DSIH_REP)
1652 ++cur_loop_depth;
1653 if (cur_loop_depth > max_loop_depth)
1654 max_loop_depth = cur_loop_depth;
1656 else if (ins.handler_idx == WINED3DSIH_ENDLOOP
1657 || ins.handler_idx == WINED3DSIH_ENDREP)
1659 --cur_loop_depth;
1661 else if (ins.handler_idx == WINED3DSIH_GATHER4
1662 || ins.handler_idx == WINED3DSIH_GATHER4_C
1663 || ins.handler_idx == WINED3DSIH_SAMPLE
1664 || ins.handler_idx == WINED3DSIH_SAMPLE_B
1665 || ins.handler_idx == WINED3DSIH_SAMPLE_C
1666 || ins.handler_idx == WINED3DSIH_SAMPLE_C_LZ
1667 || ins.handler_idx == WINED3DSIH_SAMPLE_GRAD
1668 || ins.handler_idx == WINED3DSIH_SAMPLE_LOD)
1670 shader_record_sample(reg_maps, ins.src[1].reg.idx[0].offset,
1671 ins.src[2].reg.idx[0].offset, reg_maps->sampler_map.count);
1673 else if (ins.handler_idx == WINED3DSIH_GATHER4_PO
1674 || ins.handler_idx == WINED3DSIH_GATHER4_PO_C)
1676 shader_record_sample(reg_maps, ins.src[2].reg.idx[0].offset,
1677 ins.src[3].reg.idx[0].offset, reg_maps->sampler_map.count);
1679 else if ((ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE)
1680 || (ins.handler_idx == WINED3DSIH_SAMPLE_INFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE))
1682 shader_record_sample(reg_maps, ins.src[0].reg.idx[0].offset,
1683 WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1685 else if (ins.handler_idx == WINED3DSIH_LD
1686 || ins.handler_idx == WINED3DSIH_LD2DMS
1687 || (ins.handler_idx == WINED3DSIH_LD_RAW && ins.src[1].reg.type == WINED3DSPR_RESOURCE)
1688 || (ins.handler_idx == WINED3DSIH_RESINFO && ins.src[1].reg.type == WINED3DSPR_RESOURCE))
1690 shader_record_sample(reg_maps, ins.src[1].reg.idx[0].offset,
1691 WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1693 else if (ins.handler_idx == WINED3DSIH_LD_STRUCTURED
1694 && ins.src[2].reg.type == WINED3DSPR_RESOURCE)
1696 shader_record_sample(reg_maps, ins.src[2].reg.idx[0].offset,
1697 WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1700 if (ins.predicate)
1701 if (!shader_record_register_usage(shader, reg_maps, &ins.predicate->reg,
1702 shader_version.type, constf_size))
1703 return WINED3DERR_INVALIDCALL;
1705 for (i = 0; i < ins.src_count; ++i)
1707 unsigned int count = get_instr_extra_regcount(ins.handler_idx, i);
1708 struct wined3d_shader_register reg = ins.src[i].reg;
1710 if (!shader_record_register_usage(shader, reg_maps, &ins.src[i].reg,
1711 shader_version.type, constf_size))
1712 return WINED3DERR_INVALIDCALL;
1713 while (count)
1715 ++reg.idx[0].offset;
1716 if (!shader_record_register_usage(shader, reg_maps, &reg,
1717 shader_version.type, constf_size))
1718 return WINED3DERR_INVALIDCALL;
1719 --count;
1722 if (color0_mov)
1724 if (ins.src[i].reg.type == WINED3DSPR_TEMP
1725 && ins.src[i].swizzle == WINED3DSP_NOSWIZZLE)
1727 shader->u.ps.color0_mov = TRUE;
1728 shader->u.ps.color0_reg = ins.src[i].reg.idx[0].offset;
1734 prev_ins = current_ins;
1736 reg_maps->loop_depth = max_loop_depth;
1738 if (phase)
1740 phase->end = prev_ins;
1741 phase = NULL;
1744 /* PS before 2.0 don't have explicit color outputs. Instead the value of
1745 * R0 is written to the render target. */
1746 if (shader_version.major < 2 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1747 reg_maps->rt_mask |= (1u << 0);
1749 if (input_signature->elements)
1751 for (i = 0; i < input_signature->element_count; ++i)
1753 if (shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1755 if (input_signature->elements[i].register_idx >= ARRAY_SIZE(shader->u.vs.attributes))
1757 WARN("Invalid input signature register index %u.\n", input_signature->elements[i].register_idx);
1758 return WINED3DERR_INVALIDCALL;
1761 else if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1763 if (input_signature->elements[i].sysval_semantic == WINED3D_SV_POSITION)
1764 reg_maps->vpos = 1;
1765 else if (input_signature->elements[i].sysval_semantic == WINED3D_SV_IS_FRONT_FACE)
1766 reg_maps->usesfacing = 1;
1768 reg_maps->input_registers |= 1u << input_signature->elements[i].register_idx;
1771 else if (!input_signature->elements && reg_maps->input_registers)
1773 unsigned int count = wined3d_popcount(reg_maps->input_registers);
1774 struct wined3d_shader_signature_element *e;
1775 unsigned int i;
1777 if (!(input_signature->elements = heap_calloc(count, sizeof(*input_signature->elements))))
1778 return E_OUTOFMEMORY;
1779 input_signature->element_count = count;
1781 e = input_signature->elements;
1782 for (i = 0; i < ARRAY_SIZE(input_signature_elements); ++i)
1784 if (!(reg_maps->input_registers & (1u << i)))
1785 continue;
1786 input_signature_elements[i].register_idx = i;
1787 *e++ = input_signature_elements[i];
1791 if (output_signature->elements)
1793 if (FAILED(hr = shader_scan_output_signature(shader)))
1794 return hr;
1796 else if (reg_maps->output_registers)
1798 unsigned int count = wined3d_popcount(reg_maps->output_registers);
1799 struct wined3d_shader_signature_element *e;
1801 if (!(output_signature->elements = heap_calloc(count, sizeof(*output_signature->elements))))
1802 return E_OUTOFMEMORY;
1803 output_signature->element_count = count;
1805 e = output_signature->elements;
1806 for (i = 0; i < ARRAY_SIZE(output_signature_elements); ++i)
1808 if (!(reg_maps->output_registers & (1u << i)))
1809 continue;
1810 *e++ = output_signature_elements[i];
1814 return WINED3D_OK;
1817 static void shader_cleanup_reg_maps(struct wined3d_shader_reg_maps *reg_maps)
1819 struct wined3d_shader_indexable_temp *reg, *reg_next;
1821 heap_free(reg_maps->constf);
1822 heap_free(reg_maps->sampler_map.entries);
1824 LIST_FOR_EACH_ENTRY_SAFE(reg, reg_next, &reg_maps->indexable_temps, struct wined3d_shader_indexable_temp, entry)
1825 heap_free(reg);
1826 list_init(&reg_maps->indexable_temps);
1828 heap_free(reg_maps->tgsm);
1831 unsigned int shader_find_free_input_register(const struct wined3d_shader_reg_maps *reg_maps, unsigned int max)
1833 DWORD map = 1u << max;
1834 map |= map - 1;
1835 map &= reg_maps->shader_version.major < 3 ? ~reg_maps->texcoord : ~reg_maps->input_registers;
1837 return wined3d_log2i(map);
1840 /* Shared code in order to generate the bulk of the shader string. */
1841 HRESULT shader_generate_code(const struct wined3d_shader *shader, struct wined3d_string_buffer *buffer,
1842 const struct wined3d_shader_reg_maps *reg_maps, void *backend_ctx,
1843 const DWORD *start, const DWORD *end)
1845 struct wined3d_device *device = shader->device;
1846 const struct wined3d_shader_frontend *fe = shader->frontend;
1847 void *fe_data = shader->frontend_data;
1848 struct wined3d_shader_version shader_version;
1849 struct wined3d_shader_parser_state state;
1850 struct wined3d_shader_instruction ins;
1851 struct wined3d_shader_tex_mx tex_mx;
1852 struct wined3d_shader_context ctx;
1853 const DWORD *ptr;
1855 /* Initialize current parsing state. */
1856 tex_mx.current_row = 0;
1857 state.current_loop_depth = 0;
1858 state.current_loop_reg = 0;
1859 state.in_subroutine = FALSE;
1861 ctx.shader = shader;
1862 ctx.reg_maps = reg_maps;
1863 ctx.buffer = buffer;
1864 ctx.tex_mx = &tex_mx;
1865 ctx.state = &state;
1866 ctx.backend_data = backend_ctx;
1867 ins.ctx = &ctx;
1869 fe->shader_read_header(fe_data, &ptr, &shader_version);
1870 if (start)
1871 ptr = start;
1873 while (!fe->shader_is_end(fe_data, &ptr) && ptr != end)
1875 /* Read opcode. */
1876 fe->shader_read_instruction(fe_data, &ptr, &ins);
1878 /* Unknown opcode and its parameters. */
1879 if (ins.handler_idx == WINED3DSIH_TABLE_SIZE)
1881 WARN("Encountered unrecognised or invalid instruction.\n");
1882 return WINED3DERR_INVALIDCALL;
1885 if (ins.predicate)
1886 FIXME("Predicates not implemented.\n");
1888 /* Call appropriate function for output target */
1889 device->shader_backend->shader_handle_instruction(&ins);
1892 return WINED3D_OK;
1895 static void shader_cleanup(struct wined3d_shader *shader)
1897 if (shader->reg_maps.shader_version.type == WINED3D_SHADER_TYPE_HULL)
1899 heap_free(shader->u.hs.phases.control_point);
1900 heap_free(shader->u.hs.phases.fork);
1901 heap_free(shader->u.hs.phases.join);
1904 heap_free(shader->patch_constant_signature.elements);
1905 heap_free(shader->output_signature.elements);
1906 heap_free(shader->input_signature.elements);
1907 shader->device->shader_backend->shader_destroy(shader);
1908 shader_cleanup_reg_maps(&shader->reg_maps);
1909 heap_free(shader->byte_code);
1910 shader_delete_constant_list(&shader->constantsF);
1911 shader_delete_constant_list(&shader->constantsB);
1912 shader_delete_constant_list(&shader->constantsI);
1913 list_remove(&shader->shader_list_entry);
1915 if (shader->frontend && shader->frontend_data)
1916 shader->frontend->shader_free(shader->frontend_data);
1919 struct shader_none_priv
1921 const struct wined3d_vertex_pipe_ops *vertex_pipe;
1922 const struct wined3d_fragment_pipe_ops *fragment_pipe;
1923 BOOL ffp_proj_control;
1926 static void shader_none_handle_instruction(const struct wined3d_shader_instruction *ins) {}
1927 static void shader_none_precompile(void *shader_priv, struct wined3d_shader *shader) {}
1928 static void shader_none_select_compute(void *shader_priv, struct wined3d_context *context,
1929 const struct wined3d_state *state) {}
1930 static void shader_none_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count) {}
1931 static void shader_none_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count) {}
1932 static void shader_none_load_constants(void *shader_priv, struct wined3d_context *context,
1933 const struct wined3d_state *state) {}
1934 static void shader_none_destroy(struct wined3d_shader *shader) {}
1935 static void shader_none_free_context_data(struct wined3d_context *context) {}
1936 static void shader_none_init_context_state(struct wined3d_context *context) {}
1938 /* Context activation is done by the caller. */
1939 static void shader_none_select(void *shader_priv, struct wined3d_context *context,
1940 const struct wined3d_state *state)
1942 struct shader_none_priv *priv = shader_priv;
1944 priv->vertex_pipe->vp_enable(context, !use_vs(state));
1945 priv->fragment_pipe->fp_enable(context, !use_ps(state));
1948 /* Context activation is done by the caller. */
1949 static void shader_none_disable(void *shader_priv, struct wined3d_context *context)
1951 struct shader_none_priv *priv = shader_priv;
1953 priv->vertex_pipe->vp_enable(context, FALSE);
1954 priv->fragment_pipe->fp_enable(context, FALSE);
1956 context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL)
1957 | (1u << WINED3D_SHADER_TYPE_VERTEX)
1958 | (1u << WINED3D_SHADER_TYPE_GEOMETRY)
1959 | (1u << WINED3D_SHADER_TYPE_HULL)
1960 | (1u << WINED3D_SHADER_TYPE_DOMAIN)
1961 | (1u << WINED3D_SHADER_TYPE_COMPUTE);
1964 static HRESULT shader_none_alloc(struct wined3d_device *device, const struct wined3d_vertex_pipe_ops *vertex_pipe,
1965 const struct wined3d_fragment_pipe_ops *fragment_pipe)
1967 struct fragment_caps fragment_caps;
1968 void *vertex_priv, *fragment_priv;
1969 struct shader_none_priv *priv;
1971 if (!(priv = heap_alloc(sizeof(*priv))))
1972 return E_OUTOFMEMORY;
1974 if (!(vertex_priv = vertex_pipe->vp_alloc(&none_shader_backend, priv)))
1976 ERR("Failed to initialize vertex pipe.\n");
1977 heap_free(priv);
1978 return E_FAIL;
1981 if (!(fragment_priv = fragment_pipe->alloc_private(&none_shader_backend, priv)))
1983 ERR("Failed to initialize fragment pipe.\n");
1984 vertex_pipe->vp_free(device, NULL);
1985 heap_free(priv);
1986 return E_FAIL;
1989 priv->vertex_pipe = vertex_pipe;
1990 priv->fragment_pipe = fragment_pipe;
1991 fragment_pipe->get_caps(device->adapter, &fragment_caps);
1992 priv->ffp_proj_control = fragment_caps.wined3d_caps & WINED3D_FRAGMENT_CAP_PROJ_CONTROL;
1994 device->vertex_priv = vertex_priv;
1995 device->fragment_priv = fragment_priv;
1996 device->shader_priv = priv;
1998 return WINED3D_OK;
2001 static void shader_none_free(struct wined3d_device *device, struct wined3d_context *context)
2003 struct shader_none_priv *priv = device->shader_priv;
2005 priv->fragment_pipe->free_private(device, context);
2006 priv->vertex_pipe->vp_free(device, context);
2007 heap_free(priv);
2010 static BOOL shader_none_allocate_context_data(struct wined3d_context *context)
2012 return TRUE;
2015 static void shader_none_get_caps(const struct wined3d_adapter *adapter, struct shader_caps *caps)
2017 /* Set the shader caps to 0 for the none shader backend */
2018 memset(caps, 0, sizeof(*caps));
2021 static BOOL shader_none_color_fixup_supported(struct color_fixup_desc fixup)
2023 /* We "support" every possible fixup, since we don't support any shader
2024 * model, and will never have to actually sample a texture. */
2025 return TRUE;
2028 static BOOL shader_none_has_ffp_proj_control(void *shader_priv)
2030 struct shader_none_priv *priv = shader_priv;
2032 return priv->ffp_proj_control;
2035 static uint64_t shader_none_shader_compile(struct wined3d_context *context, const struct wined3d_shader_desc *shader_desc,
2036 enum wined3d_shader_type shader_type)
2038 return 0;
2041 const struct wined3d_shader_backend_ops none_shader_backend =
2043 shader_none_handle_instruction,
2044 shader_none_precompile,
2045 shader_none_select,
2046 shader_none_select_compute,
2047 shader_none_disable,
2048 shader_none_update_float_vertex_constants,
2049 shader_none_update_float_pixel_constants,
2050 shader_none_load_constants,
2051 shader_none_destroy,
2052 shader_none_alloc,
2053 shader_none_free,
2054 shader_none_allocate_context_data,
2055 shader_none_free_context_data,
2056 shader_none_init_context_state,
2057 shader_none_get_caps,
2058 shader_none_color_fixup_supported,
2059 shader_none_has_ffp_proj_control,
2060 shader_none_shader_compile,
2063 static unsigned int shader_max_version_from_feature_level(enum wined3d_feature_level level)
2065 switch (level)
2067 case WINED3D_FEATURE_LEVEL_11_1:
2068 case WINED3D_FEATURE_LEVEL_11:
2069 return 5;
2070 case WINED3D_FEATURE_LEVEL_10_1:
2071 case WINED3D_FEATURE_LEVEL_10:
2072 return 4;
2073 case WINED3D_FEATURE_LEVEL_9_3:
2074 return 3;
2075 case WINED3D_FEATURE_LEVEL_9_2:
2076 case WINED3D_FEATURE_LEVEL_9_1:
2077 return 2;
2078 default:
2079 return 1;
2083 static HRESULT shader_set_function(struct wined3d_shader *shader, struct wined3d_device *device,
2084 enum wined3d_shader_type type, unsigned int float_const_count)
2086 const struct wined3d_d3d_info *d3d_info = &shader->device->adapter->d3d_info;
2087 struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
2088 const struct wined3d_shader_version *version = &reg_maps->shader_version;
2089 const struct wined3d_shader_frontend *fe;
2090 unsigned int backend_version;
2091 HRESULT hr;
2093 TRACE("shader %p, device %p, type %s, float_const_count %u.\n",
2094 shader, device, debug_shader_type(type), float_const_count);
2096 fe = shader->frontend;
2097 if (!(shader->frontend_data = fe->shader_init(shader->function,
2098 shader->functionLength, &shader->output_signature)))
2100 FIXME("Failed to initialize frontend.\n");
2101 return WINED3DERR_INVALIDCALL;
2104 if (FAILED(hr = shader_get_registers_used(shader, float_const_count)))
2105 return hr;
2107 if (version->type != type)
2109 WARN("Wrong shader type %s.\n", debug_shader_type(reg_maps->shader_version.type));
2110 return WINED3DERR_INVALIDCALL;
2112 if (version->major > shader_max_version_from_feature_level(device->cs->c.state->feature_level))
2114 WARN("Shader version %u not supported by this device.\n", version->major);
2115 return WINED3DERR_INVALIDCALL;
2117 switch (type)
2119 case WINED3D_SHADER_TYPE_VERTEX:
2120 backend_version = d3d_info->limits.vs_version;
2121 break;
2122 case WINED3D_SHADER_TYPE_HULL:
2123 backend_version = d3d_info->limits.hs_version;
2124 break;
2125 case WINED3D_SHADER_TYPE_DOMAIN:
2126 backend_version = d3d_info->limits.ds_version;
2127 break;
2128 case WINED3D_SHADER_TYPE_GEOMETRY:
2129 backend_version = d3d_info->limits.gs_version;
2130 break;
2131 case WINED3D_SHADER_TYPE_PIXEL:
2132 backend_version = d3d_info->limits.ps_version;
2133 break;
2134 case WINED3D_SHADER_TYPE_COMPUTE:
2135 backend_version = d3d_info->limits.cs_version;
2136 break;
2137 default:
2138 FIXME("No backend version-checking for this shader type.\n");
2139 backend_version = 0;
2141 if (version->major > backend_version)
2143 WARN("Shader version %u.%u not supported by the current shader backend.\n",
2144 version->major, version->minor);
2145 return WINED3DERR_INVALIDCALL;
2148 shader->load_local_constsF = shader->lconst_inf_or_nan;
2150 return WINED3D_OK;
2153 ULONG CDECL wined3d_shader_incref(struct wined3d_shader *shader)
2155 unsigned int refcount = InterlockedIncrement(&shader->ref);
2157 TRACE("%p increasing refcount to %u.\n", shader, refcount);
2159 return refcount;
2162 static void wined3d_shader_init_object(void *object)
2164 struct wined3d_shader *shader = object;
2165 struct wined3d_device *device = shader->device;
2167 TRACE("shader %p.\n", shader);
2169 list_add_head(&device->shaders, &shader->shader_list_entry);
2171 device->shader_backend->shader_precompile(device->shader_priv, shader);
/* Clean up the shader passed as "object" and free its memory. */
static void wined3d_shader_destroy_object(void *object)
{
    struct wined3d_shader *shader = object;

    TRACE("object %p.\n", object);

    shader_cleanup(shader);
    heap_free(shader);
}
2182 ULONG CDECL wined3d_shader_decref(struct wined3d_shader *shader)
2184 unsigned int refcount = InterlockedDecrement(&shader->ref);
2186 TRACE("%p decreasing refcount to %u.\n", shader, refcount);
2188 if (!refcount)
2190 wined3d_mutex_lock();
2191 shader->parent_ops->wined3d_object_destroyed(shader->parent);
2192 wined3d_cs_destroy_object(shader->device->cs, wined3d_shader_destroy_object, shader);
2193 wined3d_mutex_unlock();
2196 return refcount;
2199 void * CDECL wined3d_shader_get_parent(const struct wined3d_shader *shader)
2201 TRACE("shader %p.\n", shader);
2203 return shader->parent;
2206 HRESULT CDECL wined3d_shader_get_byte_code(const struct wined3d_shader *shader,
2207 void *byte_code, UINT *byte_code_size)
2209 TRACE("shader %p, byte_code %p, byte_code_size %p.\n", shader, byte_code, byte_code_size);
2211 if (!byte_code)
2213 *byte_code_size = shader->byte_code_size;
2214 return WINED3D_OK;
2217 if (*byte_code_size < shader->byte_code_size)
2219 /* MSDN claims (for d3d8 at least) that if *byte_code_size is smaller
2220 * than the required size we should write the required size and
2221 * return D3DERR_MOREDATA. That's not actually true. */
2222 return WINED3DERR_INVALIDCALL;
2225 memcpy(byte_code, shader->byte_code, shader->byte_code_size);
2227 return WINED3D_OK;
2230 /* Set local constants for d3d8 shaders. */
2231 HRESULT CDECL wined3d_shader_set_local_constants_float(struct wined3d_shader *shader,
2232 UINT start_idx, const float *src_data, UINT count)
2234 UINT end_idx = start_idx + count;
2235 UINT i;
2237 TRACE("shader %p, start_idx %u, src_data %p, count %u.\n", shader, start_idx, src_data, count);
2239 if (end_idx > shader->limits->constant_float)
2241 WARN("end_idx %u > float constants limit %u.\n",
2242 end_idx, shader->limits->constant_float);
2243 end_idx = shader->limits->constant_float;
2246 for (i = start_idx; i < end_idx; ++i)
2248 struct wined3d_shader_lconst *lconst;
2249 float *value;
2251 if (!(lconst = heap_alloc(sizeof(*lconst))))
2252 return E_OUTOFMEMORY;
2254 lconst->idx = i;
2255 value = (float *)lconst->value;
2256 memcpy(value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
2257 list_add_head(&shader->constantsF, &lconst->entry);
2259 if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
2260 || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
2262 shader->lconst_inf_or_nan = TRUE;
2266 return WINED3D_OK;
2269 static void init_interpolation_compile_args(uint32_t *interpolation_args,
2270 const struct wined3d_shader *pixel_shader, const struct wined3d_d3d_info *d3d_info)
2272 if (!d3d_info->shader_output_interpolation || !pixel_shader
2273 || pixel_shader->reg_maps.shader_version.major < 4)
2275 memset(interpolation_args, 0, sizeof(pixel_shader->u.ps.interpolation_mode));
2276 return;
2279 memcpy(interpolation_args, pixel_shader->u.ps.interpolation_mode,
2280 sizeof(pixel_shader->u.ps.interpolation_mode));
2283 void find_vs_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
2284 struct vs_compile_args *args, const struct wined3d_context *context)
2286 const struct wined3d_shader *geometry_shader = state->shader[WINED3D_SHADER_TYPE_GEOMETRY];
2287 const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
2288 const struct wined3d_shader *hull_shader = state->shader[WINED3D_SHADER_TYPE_HULL];
2289 const struct wined3d_d3d_info *d3d_info = context->d3d_info;
2290 WORD swizzle_map = context->stream_info.swizzle_map;
2292 args->fog_src = state->render_states[WINED3D_RS_FOGTABLEMODE]
2293 == WINED3D_FOG_NONE ? VS_FOG_COORD : VS_FOG_Z;
2294 args->clip_enabled = state->render_states[WINED3D_RS_CLIPPING]
2295 && state->render_states[WINED3D_RS_CLIPPLANEENABLE];
2296 args->point_size = state->primitive_type == WINED3D_PT_POINTLIST;
2297 args->per_vertex_point_size = shader->reg_maps.point_size;
2298 args->next_shader_type = hull_shader ? WINED3D_SHADER_TYPE_HULL
2299 : geometry_shader ? WINED3D_SHADER_TYPE_GEOMETRY : WINED3D_SHADER_TYPE_PIXEL;
2300 if (shader->reg_maps.shader_version.major >= 4)
2301 args->next_shader_input_count = hull_shader ? hull_shader->limits->packed_input
2302 : geometry_shader ? geometry_shader->limits->packed_input
2303 : pixel_shader ? pixel_shader->limits->packed_input : 0;
2304 else
2305 args->next_shader_input_count = 0;
2306 args->swizzle_map = swizzle_map;
2307 if (d3d_info->emulated_flatshading)
2308 args->flatshading = state->render_states[WINED3D_RS_SHADEMODE] == WINED3D_SHADE_FLAT;
2309 else
2310 args->flatshading = 0;
2312 init_interpolation_compile_args(args->interpolation_mode,
2313 args->next_shader_type == WINED3D_SHADER_TYPE_PIXEL ? pixel_shader : NULL, d3d_info);
2316 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2)
2318 if (usage_idx1 != usage_idx2)
2319 return FALSE;
2320 if (usage1 == usage2)
2321 return TRUE;
2322 if (usage1 == WINED3D_DECL_USAGE_POSITION && usage2 == WINED3D_DECL_USAGE_POSITIONT)
2323 return TRUE;
2324 if (usage2 == WINED3D_DECL_USAGE_POSITION && usage1 == WINED3D_DECL_USAGE_POSITIONT)
2325 return TRUE;
2327 return FALSE;
2330 bool vshader_get_input(const struct wined3d_shader *shader,
2331 uint8_t usage_req, uint8_t usage_idx_req, unsigned int *regnum)
2333 uint32_t map = shader->reg_maps.input_registers & 0xffff;
2334 unsigned int i;
2336 while (map)
2338 i = wined3d_bit_scan(&map);
2339 if (match_usage(shader->u.vs.attributes[i].usage,
2340 shader->u.vs.attributes[i].usage_idx, usage_req, usage_idx_req))
2342 *regnum = i;
2343 return true;
2347 return false;
2350 static void shader_trace(const void *code, size_t size, enum vkd3d_shader_source_type source_type)
2352 struct vkd3d_shader_compile_info info;
2353 struct vkd3d_shader_code d3d_asm;
2354 const char *ptr, *end, *line;
2355 char *messages;
2356 int ret;
2358 static const struct vkd3d_shader_compile_option compile_options[] =
2360 {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_6},
2363 info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO;
2364 info.next = NULL;
2365 info.source.code = code;
2366 info.source.size = size;
2367 info.source_type = source_type;
2368 info.target_type = VKD3D_SHADER_TARGET_D3D_ASM;
2369 info.options = compile_options;
2370 info.option_count = ARRAY_SIZE(compile_options);
2371 info.log_level = VKD3D_SHADER_LOG_WARNING;
2372 info.source_name = NULL;
2374 ret = vkd3d_shader_compile(&info, &d3d_asm, &messages);
2375 if (messages && *messages && FIXME_ON(d3d_shader))
2377 FIXME("Shader log:\n");
2378 ptr = messages;
2379 end = ptr + strlen(ptr);
2380 while ((line = wined3d_get_line(&ptr, end)))
2382 FIXME(" %.*s", (int)(ptr - line), line);
2384 FIXME("\n");
2386 vkd3d_shader_free_messages(messages);
2388 if (ret < 0)
2390 ERR("Failed to disassemble, ret %d.\n", ret);
2391 return;
2394 ptr = d3d_asm.code;
2395 end = ptr + d3d_asm.size;
2396 while ((line = wined3d_get_line(&ptr, end)))
2398 TRACE(" %.*s", (int)(ptr - line), line);
2400 TRACE("\n");
2402 vkd3d_shader_free_shader_code(&d3d_asm);
2405 static HRESULT shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
2406 const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
2408 enum vkd3d_shader_source_type source_type;
2409 HRESULT hr;
2411 TRACE("byte_code %p, byte_code_size %#lx.\n", desc->byte_code, (long)desc->byte_code_size);
2413 if (!desc->byte_code)
2414 return WINED3DERR_INVALIDCALL;
2416 shader->ref = 1;
2417 shader->device = device;
2418 shader->parent = parent;
2419 shader->parent_ops = parent_ops;
2421 list_init(&shader->linked_programs);
2422 list_init(&shader->constantsF);
2423 list_init(&shader->constantsB);
2424 list_init(&shader->constantsI);
2425 shader->lconst_inf_or_nan = FALSE;
2426 list_init(&shader->reg_maps.indexable_temps);
2427 list_init(&shader->shader_list_entry);
2429 if (desc->byte_code_size == ~(size_t)0)
2431 struct wined3d_shader_version shader_version;
2432 const struct wined3d_shader_frontend *fe;
2433 struct wined3d_shader_instruction ins;
2434 const DWORD *ptr;
2435 void *fe_data;
2437 source_type = VKD3D_SHADER_SOURCE_D3D_BYTECODE;
2438 if (!(shader->frontend = shader_select_frontend(source_type)))
2440 FIXME("Unable to find frontend for shader.\n");
2441 hr = WINED3DERR_INVALIDCALL;
2442 goto fail;
2445 fe = shader->frontend;
2446 if (!(fe_data = fe->shader_init(desc->byte_code, desc->byte_code_size, &shader->output_signature)))
2448 WARN("Failed to initialise frontend data.\n");
2449 hr = WINED3DERR_INVALIDCALL;
2450 goto fail;
2453 fe->shader_read_header(fe_data, &ptr, &shader_version);
2454 while (!fe->shader_is_end(fe_data, &ptr))
2455 fe->shader_read_instruction(fe_data, &ptr, &ins);
2457 fe->shader_free(fe_data);
2459 shader->byte_code_size = (ptr - desc->byte_code) * sizeof(*ptr);
2461 if (!(shader->byte_code = heap_alloc(shader->byte_code_size)))
2463 hr = E_OUTOFMEMORY;
2464 goto fail;
2466 memcpy(shader->byte_code, desc->byte_code, shader->byte_code_size);
2468 shader->function = shader->byte_code;
2469 shader->functionLength = shader->byte_code_size;
2471 else
2473 unsigned int max_version;
2475 if (!(shader->byte_code = heap_alloc(desc->byte_code_size)))
2477 hr = E_OUTOFMEMORY;
2478 goto fail;
2480 memcpy(shader->byte_code, desc->byte_code, desc->byte_code_size);
2481 shader->byte_code_size = desc->byte_code_size;
2483 max_version = shader_max_version_from_feature_level(device->cs->c.state->feature_level);
2484 if (FAILED(hr = wined3d_shader_extract_from_dxbc(shader, max_version, &source_type)))
2485 goto fail;
2487 if (!(shader->frontend = shader_select_frontend(source_type)))
2489 FIXME("Unable to find frontend for shader.\n");
2490 hr = WINED3DERR_INVALIDCALL;
2491 goto fail;
2495 if (TRACE_ON(d3d_shader))
2497 if (source_type == VKD3D_SHADER_SOURCE_D3D_BYTECODE)
2498 shader_trace(shader->function, shader->functionLength, source_type);
2499 else
2500 shader_trace(shader->byte_code, shader->byte_code_size, source_type);
2504 return WINED3D_OK;
2506 fail:
2507 shader_cleanup(shader);
2508 return hr;
2511 static HRESULT vertex_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
2512 const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
2514 struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
2515 unsigned int i;
2516 HRESULT hr;
2518 if (FAILED(hr = shader_init(shader, device, desc, parent, parent_ops)))
2519 return hr;
2521 if (FAILED(hr = shader_set_function(shader, device,
2522 WINED3D_SHADER_TYPE_VERTEX, device->adapter->d3d_info.limits.vs_uniform_count)))
2524 shader_cleanup(shader);
2525 return hr;
2528 for (i = 0; i < shader->input_signature.element_count; ++i)
2530 const struct wined3d_shader_signature_element *input = &shader->input_signature.elements[i];
2532 if (!(reg_maps->input_registers & (1u << input->register_idx)) || !input->semantic_name)
2533 continue;
2535 shader->u.vs.attributes[input->register_idx].usage =
2536 shader_usage_from_semantic_name(input->semantic_name);
2537 shader->u.vs.attributes[input->register_idx].usage_idx = input->semantic_idx;
2540 if (reg_maps->usesrelconstF && !list_empty(&shader->constantsF))
2541 shader->load_local_constsF = TRUE;
2543 return WINED3D_OK;
2546 static struct wined3d_shader_signature_element *shader_find_signature_element(const struct wined3d_shader_signature *s,
2547 unsigned int stream_idx, const char *semantic_name, unsigned int semantic_idx)
2549 struct wined3d_shader_signature_element *e = s->elements;
2550 unsigned int i;
2552 for (i = 0; i < s->element_count; ++i)
2554 if (e[i].stream_idx == stream_idx
2555 && !stricmp(e[i].semantic_name, semantic_name)
2556 && e[i].semantic_idx == semantic_idx)
2557 return &e[i];
2560 return NULL;
2563 BOOL shader_get_stream_output_register_info(const struct wined3d_shader *shader,
2564 const struct wined3d_stream_output_element *so_element, unsigned int *register_idx, unsigned int *component_idx)
2566 const struct wined3d_shader_signature_element *output;
2567 unsigned int idx;
2569 if (!(output = shader_find_signature_element(&shader->output_signature,
2570 so_element->stream_idx, so_element->semantic_name, so_element->semantic_idx)))
2571 return FALSE;
2573 for (idx = 0; idx < 4; ++idx)
2575 if (output->mask & (1u << idx))
2576 break;
2578 idx += so_element->component_idx;
2580 *register_idx = output->register_idx;
2581 *component_idx = idx;
2582 return TRUE;
2585 static HRESULT geometry_shader_init_so_desc(struct wined3d_geometry_shader *gs, struct wined3d_device *device,
2586 const struct wined3d_stream_output_desc *so_desc)
2588 struct wined3d_so_desc_entry *s;
2589 struct wine_rb_entry *entry;
2590 unsigned int i;
2591 size_t size;
2592 char *name;
2594 if ((entry = wine_rb_get(&device->so_descs, so_desc)))
2596 gs->so_desc = &WINE_RB_ENTRY_VALUE(entry, struct wined3d_so_desc_entry, entry)->desc;
2597 return WINED3D_OK;
2600 size = FIELD_OFFSET(struct wined3d_so_desc_entry, elements[so_desc->element_count]);
2601 for (i = 0; i < so_desc->element_count; ++i)
2603 const char *n = so_desc->elements[i].semantic_name;
2605 if (n)
2606 size += strlen(n) + 1;
2608 if (!(s = heap_alloc(size)))
2609 return E_OUTOFMEMORY;
2611 s->desc = *so_desc;
2613 memcpy(s->elements, so_desc->elements, so_desc->element_count * sizeof(*s->elements));
2614 s->desc.elements = s->elements;
2616 name = (char *)&s->elements[s->desc.element_count];
2617 for (i = 0; i < so_desc->element_count; ++i)
2619 struct wined3d_stream_output_element *e = &s->elements[i];
2621 if (!e->semantic_name)
2622 continue;
2624 size = strlen(e->semantic_name) + 1;
2625 memcpy(name, e->semantic_name, size);
2626 e->semantic_name = name;
2627 name += size;
2630 if (wine_rb_put(&device->so_descs, &s->desc, &s->entry) == -1)
2632 heap_free(s);
2633 return E_FAIL;
2635 gs->so_desc = &s->desc;
2637 return WINED3D_OK;
2640 static HRESULT geometry_shader_init_stream_output(struct wined3d_shader *shader,
2641 const struct wined3d_stream_output_desc *so_desc)
2643 const struct wined3d_shader_frontend *fe = shader->frontend;
2644 const struct wined3d_shader_signature_element *output;
2645 unsigned int i, component_idx, register_idx, mask;
2646 struct wined3d_shader_version shader_version;
2647 const DWORD *ptr;
2648 void *fe_data;
2649 HRESULT hr;
2651 if (!so_desc)
2652 return WINED3D_OK;
2654 if (!(fe_data = fe->shader_init(shader->function, shader->functionLength, &shader->output_signature)))
2656 WARN("Failed to initialise frontend data.\n");
2657 return WINED3DERR_INVALIDCALL;
2659 fe->shader_read_header(fe_data, &ptr, &shader_version);
2660 fe->shader_free(fe_data);
2662 switch (shader_version.type)
2664 case WINED3D_SHADER_TYPE_VERTEX:
2665 case WINED3D_SHADER_TYPE_DOMAIN:
2666 shader->function = NULL;
2667 shader->functionLength = 0;
2668 break;
2669 case WINED3D_SHADER_TYPE_GEOMETRY:
2670 break;
2671 default:
2672 WARN("Wrong shader type %s.\n", debug_shader_type(shader_version.type));
2673 return E_INVALIDARG;
2676 if (!shader->function)
2678 shader->reg_maps.shader_version = shader_version;
2679 shader->reg_maps.shader_version.type = WINED3D_SHADER_TYPE_GEOMETRY;
2680 shader_set_limits(shader);
2681 if (FAILED(hr = shader_scan_output_signature(shader)))
2682 return hr;
2685 for (i = 0; i < so_desc->element_count; ++i)
2687 const struct wined3d_stream_output_element *e = &so_desc->elements[i];
2689 if (!e->semantic_name)
2690 continue;
2691 if (!(output = shader_find_signature_element(&shader->output_signature,
2692 e->stream_idx, e->semantic_name, e->semantic_idx))
2693 || !shader_get_stream_output_register_info(shader, e, &register_idx, &component_idx))
2695 WARN("Failed to find output signature element for stream output entry.\n");
2696 return E_INVALIDARG;
2699 mask = wined3d_mask_from_size(e->component_count) << component_idx;
2700 if ((output->mask & 0xff & mask) != mask)
2702 WARN("Invalid component range %u-%u (mask %#x), output mask %#x.\n",
2703 component_idx, e->component_count, mask, output->mask & 0xff);
2704 return E_INVALIDARG;
2708 if (FAILED(hr = geometry_shader_init_so_desc(&shader->u.gs, shader->device, so_desc)))
2710 WARN("Failed to initialise stream output description, hr %#lx.\n", hr);
2711 return hr;
2714 return WINED3D_OK;
2717 static HRESULT geometry_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
2718 const struct wined3d_shader_desc *desc, const struct wined3d_stream_output_desc *so_desc,
2719 void *parent, const struct wined3d_parent_ops *parent_ops)
2721 HRESULT hr;
2723 if (FAILED(hr = shader_init(shader, device, desc, parent, parent_ops)))
2724 return hr;
2726 if (FAILED(hr = geometry_shader_init_stream_output(shader, so_desc)))
2727 goto fail;
2729 if (shader->function
2730 && FAILED(hr = shader_set_function(shader, device, WINED3D_SHADER_TYPE_GEOMETRY, 0)))
2731 goto fail;
2733 return WINED3D_OK;
2735 fail:
2736 shader_cleanup(shader);
2737 return hr;
2740 void find_ds_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
2741 struct ds_compile_args *args, const struct wined3d_context *context)
2743 const struct wined3d_shader *geometry_shader = state->shader[WINED3D_SHADER_TYPE_GEOMETRY];
2744 const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
2745 const struct wined3d_shader *hull_shader = state->shader[WINED3D_SHADER_TYPE_HULL];
2747 args->tessellator_output_primitive = hull_shader->u.hs.tessellator_output_primitive;
2748 args->tessellator_partitioning = hull_shader->u.hs.tessellator_partitioning;
2750 args->output_count = geometry_shader ? geometry_shader->limits->packed_input
2751 : pixel_shader ? pixel_shader->limits->packed_input : shader->limits->packed_output;
2752 args->next_shader_type = geometry_shader ? WINED3D_SHADER_TYPE_GEOMETRY : WINED3D_SHADER_TYPE_PIXEL;
2754 args->render_offscreen = context->render_offscreen;
2756 init_interpolation_compile_args(args->interpolation_mode,
2757 args->next_shader_type == WINED3D_SHADER_TYPE_PIXEL ? pixel_shader : NULL, context->d3d_info);
2759 args->padding = 0;
2762 void find_gs_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
2763 struct gs_compile_args *args, const struct wined3d_context *context)
2765 const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
2767 args->output_count = pixel_shader ? pixel_shader->limits->packed_input : shader->limits->packed_output;
2769 if (!(args->primitive_type = shader->u.gs.input_type))
2770 args->primitive_type = state->primitive_type;
2772 init_interpolation_compile_args(args->interpolation_mode, pixel_shader, context->d3d_info);
2775 void find_ps_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
2776 BOOL position_transformed, struct ps_compile_args *args, const struct wined3d_context *context)
2778 const struct wined3d_d3d_info *d3d_info = context->d3d_info;
2779 struct wined3d_texture *texture;
2780 unsigned int i;
2782 memset(args, 0, sizeof(*args)); /* FIXME: Make sure all bits are set. */
2783 if (!d3d_info->srgb_write_control && needs_srgb_write(d3d_info, state, &state->fb))
2785 static unsigned int warned = 0;
2787 args->srgb_correction = 1;
2788 if (state->blend_state && state->blend_state->desc.rt[0].enable && !warned++)
2789 WARN("Blending into a sRGB render target with no GL_ARB_framebuffer_sRGB "
2790 "support, expect rendering artifacts.\n");
2793 if (shader->reg_maps.shader_version.major == 1
2794 && shader->reg_maps.shader_version.minor <= 3)
2796 for (i = 0; i < shader->limits->sampler; ++i)
2798 uint32_t flags = state->texture_states[i][WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS];
2800 if (flags & WINED3D_TTFF_PROJECTED)
2802 uint32_t tex_transform = flags & ~WINED3D_TTFF_PROJECTED;
2804 if (!state->shader[WINED3D_SHADER_TYPE_VERTEX])
2806 enum wined3d_shader_resource_type resource_type = shader->reg_maps.resource_info[i].type;
2807 unsigned int j;
2808 unsigned int index = state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX];
2809 uint32_t max_valid = WINED3D_TTFF_COUNT4;
2811 for (j = 0; j < state->vertex_declaration->element_count; ++j)
2813 struct wined3d_vertex_declaration_element *element =
2814 &state->vertex_declaration->elements[j];
2816 if (element->usage == WINED3D_DECL_USAGE_TEXCOORD
2817 && element->usage_idx == index)
2819 max_valid = element->format->component_count;
2820 break;
2823 if (!tex_transform || tex_transform > max_valid)
2825 WARN("Fixing up projected texture transform flags from %#x to %#x.\n",
2826 tex_transform, max_valid);
2827 tex_transform = max_valid;
2829 if ((resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_1D && tex_transform > WINED3D_TTFF_COUNT1)
2830 || (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2D
2831 && tex_transform > WINED3D_TTFF_COUNT2)
2832 || (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D
2833 && tex_transform > WINED3D_TTFF_COUNT3))
2834 tex_transform |= WINED3D_PSARGS_PROJECTED;
2835 else
2837 WARN("Application requested projected texture with unsuitable texture coordinates.\n");
2838 WARN("(texture unit %u, transform flags %#x, sampler type %u).\n",
2839 i, tex_transform, resource_type);
2842 else
2843 tex_transform = WINED3D_TTFF_COUNT4 | WINED3D_PSARGS_PROJECTED;
2845 args->tex_transform |= tex_transform << i * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
2849 if (shader->reg_maps.shader_version.major == 1
2850 && shader->reg_maps.shader_version.minor <= 4)
2852 for (i = 0; i < shader->limits->sampler; ++i)
2854 if (!shader->reg_maps.resource_info[i].type)
2855 continue;
2857 /* Treat unbound textures as 2D. The dummy texture will provide
2858 * the proper sample value. The tex_types bitmap defaults to
2859 * 2D because of the memset. */
2860 if (!(texture = state->textures[i]))
2861 continue;
2863 switch (wined3d_texture_gl(texture)->target)
2865 /* RECT textures are distinguished from 2D textures via np2_fixup */
2866 default:
2867 break;
2869 case GL_TEXTURE_3D:
2870 args->tex_types |= WINED3D_SHADER_TEX_3D << i * WINED3D_PSARGS_TEXTYPE_SHIFT;
2871 break;
2873 case GL_TEXTURE_CUBE_MAP_ARB:
2874 args->tex_types |= WINED3D_SHADER_TEX_CUBE << i * WINED3D_PSARGS_TEXTYPE_SHIFT;
2875 break;
2879 else if (shader->reg_maps.shader_version.major <= 3)
2881 for (i = 0; i < shader->limits->sampler; ++i)
2883 enum wined3d_shader_resource_type resource_type;
2884 enum wined3d_shader_tex_types tex_type;
2886 if (!(resource_type = shader->reg_maps.resource_info[i].type))
2887 continue;
2889 switch (resource_type)
2891 case WINED3D_SHADER_RESOURCE_TEXTURE_3D:
2892 tex_type = WINED3D_SHADER_TEX_3D;
2893 break;
2894 case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE:
2895 tex_type = WINED3D_SHADER_TEX_CUBE;
2896 break;
2897 default:
2898 tex_type = WINED3D_SHADER_TEX_2D;
2899 break;
2902 if ((texture = state->textures[i]))
2904 /* Star Wars: The Old Republic uses mismatched samplers for rendering water. */
2905 if (texture->resource.type == WINED3D_RTYPE_TEXTURE_2D
2906 && resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D
2907 && !(texture->resource.usage & WINED3DUSAGE_LEGACY_CUBEMAP))
2908 tex_type = WINED3D_SHADER_TEX_2D;
2909 else if (texture->resource.type == WINED3D_RTYPE_TEXTURE_3D
2910 && resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2D)
2911 tex_type = WINED3D_SHADER_TEX_3D;
2913 args->tex_types |= tex_type << i * WINED3D_PSARGS_TEXTYPE_SHIFT;
2917 if (shader->reg_maps.shader_version.major >= 4)
2919 /* In SM4+ we use dcl_sampler in order to determine if we should use shadow sampler. */
2920 args->shadow = 0;
2921 for (i = 0 ; i < WINED3D_MAX_FRAGMENT_SAMPLERS; ++i)
2922 args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
2923 args->np2_fixup = 0;
2925 else
2927 for (i = 0; i < WINED3D_MAX_FRAGMENT_SAMPLERS; ++i)
2929 if (!shader->reg_maps.resource_info[i].type)
2930 continue;
2932 texture = state->textures[i];
2933 if (!texture)
2935 args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
2936 continue;
2938 if (can_use_texture_swizzle(d3d_info, texture->resource.format))
2939 args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
2940 else
2941 args->color_fixup[i] = texture->resource.format->color_fixup;
2943 if (texture->resource.format_caps & WINED3D_FORMAT_CAP_SHADOW)
2944 args->shadow |= 1u << i;
2946 /* Flag samplers that need NP2 texcoord fixup. */
2947 if (!(texture->flags & WINED3D_TEXTURE_POW2_MAT_IDENT))
2948 args->np2_fixup |= (1u << i);
2952 if (shader->reg_maps.shader_version.major >= 3)
2954 if (position_transformed)
2955 args->vp_mode = WINED3D_VP_MODE_NONE;
2956 else if (use_vs(state))
2957 args->vp_mode = WINED3D_VP_MODE_SHADER;
2958 else
2959 args->vp_mode = WINED3D_VP_MODE_FF;
2960 args->fog = WINED3D_FFP_PS_FOG_OFF;
2962 else
2964 args->vp_mode = WINED3D_VP_MODE_SHADER;
2965 if (state->render_states[WINED3D_RS_FOGENABLE])
2967 switch (state->render_states[WINED3D_RS_FOGTABLEMODE])
2969 case WINED3D_FOG_NONE:
2970 if (position_transformed || use_vs(state))
2972 args->fog = WINED3D_FFP_PS_FOG_LINEAR;
2973 break;
2976 switch (state->render_states[WINED3D_RS_FOGVERTEXMODE])
2978 case WINED3D_FOG_NONE: /* Fall through. */
2979 case WINED3D_FOG_LINEAR: args->fog = WINED3D_FFP_PS_FOG_LINEAR; break;
2980 case WINED3D_FOG_EXP: args->fog = WINED3D_FFP_PS_FOG_EXP; break;
2981 case WINED3D_FOG_EXP2: args->fog = WINED3D_FFP_PS_FOG_EXP2; break;
2983 break;
2985 case WINED3D_FOG_LINEAR: args->fog = WINED3D_FFP_PS_FOG_LINEAR; break;
2986 case WINED3D_FOG_EXP: args->fog = WINED3D_FFP_PS_FOG_EXP; break;
2987 case WINED3D_FOG_EXP2: args->fog = WINED3D_FFP_PS_FOG_EXP2; break;
2990 else
2992 args->fog = WINED3D_FFP_PS_FOG_OFF;
2996 if (!d3d_info->full_ffp_varyings)
2998 const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX];
3000 args->texcoords_initialized = 0;
3001 for (i = 0; i < WINED3D_MAX_TEXTURES; ++i)
3003 if (vs)
3005 if (state->shader[WINED3D_SHADER_TYPE_VERTEX]->reg_maps.output_registers & (1u << i))
3006 args->texcoords_initialized |= 1u << i;
3008 else
3010 const struct wined3d_stream_info *si = &context->stream_info;
3011 unsigned int coord_idx = state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX];
3013 if ((state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX] >> WINED3D_FFP_TCI_SHIFT)
3014 & WINED3D_FFP_TCI_MASK
3015 || (coord_idx < WINED3D_MAX_TEXTURES && (si->use_map & (1u << (WINED3D_FFP_TEXCOORD0 + coord_idx)))))
3016 args->texcoords_initialized |= 1u << i;
3020 else
3022 args->texcoords_initialized = wined3d_mask_from_size(WINED3D_MAX_TEXTURES);
3025 args->pointsprite = state->render_states[WINED3D_RS_POINTSPRITEENABLE]
3026 && state->primitive_type == WINED3D_PT_POINTLIST;
3028 if (d3d_info->ffp_alpha_test)
3029 args->alpha_test_func = WINED3D_CMP_ALWAYS - 1;
3030 else
3031 args->alpha_test_func = (state->render_states[WINED3D_RS_ALPHATESTENABLE]
3032 ? wined3d_sanitize_cmp_func(state->render_states[WINED3D_RS_ALPHAFUNC])
3033 : WINED3D_CMP_ALWAYS) - 1;
3035 if (d3d_info->emulated_flatshading)
3036 args->flatshading = state->render_states[WINED3D_RS_SHADEMODE] == WINED3D_SHADE_FLAT;
3038 args->y_correction = (shader->reg_maps.vpos && d3d_info->frag_coord_correction)
3039 || (shader->reg_maps.usesdsy && wined3d_settings.offscreen_rendering_mode != ORM_FBO)
3040 ? !context->render_offscreen : 0;
3042 for (i = 0; i < ARRAY_SIZE(state->fb.render_targets); ++i)
3044 struct wined3d_rendertarget_view *rtv = state->fb.render_targets[i];
3045 if (rtv && rtv->format->id == WINED3DFMT_A8_UNORM && !is_identity_fixup(rtv->format->color_fixup))
3046 args->rt_alpha_swizzle |= 1u << i;
3049 args->dual_source_blend = state->blend_state && state->blend_state->dual_source;
3052 static HRESULT pixel_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
3053 const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
3055 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3056 unsigned int i, highest_reg_used = 0, num_regs_used = 0;
3057 HRESULT hr;
3059 if (FAILED(hr = shader_init(shader, device, desc, parent, parent_ops)))
3060 return hr;
3062 if (FAILED(hr = shader_set_function(shader, device,
3063 WINED3D_SHADER_TYPE_PIXEL, device->adapter->d3d_info.limits.ps_uniform_count)))
3065 shader_cleanup(shader);
3066 return hr;
3069 for (i = 0; i < MAX_REG_INPUT; ++i)
3071 if (shader->u.ps.input_reg_used & (1u << i))
3073 ++num_regs_used;
3074 highest_reg_used = i;
3078 /* Don't do any register mapping magic if it is not needed, or if we can't
3079 * achieve anything anyway */
3080 if (highest_reg_used < (gl_info->limits.glsl_varyings / 4)
3081 || num_regs_used > (gl_info->limits.glsl_varyings / 4)
3082 || shader->reg_maps.shader_version.major >= 4)
3084 if (num_regs_used > (gl_info->limits.glsl_varyings / 4))
3086 /* This happens with relative addressing. The input mapper function
3087 * warns about this if the higher registers are declared too, so
3088 * don't write a FIXME here */
3089 WARN("More varying registers used than supported\n");
3092 for (i = 0; i < MAX_REG_INPUT; ++i)
3094 shader->u.ps.input_reg_map[i] = i;
3097 shader->u.ps.declared_in_count = highest_reg_used + 1;
3099 else
3101 shader->u.ps.declared_in_count = 0;
3102 for (i = 0; i < MAX_REG_INPUT; ++i)
3104 if (shader->u.ps.input_reg_used & (1u << i))
3105 shader->u.ps.input_reg_map[i] = shader->u.ps.declared_in_count++;
3106 else shader->u.ps.input_reg_map[i] = ~0U;
3110 return WINED3D_OK;
3113 enum wined3d_shader_resource_type pixelshader_get_resource_type(const struct wined3d_shader_reg_maps *reg_maps,
3114 unsigned int resource_idx, DWORD tex_types)
3116 static enum wined3d_shader_resource_type shader_resource_type_from_shader_tex_types[] =
3118 WINED3D_SHADER_RESOURCE_TEXTURE_2D, /* WINED3D_SHADER_TEX_2D */
3119 WINED3D_SHADER_RESOURCE_TEXTURE_3D, /* WINED3D_SHADER_TEX_3D */
3120 WINED3D_SHADER_RESOURCE_TEXTURE_CUBE, /* WINED3D_SHADER_TEX_CUBE */
3123 unsigned int idx;
3125 if (reg_maps->shader_version.major > 3)
3126 return reg_maps->resource_info[resource_idx].type;
3128 if (!reg_maps->resource_info[resource_idx].type)
3129 return 0;
3131 idx = (tex_types >> resource_idx * WINED3D_PSARGS_TEXTYPE_SHIFT) & WINED3D_PSARGS_TEXTYPE_MASK;
3132 assert(idx < ARRAY_SIZE(shader_resource_type_from_shader_tex_types));
3133 return shader_resource_type_from_shader_tex_types[idx];
3136 HRESULT CDECL wined3d_shader_create_cs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3137 void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3139 struct wined3d_shader *object;
3140 HRESULT hr;
3142 TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
3143 device, desc, parent, parent_ops, shader);
3145 if (!(object = heap_alloc_zero(sizeof(*object))))
3146 return E_OUTOFMEMORY;
3148 if (FAILED(hr = shader_init(object, device, desc, parent, parent_ops)))
3150 WARN("Failed to initialize compute shader, hr %#lx.\n", hr);
3151 heap_free(object);
3152 return hr;
3155 if (FAILED(hr = shader_set_function(object, device, WINED3D_SHADER_TYPE_COMPUTE, 0)))
3157 shader_cleanup(object);
3158 heap_free(object);
3159 return hr;
3162 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3164 TRACE("Created compute shader %p.\n", object);
3165 *shader = object;
3167 return WINED3D_OK;
3170 HRESULT CDECL wined3d_shader_create_ds(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3171 void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3173 struct wined3d_shader *object;
3174 HRESULT hr;
3176 TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
3177 device, desc, parent, parent_ops, shader);
3179 if (!(object = heap_alloc_zero(sizeof(*object))))
3180 return E_OUTOFMEMORY;
3182 if (FAILED(hr = shader_init(object, device, desc, parent, parent_ops)))
3184 WARN("Failed to initialize domain shader, hr %#lx.\n", hr);
3185 heap_free(object);
3186 return hr;
3189 if (FAILED(hr = shader_set_function(object, device, WINED3D_SHADER_TYPE_DOMAIN, 0)))
3191 shader_cleanup(object);
3192 heap_free(object);
3193 return hr;
3196 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3198 TRACE("Created domain shader %p.\n", object);
3199 *shader = object;
3201 return WINED3D_OK;
3204 HRESULT CDECL wined3d_shader_create_gs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3205 const struct wined3d_stream_output_desc *so_desc, void *parent,
3206 const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3208 struct wined3d_shader *object;
3209 HRESULT hr;
3211 TRACE("device %p, desc %p, so_desc %p, parent %p, parent_ops %p, shader %p.\n",
3212 device, desc, so_desc, parent, parent_ops, shader);
3214 if (!(object = heap_alloc_zero(sizeof(*object))))
3215 return E_OUTOFMEMORY;
3217 if (FAILED(hr = geometry_shader_init(object, device, desc, so_desc, parent, parent_ops)))
3219 WARN("Failed to initialize geometry shader, hr %#lx.\n", hr);
3220 heap_free(object);
3221 return hr;
3224 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3226 TRACE("Created geometry shader %p.\n", object);
3227 *shader = object;
3229 return WINED3D_OK;
3232 HRESULT CDECL wined3d_shader_create_hs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3233 void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3235 struct wined3d_shader *object;
3236 HRESULT hr;
3238 TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
3239 device, desc, parent, parent_ops, shader);
3241 if (!(object = heap_alloc_zero(sizeof(*object))))
3242 return E_OUTOFMEMORY;
3244 if (FAILED(hr = shader_init(object, device, desc, parent, parent_ops)))
3246 WARN("Failed to initialize hull shader, hr %#lx.\n", hr);
3247 heap_free(object);
3248 return hr;
3251 if (FAILED(hr = shader_set_function(object, device, WINED3D_SHADER_TYPE_HULL, 0)))
3253 shader_cleanup(object);
3254 heap_free(object);
3255 return hr;
3258 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3260 TRACE("Created hull shader %p.\n", object);
3261 *shader = object;
3263 return WINED3D_OK;
3266 HRESULT CDECL wined3d_shader_create_ps(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3267 void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3269 struct wined3d_shader *object;
3270 HRESULT hr;
3272 TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
3273 device, desc, parent, parent_ops, shader);
3275 if (!(object = heap_alloc_zero(sizeof(*object))))
3276 return E_OUTOFMEMORY;
3278 if (FAILED(hr = pixel_shader_init(object, device, desc, parent, parent_ops)))
3280 WARN("Failed to initialize pixel shader, hr %#lx.\n", hr);
3281 heap_free(object);
3282 return hr;
3285 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3287 TRACE("Created pixel shader %p.\n", object);
3288 *shader = object;
3290 return WINED3D_OK;
3293 HRESULT CDECL wined3d_shader_create_vs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
3294 void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
3296 struct wined3d_shader *object;
3297 HRESULT hr;
3299 TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
3300 device, desc, parent, parent_ops, shader);
3302 if (!(object = heap_alloc_zero(sizeof(*object))))
3303 return E_OUTOFMEMORY;
3305 if (FAILED(hr = vertex_shader_init(object, device, desc, parent, parent_ops)))
3307 WARN("Failed to initialize vertex shader, hr %#lx.\n", hr);
3308 heap_free(object);
3309 return hr;
3312 wined3d_cs_init_object(device->cs, wined3d_shader_init_object, object);
3314 TRACE("Created vertex shader %p.\n", object);
3315 *shader = object;
3317 return WINED3D_OK;