Propagate error in mono_unicode_to_external (#14879)
[mono-project.git] / mono / mini / simd-intrinsics.c
blobdbfb5a9ea0cbee2fa2a54780085981b9c1add844
1 /**
2 * \file
3 * simd support for intrinsics
5 * Author:
6 * Rodrigo Kumpera (rkumpera@novell.com)
8 * (C) 2008 Novell, Inc.
9 */
11 #include <config.h>
12 #include <stdio.h>
14 #include "mini.h"
15 #include "ir-emit.h"
16 #include "mono/utils/bsearch.h"
17 #include <mono/metadata/abi-details.h>
18 #include <mono/metadata/reflection-internals.h>
21 General notes on SIMD intrinsics
23 TODO handle operands with non SIMD args, such as op_Addition (Vector4f, float)
24 TODO optimize r4const in .ctor so it doesn't go into the FP stack first
25 TODO extend op_to_op_dest_membase to handle simd ops
26 TODO add support for indexed versions of simd ops
27 TODO do an amd64 port and figure out how to properly handle extractors/.ctor
28 TODO make sure locals, arguments and spills are properly aligned.
29 TODO add support for fusing a XMOVE into a simd op in mono_spill_global_vars.
30 TODO add stuff to man pages
31 TODO document this under /docs
32 TODO make passing a xmm as argument not cause it to be LDADDR'ed (introduce an OP_XPUSH)
33 TODO revamp the .ctor sequence as it looks very fragile, maybe use a var just like move_i4_to_f. (or just pinst sse ops)
34 TODO figure out what's wrong with OP_STOREX_MEMBASE_REG and OP_STOREX_MEMBASE (the 2nd is for imm operands)
35 TODO maybe add SSE3 emulation on top of SSE2, or just implement the corresponding functions using SSE2 intrinsics.
36 TODO pass simd arguments in registers or, at least, add SSE support for pushing large (>=16) valuetypes
37 TODO passing simd args byval to a non-intrinsic method causes some useless local var load/store to happen.
38 TODO check if we need to init the SSE control word with better precision.
39 TODO add support for 3 reg sources in mini without slowing the common path. Or find a way to make MASKMOVDQU work.
40 TODO make SimdRuntime.get_AccelMode work under AOT
41 TODO patterns such as "a ^= b" generate slower code as the LDADDR op will be copied to a tmp first. Look at adding an indirection reduction pass after the dce pass.
42 TODO extend bounds checking code to support range checking.
44 General notes for SIMD intrinsics.
46 -Bad extractor and constructor performance
47 Extracting a float from a XMM is a complete disaster if you are passing it as an argument.
48 It will be loaded in the FP stack just to be pushed on the call stack.
50 A similar thing happens with Vector4f constructor that require float vars to be
52 The fix for this issue is similar to the one required for r4const as method args. Avoiding the
53 trip to the FP stack is desirable.
55 -Extractor and constructor code doesn't make sense under amd64. Both currently assume separate banks
56 for simd and fp.
59 -Promote OP_EXTRACT_I4 to a STORE op
60 The advantage of this change is that it could have a _membase version and promote further optimizations.
62 -Create a MONO_INST_DONT_REGALLOC and use it in all places that MONO_INST_INDIRECT is used
63 without an OP_LDADDR.
66 #if defined (MONO_ARCH_SIMD_INTRINSICS)
68 #if defined (DISABLE_JIT)
/*
 * mono_simd_intrinsics_init:
 *
 *   Variant compiled when DISABLE_JIT is defined.  It takes no arguments,
 * returns nothing and performs no work.
 */
void
mono_simd_intrinsics_init (void)
{
}
75 #else
/*
 * Debug tracing helpers.
 *
 * To silence all tracing at compile time, swap in the disabled definition:
 */
//#define IS_DEBUG_ON(cfg) (0)

/* Non-zero when the verbose_level of `cfg' is 3 or higher. */
#define IS_DEBUG_ON(cfg) ((cfg)->verbose_level >= 3)

/*
 * Runs statement `a' only when IS_DEBUG_ON is true.  The macro reads a
 * variable literally named `cfg' from the scope where it is expanded, and is
 * wrapped in do/while (0) so it behaves as a single statement.
 */
#define DEBUG(a) \
	do { \
		if (IS_DEBUG_ON(cfg)) { \
			a; \
		} \
	} while (0)
/*
 * Emit modes stored in the simd_emit_mode column of the intrinsic tables.
 * The values are spelled out because that column is a 4-bit bitfield, so
 * every enumerator has to stay below 16.
 */
enum {
	SIMD_EMIT_BINARY       = 0,
	SIMD_EMIT_UNARY        = 1,
	SIMD_EMIT_SETTER       = 2,	/* set_* rows; the opcode column holds an element index */
	SIMD_EMIT_GETTER       = 3,	/* get_* rows; the opcode column holds an element index */
	SIMD_EMIT_GETTER_QWORD = 4,	/* get_X/get_Y rows of the two-element vector tables */
	SIMD_EMIT_CTOR         = 5,
	SIMD_EMIT_CAST         = 6,	/* op_Explicit rows */
	SIMD_EMIT_SHUFFLE      = 7,
	SIMD_EMIT_SHIFT        = 8,
	SIMD_EMIT_EQUALITY     = 9,	/* op_Equality / op_Inequality rows */
	SIMD_EMIT_LOAD_ALIGNED = 10,
	SIMD_EMIT_STORE        = 11,
	SIMD_EMIT_EXTRACT_MASK = 12,
	SIMD_EMIT_PREFETCH     = 13
};
98 // This, instead of an array of pointers, to optimize away a pointer and a relocation per string.
99 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
100 #define MSGSTRFIELD1(line) str##line
101 static const struct msgstr_t {
102 #define SIMD_METHOD(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
103 #include "simd-methods.h"
104 #undef SIMD_METHOD
105 } method_names = {
106 #define SIMD_METHOD(str,name) str,
107 #include "simd-methods.h"
108 #undef SIMD_METHOD
111 enum {
112 #define SIMD_METHOD(str,name) name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
113 #include "simd-methods.h"
115 #define method_name(idx) ((const char*)&method_names + (idx))
117 typedef struct {
118 guint16 name;
119 guint16 opcode;
120 guint8 simd_version_flags;
121 guint8 simd_emit_mode : 4;
122 guint8 flags : 4;
123 } SimdIntrinsic;
125 static const SimdIntrinsic vector4f_intrinsics[] = {
126 { SN_ctor, OP_EXPAND_R4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
127 { SN_AddSub, OP_ADDSUBPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY},
128 { SN_AndNot, OP_ANDNPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY},
129 { SN_CompareEqual, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_EQ },
130 { SN_CompareLessEqual, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LE },
131 { SN_CompareLessThan, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LT },
132 { SN_CompareNotEqual, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NEQ },
133 { SN_CompareNotLessEqual, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLE },
134 { SN_CompareNotLessThan, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLT },
135 { SN_CompareOrdered, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_ORD },
136 { SN_CompareUnordered, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_UNORD },
137 { SN_ConvertToDouble, OP_CVTPS2PD, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
138 { SN_ConvertToInt, OP_CVTPS2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
139 { SN_ConvertToIntTruncated, OP_CVTTPS2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
140 { SN_DuplicateHigh, OP_DUPPS_HIGH, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
141 { SN_DuplicateLow, OP_DUPPS_LOW, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
142 { SN_HorizontalAdd, OP_HADDPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
143 { SN_HorizontalSub, OP_HSUBPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
144 { SN_InterleaveHigh, OP_UNPACK_HIGHPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
145 { SN_InterleaveLow, OP_UNPACK_LOWPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
146 { SN_InvSqrt, OP_RSQRTPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
147 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
148 { SN_Max, OP_MAXPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
149 { SN_Min, OP_MINPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
150 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
151 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
152 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
153 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
154 { SN_Reciprocal, OP_RCPPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
155 { SN_Shuffle, OP_PSHUFLED, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
156 { SN_Sqrt, OP_SQRTPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
157 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
158 { SN_StoreNonTemporal, OP_STOREX_NTA_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
159 { SN_get_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
160 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
161 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
162 { SN_get_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
163 { SN_op_Addition, OP_ADDPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
164 { SN_op_BitwiseAnd, OP_ANDPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
165 { SN_op_BitwiseOr, OP_ORPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
166 { SN_op_Division, OP_DIVPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
167 { SN_op_Equality, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
168 { SN_op_ExclusiveOr, OP_XORPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
169 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
170 { SN_op_Inequality, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
171 { SN_op_Multiply, OP_MULPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
172 { SN_op_Subtraction, OP_SUBPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
173 { SN_set_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
174 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
175 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
176 { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }
179 static const SimdIntrinsic vector2d_intrinsics[] = {
180 { SN_ctor, OP_EXPAND_R8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
181 { SN_AddSub, OP_ADDSUBPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY,},
182 { SN_AndNot, OP_ANDNPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
183 { SN_CompareEqual, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_EQ },
184 { SN_CompareLessEqual, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LE },
185 { SN_CompareLessThan, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LT },
186 { SN_CompareNotEqual, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NEQ },
187 { SN_CompareNotLessEqual, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLE },
188 { SN_CompareNotLessThan, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLT },
189 { SN_CompareOrdered, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_ORD },
190 { SN_CompareUnordered, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_UNORD },
191 { SN_ConvertToFloat, OP_CVTPD2PS, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
192 { SN_ConvertToInt, OP_CVTPD2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
193 { SN_ConvertToIntTruncated, OP_CVTTPD2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
194 { SN_Duplicate, OP_DUPPD, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
195 { SN_HorizontalAdd, OP_HADDPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
196 { SN_HorizontalSub, OP_HSUBPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
197 { SN_InterleaveHigh, OP_UNPACK_HIGHPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
198 { SN_InterleaveLow, OP_UNPACK_LOWPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
199 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
200 { SN_Max, OP_MAXPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
201 { SN_Min, OP_MINPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
202 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
203 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
204 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
205 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
206 { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
207 { SN_Sqrt, OP_SQRTPD, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
208 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
209 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
210 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
211 { SN_op_Addition, OP_ADDPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
212 { SN_op_BitwiseAnd, OP_ANDPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
213 { SN_op_BitwiseOr, OP_ORPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
214 { SN_op_Division, OP_DIVPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
215 { SN_op_ExclusiveOr, OP_XORPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
216 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
217 { SN_op_Multiply, OP_MULPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
218 { SN_op_Subtraction, OP_SUBPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
219 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
220 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
223 static const SimdIntrinsic vector2ul_intrinsics[] = {
224 { SN_ctor, OP_EXPAND_I8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
225 { SN_CompareEqual, OP_PCMPEQQ, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
226 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
227 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
228 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
229 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
230 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
231 { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
232 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
233 { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
234 { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
235 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
236 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
237 { SN_op_Addition, OP_PADDQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
238 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
239 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
240 { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
241 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
242 { SN_op_LeftShift, OP_PSHLQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
243 { SN_op_Multiply, OP_PMULQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
244 { SN_op_RightShift, OP_PSHRQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
245 { SN_op_Subtraction, OP_PSUBQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
246 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
247 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
250 static const SimdIntrinsic vector2l_intrinsics[] = {
251 { SN_ctor, OP_EXPAND_I8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
252 { SN_CompareEqual, OP_PCMPEQQ, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
253 { SN_CompareGreaterThan, OP_PCMPGTQ, SIMD_VERSION_SSE42, SIMD_EMIT_BINARY },
254 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
255 { SN_LogicalRightShift, OP_PSHRQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
256 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
257 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
258 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
259 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
260 { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
261 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
262 { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
263 { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
264 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
265 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
266 { SN_op_Addition, OP_PADDQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
267 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
268 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
269 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
270 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
271 { SN_op_LeftShift, OP_PSHLQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
272 { SN_op_Multiply, OP_PMULQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
273 { SN_op_Subtraction, OP_PSUBQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
274 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
275 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
278 static const SimdIntrinsic vector4ui_intrinsics[] = {
279 { SN_ctor, OP_EXPAND_I4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
280 { SN_ArithmeticRightShift, OP_PSARD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
281 { SN_CompareEqual, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
282 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
283 { SN_Max, OP_PMAXD_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
284 { SN_Min, OP_PMIND_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
285 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
286 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
287 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
288 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
289 { SN_Shuffle, OP_PSHUFLED, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
290 { SN_SignedPackWithSignedSaturation, OP_PACKD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
291 { SN_SignedPackWithUnsignedSaturation, OP_PACKD_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
292 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
293 { SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
294 { SN_UnpackLow, OP_UNPACK_LOWD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
295 { SN_get_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
296 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
297 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
298 { SN_get_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
299 { SN_op_Addition, OP_PADDD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
300 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
301 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
302 { SN_op_Equality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
303 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
304 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
305 { SN_op_Inequality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
306 { SN_op_LeftShift, OP_PSHLD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
307 { SN_op_Multiply, OP_PMULD, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
308 { SN_op_RightShift, OP_PSHRD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
309 { SN_op_Subtraction, OP_PSUBD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
310 { SN_set_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
311 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
312 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
313 { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
316 static const SimdIntrinsic vector4i_intrinsics[] = {
317 { SN_ctor, OP_EXPAND_I4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
318 { SN_CompareEqual, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
319 { SN_CompareGreaterThan, OP_PCMPGTD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
320 { SN_ConvertToDouble, OP_CVTDQ2PD, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
321 { SN_ConvertToFloat, OP_CVTDQ2PS, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
322 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
323 { SN_LogicalRightShift, OP_PSHRD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
324 { SN_Max, OP_PMAXD, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
325 { SN_Min, OP_PMIND, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
326 { SN_PackWithSignedSaturation, OP_PACKD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
327 { SN_PackWithUnsignedSaturation, OP_PACKD_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
328 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
329 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
330 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
331 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
332 { SN_Shuffle, OP_PSHUFLED, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
333 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
334 { SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
335 { SN_UnpackLow, OP_UNPACK_LOWD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
336 { SN_get_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
337 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
338 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
339 { SN_get_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
340 { SN_op_Addition, OP_PADDD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
341 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
342 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
343 { SN_op_Equality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
344 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
345 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
346 { SN_op_Inequality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
347 { SN_op_LeftShift, OP_PSHLD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
348 { SN_op_Multiply, OP_PMULD, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
349 { SN_op_RightShift, OP_PSARD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
350 { SN_op_Subtraction, OP_PSUBD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
351 { SN_set_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
352 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
353 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
354 { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
357 static const SimdIntrinsic vector8us_intrinsics[] = {
358 { SN_ctor, OP_EXPAND_I2, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
359 { SN_AddWithSaturation, OP_PADDW_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
360 { SN_ArithmeticRightShift, OP_PSARW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
361 { SN_Average, OP_PAVGW_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
362 { SN_CompareEqual, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 },
363 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
364 { SN_Max, OP_PMAXW_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
365 { SN_Min, OP_PMINW_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
366 { SN_MultiplyStoreHigh, OP_PMULW_HIGH_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
367 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
368 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
369 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
370 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
371 { SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
372 { SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
373 { SN_SignedPackWithSignedSaturation, OP_PACKW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
374 { SN_SignedPackWithUnsignedSaturation, OP_PACKW_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
375 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
376 { SN_SubtractWithSaturation, OP_PSUBW_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
377 { SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
378 { SN_UnpackLow, OP_UNPACK_LOWW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
379 { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
380 { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
381 { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
382 { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
383 { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
384 { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
385 { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
386 { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
387 { SN_op_Addition, OP_PADDW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
388 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
389 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
390 { SN_op_Equality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
391 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
392 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
393 { SN_op_Inequality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
394 { SN_op_LeftShift, OP_PSHLW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
395 { SN_op_Multiply, OP_PMULW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
396 { SN_op_RightShift, OP_PSHRW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
397 { SN_op_Subtraction, OP_PSUBW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
398 { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
399 { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
400 { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
401 { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
402 { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
403 { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
404 { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
405 { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
408 static const SimdIntrinsic vector8s_intrinsics[] = {
409 { SN_ctor, OP_EXPAND_I2, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
410 { SN_AddWithSaturation, OP_PADDW_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
411 { SN_CompareEqual, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
412 { SN_CompareGreaterThan, OP_PCMPGTW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
413 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
414 { SN_LogicalRightShift, OP_PSHRW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
415 { SN_Max, OP_PMAXW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
416 { SN_Min, OP_PMINW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
417 { SN_MultiplyStoreHigh, OP_PMULW_HIGH, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
418 { SN_PackWithSignedSaturation, OP_PACKW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
419 { SN_PackWithUnsignedSaturation, OP_PACKW_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
420 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
421 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
422 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
423 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
424 { SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
425 { SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
426 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
427 { SN_SubtractWithSaturation, OP_PSUBW_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
428 { SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
429 { SN_UnpackLow, OP_UNPACK_LOWW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
430 { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
431 { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
432 { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
433 { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
434 { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
435 { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
436 { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
437 { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
438 { SN_op_Addition, OP_PADDW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
439 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
440 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
441 { SN_op_Equality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
442 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
443 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
444 { SN_op_Inequality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
445 { SN_op_LeftShift, OP_PSHLW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
446 { SN_op_Multiply, OP_PMULW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
447 { SN_op_RightShift, OP_PSARW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
448 { SN_op_Subtraction, OP_PSUBW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
449 { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
450 { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
451 { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
452 { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
453 { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
454 { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
455 { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
456 { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
459 static const SimdIntrinsic vector16b_intrinsics[] = {
460 { SN_ctor, OP_EXPAND_I1, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
461 { SN_AddWithSaturation, OP_PADDB_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
462 { SN_Average, OP_PAVGB_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
463 { SN_CompareEqual, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
464 { SN_ExtractByteMask, 0, SIMD_VERSION_SSE1, SIMD_EMIT_EXTRACT_MASK },
465 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
466 { SN_Max, OP_PMAXB_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
467 { SN_Min, OP_PMINB_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
468 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
469 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
470 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
471 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
472 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
473 { SN_SubtractWithSaturation, OP_PSUBB_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
474 { SN_SumOfAbsoluteDifferences, OP_PSUM_ABS_DIFF, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
475 { SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
476 { SN_UnpackLow, OP_UNPACK_LOWB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
477 { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
478 { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
479 { SN_get_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
480 { SN_get_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
481 { SN_get_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
482 { SN_get_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
483 { SN_get_V14, 14, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
484 { SN_get_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
485 { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
486 { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
487 { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
488 { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
489 { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
490 { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
491 { SN_get_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
492 { SN_get_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
493 { SN_op_Addition, OP_PADDB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
494 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
495 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
496 { SN_op_Equality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
497 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
498 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
499 { SN_op_Inequality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
500 { SN_op_Subtraction, OP_PSUBB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
501 { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
502 { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
503 { SN_set_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
504 { SN_set_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
505 { SN_set_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
506 { SN_set_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
507 { SN_set_V14, 14, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
508 { SN_set_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
509 { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
510 { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
511 { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
512 { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
513 { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
514 { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
515 { SN_set_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
516 { SN_set_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
520 Missing:
521 setters
523 static const SimdIntrinsic vector16sb_intrinsics[] = {
524 { SN_ctor, OP_EXPAND_I1, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
525 { SN_AddWithSaturation, OP_PADDB_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
526 { SN_CompareEqual, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
527 { SN_CompareGreaterThan, OP_PCMPGTB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
528 { SN_ExtractByteMask, 0, SIMD_VERSION_SSE1, SIMD_EMIT_EXTRACT_MASK },
529 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
530 { SN_Max, OP_PMAXB, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
531 { SN_Min, OP_PMINB, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
532 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
533 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
534 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
535 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
536 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
537 { SN_SubtractWithSaturation, OP_PSUBB_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
538 { SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
539 { SN_UnpackLow, OP_UNPACK_LOWB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
540 { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
541 { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
542 { SN_get_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
543 { SN_get_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
544 { SN_get_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
545 { SN_get_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
546 { SN_get_V14, 14, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
547 { SN_get_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
548 { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
549 { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
550 { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
551 { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
552 { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
553 { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
554 { SN_get_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
555 { SN_get_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
556 { SN_op_Addition, OP_PADDB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
557 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
558 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
559 { SN_op_Equality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
560 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
561 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
562 { SN_op_Inequality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
563 { SN_op_Subtraction, OP_PSUBB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
564 { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
565 { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
566 { SN_set_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
567 { SN_set_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
568 { SN_set_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
569 { SN_set_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
570 { SN_set_V14, 14, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
571 { SN_set_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
572 { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
573 { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
574 { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
575 { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
576 { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
577 { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
578 { SN_set_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
579 { SN_set_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
582 static guint32 simd_supported_versions;
584 static MonoInst* emit_sys_numerics_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args);
585 static MonoInst* emit_sys_numerics_vectors_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args);
587 /*TODO match using number of parameters as well*/
588 static int
589 simd_intrinsic_compare_by_name (const void *key, const void *value)
591 return strcmp ((const char*)key, method_name (((SimdIntrinsic *)value)->name));
/* Per-vreg state bits stored in the vreg_flags array of mono_simd_simplify_indirection (). */
typedef enum {
	VREG_USED             = 1 << 0,
	VREG_HAS_XZERO_BB0    = 1 << 1,
	VREG_HAS_OTHER_OP_BB0 = 1 << 2,
	VREG_SINGLE_BB_USE    = 1 << 3,
	VREG_MANY_BB_USE      = 1 << 4,
} KillFlags;
602 void
603 mono_simd_intrinsics_init (void)
605 simd_supported_versions = mono_arch_cpu_enumerate_simd_versions ();
606 /*TODO log the supported flags*/
609 static inline gboolean
610 apply_vreg_first_block_interference (MonoCompile *cfg, MonoInst *ins, int reg, int max_vreg, char *vreg_flags)
612 if (reg != -1 && reg <= max_vreg && vreg_flags [reg]) {
613 vreg_flags [reg] &= ~VREG_HAS_XZERO_BB0;
614 vreg_flags [reg] |= VREG_HAS_OTHER_OP_BB0;
615 DEBUG (printf ("[simd-simplify] R%d used: ", reg); mono_print_ins(ins));
616 return TRUE;
618 return FALSE;
621 static inline gboolean
622 apply_vreg_following_block_interference (MonoCompile *cfg, MonoInst *ins, int reg, MonoBasicBlock *bb, int max_vreg, char *vreg_flags, MonoBasicBlock **target_bb)
624 if (reg == -1 || reg > max_vreg || !(vreg_flags [reg] & VREG_HAS_XZERO_BB0) || target_bb [reg] == bb)
625 return FALSE;
627 if (vreg_flags [reg] & VREG_SINGLE_BB_USE) {
628 vreg_flags [reg] &= ~VREG_SINGLE_BB_USE;
629 vreg_flags [reg] |= VREG_MANY_BB_USE;
630 DEBUG (printf ("[simd-simplify] R%d used by many bb: ", reg); mono_print_ins(ins));
631 return TRUE;
632 } else if (!(vreg_flags [reg] & VREG_MANY_BB_USE)) {
633 vreg_flags [reg] |= VREG_SINGLE_BB_USE;
634 target_bb [reg] = bb;
635 DEBUG (printf ("[simd-simplify] R%d first used by: ", reg); mono_print_ins(ins));
636 return TRUE;
638 return FALSE;
642 This pass recalculate which vars need MONO_INST_INDIRECT.
644 We cannot do this for non SIMD vars since code like mono_get_vtable_var
645 uses MONO_INST_INDIRECT to signal that the variable must be stack allocated.
647 void
648 mono_simd_simplify_indirection (MonoCompile *cfg)
650 int i, max_vreg = 0;
651 MonoBasicBlock *bb, *first_bb = NULL, **target_bb;
652 MonoInst *ins;
653 char *vreg_flags;
655 for (i = 0; i < cfg->num_varinfo; i++) {
656 MonoInst *var = cfg->varinfo [i];
657 if (m_class_is_simd_type (var->klass)) {
658 var->flags &= ~MONO_INST_INDIRECT;
659 max_vreg = MAX (var->dreg, max_vreg);
663 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
664 if (!first_bb && bb->code)
665 first_bb = bb;
666 for (ins = bb->code; ins; ins = ins->next) {
667 if (ins->opcode == OP_LDADDR) {
668 MonoInst *var = (MonoInst*)ins->inst_p0;
669 if (m_class_is_simd_type (var->klass)) {
670 var->flags |= MONO_INST_INDIRECT;
676 DEBUG (printf ("[simd-simplify] max vreg is %d\n", max_vreg));
677 vreg_flags = (char *)g_malloc0 (max_vreg + 1);
678 target_bb = g_new0 (MonoBasicBlock*, max_vreg + 1);
680 for (i = 0; i < cfg->num_varinfo; i++) {
681 MonoInst *var = cfg->varinfo [i];
682 if (m_class_is_simd_type (var->klass) && !(var->flags & (MONO_INST_INDIRECT|MONO_INST_VOLATILE))) {
683 vreg_flags [var->dreg] = VREG_USED;
684 DEBUG (printf ("[simd-simplify] processing var %d with vreg %d\n", i, var->dreg));
688 /*Scan the first basic block looking xzeros not used*/
689 for (ins = first_bb->code; ins; ins = ins->next) {
690 int num_sregs;
691 int sregs [MONO_MAX_SRC_REGS];
693 if (ins->opcode == OP_XZERO) {
694 if (!(vreg_flags [ins->dreg] & VREG_HAS_OTHER_OP_BB0)) {
695 DEBUG (printf ("[simd-simplify] R%d has vzero: ", ins->dreg); mono_print_ins(ins));
696 vreg_flags [ins->dreg] |= VREG_HAS_XZERO_BB0;
698 continue;
700 if (ins->opcode == OP_LDADDR && apply_vreg_first_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, max_vreg, vreg_flags))
701 continue;
702 if (apply_vreg_first_block_interference (cfg, ins, ins->dreg, max_vreg, vreg_flags))
703 continue;
704 num_sregs = mono_inst_get_src_registers (ins, sregs);
705 for (i = 0; i < num_sregs; ++i) {
706 if (apply_vreg_first_block_interference (cfg, ins, sregs [i], max_vreg, vreg_flags))
707 break;
711 if (IS_DEBUG_ON (cfg)) {
712 for (i = 0; i < cfg->num_varinfo; i++) {
713 MonoInst *var = cfg->varinfo [i];
714 if (m_class_is_simd_type (var->klass)) {
715 if ((vreg_flags [var->dreg] & VREG_HAS_XZERO_BB0))
716 DEBUG (printf ("[simd-simplify] R%d has xzero only\n", var->dreg));
717 if ((vreg_flags [var->dreg] & VREG_HAS_OTHER_OP_BB0))
718 DEBUG (printf ("[simd-simplify] R%d has other ops on bb0\n", var->dreg));
723 /*TODO stop here if no var is xzero only*/
726 Scan all other bb and check if it has only one other use
727 Ideally this would be done after an extended bb formation pass
729 FIXME This pass could use dominator information to properly
730 place the XZERO on the bb that dominates all uses of the var,
731 but this will have zero effect with the current local reg alloc
733 TODO simply the use of flags.
736 for (bb = first_bb->next_bb; bb; bb = bb->next_bb) {
737 for (ins = bb->code; ins; ins = ins->next) {
738 int num_sregs;
739 int sregs [MONO_MAX_SRC_REGS];
741 if (ins->opcode == OP_LDADDR && apply_vreg_following_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, bb, max_vreg, vreg_flags, target_bb))
742 continue;
743 if (apply_vreg_following_block_interference (cfg, ins, ins->dreg, bb, max_vreg, vreg_flags, target_bb))
744 continue;
745 num_sregs = mono_inst_get_src_registers (ins, sregs);
746 for (i = 0; i < num_sregs; ++i) {
747 if (apply_vreg_following_block_interference (cfg, ins, sregs [i], bb,
748 max_vreg, vreg_flags, target_bb))
749 continue;
754 for (i = 0; i < cfg->num_varinfo; i++) {
755 MonoInst *var = cfg->varinfo [i];
756 if (!m_class_is_simd_type (var->klass))
757 continue;
758 if ((vreg_flags [var->dreg] & VREG_SINGLE_BB_USE))
759 DEBUG (printf ("[simd-simplify] R%d has single bb use\n", var->dreg));
760 if ((vreg_flags [var->dreg] & VREG_MANY_BB_USE))
761 DEBUG (printf ("[simd-simplify] R%d has many bb in use\n", var->dreg));
763 if (!(vreg_flags [var->dreg] & VREG_SINGLE_BB_USE))
764 continue;
765 for (ins = target_bb [var->dreg]->code; ins; ins = ins->next) {
766 int num_sregs, j;
767 int sregs [MONO_MAX_SRC_REGS];
768 gboolean found = FALSE;
770 num_sregs = mono_inst_get_src_registers (ins, sregs);
771 for (j = 0; j < num_sregs; ++j) {
772 if (sregs [j] == var->dreg)
773 found = TRUE;
775 /*We can avoid inserting the XZERO if the first use doesn't depend on the zero'ed value.*/
776 if (ins->dreg == var->dreg && !found) {
777 DEBUG (printf ("[simd-simplify] INGORING R%d on BB %d because first op is a def", i, target_bb [var->dreg]->block_num););
778 break;
779 } else if (found) {
780 DEBUG (printf ("[simd-simplify] Adding XZERO for R%d on BB %d: ", i, target_bb [var->dreg]->block_num); );
781 MonoInst *tmp;
782 MONO_INST_NEW (cfg, tmp, OP_XZERO);
783 tmp->dreg = var->dreg;
784 tmp->type = STACK_VTYPE;
785 tmp->klass = var->klass;
786 mono_bblock_insert_before_ins (target_bb [var->dreg], ins, tmp);
787 break;
792 for (ins = first_bb->code; ins; ins = ins->next) {
793 if (ins->opcode == OP_XZERO && (vreg_flags [ins->dreg] & VREG_SINGLE_BB_USE)) {
794 DEBUG (printf ("[simd-simplify] Nullify %d on first BB: ", ins->dreg); mono_print_ins(ins));
795 NULLIFY_INS (ins);
799 g_free (vreg_flags);
800 g_free (target_bb);
804 * Windows x64 value type ABI uses reg/stack references (ArgValuetypeAddrInIReg/ArgValuetypeAddrOnStack)
805 * for function arguments. When using SIMD intrinsics arguments optimized into OP_ARG needs to be decomposed
806 * into correspondig SIMD LOADX/STOREX instructions.
808 #if defined(TARGET_WIN32) && defined(TARGET_AMD64)
809 static gboolean
810 decompose_vtype_opt_uses_simd_intrinsics (MonoCompile *cfg, MonoInst *ins)
812 if (cfg->uses_simd_intrinsics & MONO_CFG_USES_SIMD_INTRINSICS_DECOMPOSE_VTYPE)
813 return TRUE;
815 switch (ins->opcode) {
816 case OP_XMOVE:
817 case OP_XZERO:
818 case OP_LOADX_MEMBASE:
819 case OP_LOADX_ALIGNED_MEMBASE:
820 case OP_STOREX_MEMBASE:
821 case OP_STOREX_ALIGNED_MEMBASE_REG:
822 return TRUE;
823 default:
824 return FALSE;
828 static void
829 decompose_vtype_opt_load_arg (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins, gint32 *sreg_int32)
831 guint32 *sreg = (guint32*)sreg_int32;
832 MonoInst *src_var = get_vreg_to_inst (cfg, *sreg);
833 if (src_var && src_var->opcode == OP_ARG && src_var->klass && MONO_CLASS_IS_SIMD (cfg, src_var->klass)) {
834 MonoInst *varload_ins, *load_ins;
835 NEW_VARLOADA (cfg, varload_ins, src_var, src_var->inst_vtype);
836 mono_bblock_insert_before_ins (bb, ins, varload_ins);
837 MONO_INST_NEW (cfg, load_ins, OP_LOADX_MEMBASE);
838 load_ins->klass = src_var->klass;
839 load_ins->type = STACK_VTYPE;
840 load_ins->sreg1 = varload_ins->dreg;
841 load_ins->dreg = alloc_xreg (cfg);
842 mono_bblock_insert_after_ins (bb, varload_ins, load_ins);
843 *sreg = load_ins->dreg;
847 void
848 mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
850 if (cfg->opt & MONO_OPT_SIMD && decompose_vtype_opt_uses_simd_intrinsics (cfg, ins)) {
851 decompose_vtype_opt_load_arg (cfg, bb, ins, &(ins->sreg1));
852 decompose_vtype_opt_load_arg (cfg, bb, ins, &(ins->sreg2));
853 decompose_vtype_opt_load_arg (cfg, bb, ins, &(ins->sreg3));
854 MonoInst *dest_var = get_vreg_to_inst (cfg, ins->dreg);
855 if (dest_var && dest_var->opcode == OP_ARG && dest_var->klass && MONO_CLASS_IS_SIMD (cfg, dest_var->klass)) {
856 MonoInst *varload_ins, *store_ins;
857 ins->dreg = alloc_xreg (cfg);
858 NEW_VARLOADA (cfg, varload_ins, dest_var, dest_var->inst_vtype);
859 mono_bblock_insert_after_ins (bb, ins, varload_ins);
860 MONO_INST_NEW (cfg, store_ins, OP_STOREX_MEMBASE);
861 store_ins->klass = dest_var->klass;
862 store_ins->type = STACK_VTYPE;
863 store_ins->sreg1 = ins->dreg;
864 store_ins->dreg = varload_ins->dreg;
865 mono_bblock_insert_after_ins (bb, varload_ins, store_ins);
870 void
871 mono_simd_decompose_intrinsics (MonoCompile *cfg)
873 MonoBasicBlock *bb;
874 MonoInst *ins;
876 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
877 for (ins = bb->code; ins; ins = ins->next) {
878 mono_simd_decompose_intrinsic (cfg, bb, ins);
882 #else
883 void
884 mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
888 void
889 mono_simd_decompose_intrinsics (MonoCompile *cfg)
892 #endif /*defined(TARGET_WIN32) && defined(TARGET_AMD64)*/
895 * This function expect that src be a value.
897 static int
898 get_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src)
900 const char *spec = INS_INFO (src->opcode);
902 if (src->opcode == OP_XMOVE) {
903 return src->sreg1;
904 } else if (spec [MONO_INST_DEST] == 'x') {
905 return src->dreg;
906 } else if (src->opcode == OP_VCALL || src->opcode == OP_VCALL_MEMBASE) {
907 return src->dreg;
910 g_warning ("get_simd_vreg:: could not infer source simd vreg for op");
911 mono_print_ins (src);
912 g_assert_not_reached ();
916 * This function will load the value if needed.
918 static int
919 load_simd_vreg_class (MonoCompile *cfg, MonoClass *klass, MonoInst *src, gboolean *indirect)
921 const char *spec = INS_INFO (src->opcode);
923 if (indirect)
924 *indirect = FALSE;
925 if (src->opcode == OP_XMOVE) {
926 return src->sreg1;
927 } else if (src->opcode == OP_LDADDR) {
928 int res = ((MonoInst*)src->inst_p0)->dreg;
929 return res;
930 } else if (spec [MONO_INST_DEST] == 'x') {
931 return src->dreg;
932 } else if (src->type == STACK_PTR || src->type == STACK_MP) {
933 MonoInst *ins;
934 if (indirect)
935 *indirect = TRUE;
937 MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE);
938 ins->klass = klass;
939 ins->sreg1 = src->dreg;
940 ins->type = STACK_VTYPE;
941 ins->dreg = alloc_ireg (cfg);
942 MONO_ADD_INS (cfg->cbb, ins);
943 return ins->dreg;
945 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src->type);
946 mono_print_ins (src);
947 g_assert_not_reached ();
950 static int
951 load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean *indirect)
953 return load_simd_vreg_class (cfg, cmethod->klass, src, indirect);
956 /*We share the var with fconv_to_r8_x to save some stack space.*/
957 static MonoInst*
958 get_double_spill_area (MonoCompile *cfg)
960 if (!cfg->fconv_to_r8_x_var) {
961 cfg->fconv_to_r8_x_var = mono_compile_create_var (cfg, m_class_get_byval_arg (mono_defaults.double_class), OP_LOCAL);
962 cfg->fconv_to_r8_x_var->flags |= MONO_INST_VOLATILE; /*FIXME, use the don't regalloc flag*/
964 return cfg->fconv_to_r8_x_var;
966 static MonoInst*
967 get_simd_ctor_spill_area (MonoCompile *cfg, MonoClass *avector_klass)
969 if (!cfg->simd_ctor_var) {
970 cfg->simd_ctor_var = mono_compile_create_var (cfg, m_class_get_byval_arg (avector_klass), OP_LOCAL);
971 cfg->simd_ctor_var->flags |= MONO_INST_VOLATILE; /*FIXME, use the don't regalloc flag*/
973 return cfg->simd_ctor_var;
976 static int
977 mono_type_to_expand_op (MonoType *type)
979 switch (type->type) {
980 case MONO_TYPE_I1:
981 case MONO_TYPE_U1:
982 return OP_EXPAND_I1;
983 case MONO_TYPE_I2:
984 case MONO_TYPE_U2:
985 return OP_EXPAND_I2;
986 case MONO_TYPE_I4:
987 case MONO_TYPE_U4:
988 return OP_EXPAND_I4;
989 case MONO_TYPE_I8:
990 case MONO_TYPE_U8:
991 return OP_EXPAND_I8;
992 case MONO_TYPE_R4:
993 return OP_EXPAND_R4;
994 case MONO_TYPE_R8:
995 return OP_EXPAND_R8;
996 default:
997 g_assert_not_reached ();
1001 static int
1002 type_to_comp_op (MonoType *t)
1004 switch (t->type) {
1005 case MONO_TYPE_I1:
1006 case MONO_TYPE_U1:
1007 return OP_PCMPEQB;
1008 case MONO_TYPE_I2:
1009 case MONO_TYPE_U2:
1010 return OP_PCMPEQW;
1011 case MONO_TYPE_I4:
1012 case MONO_TYPE_U4:
1013 return OP_PCMPEQD;
1014 case MONO_TYPE_I8:
1015 case MONO_TYPE_U8:
1016 return OP_PCMPEQQ;
1017 case MONO_TYPE_R4:
1018 return OP_COMPPS;
1019 case MONO_TYPE_R8:
1020 return OP_COMPPD;
1021 default:
1022 g_assert_not_reached ();
1023 return -1;
1027 static int
1028 type_to_gt_op (MonoType *t)
1030 switch (t->type) {
1031 case MONO_TYPE_I1:
1032 return OP_PCMPGTB;
1033 case MONO_TYPE_I2:
1034 return OP_PCMPGTW;
1035 case MONO_TYPE_I4:
1036 return OP_PCMPGTD;
1037 case MONO_TYPE_I8:
1038 return OP_PCMPGTQ;
1039 default:
1040 return -1;
1044 static int
1045 type_to_padd_op (MonoType *t)
1047 switch (t->type) {
1048 case MONO_TYPE_U1:
1049 case MONO_TYPE_I1:
1050 return OP_PADDB;
1051 case MONO_TYPE_U2:
1052 case MONO_TYPE_I2:
1053 return OP_PADDW;
1054 case MONO_TYPE_U4:
1055 case MONO_TYPE_I4:
1056 return OP_PADDD;
1057 case MONO_TYPE_U8:
1058 case MONO_TYPE_I8:
1059 return OP_PADDQ;
1060 case MONO_TYPE_R4:
1061 return OP_ADDPS;
1062 case MONO_TYPE_R8:
1063 return OP_ADDPD;
1064 default:
1065 break;
1067 return -1;
1070 static int
1071 type_to_psub_op (MonoType *t)
1073 switch (t->type) {
1074 case MONO_TYPE_U1:
1075 case MONO_TYPE_I1:
1076 return OP_PSUBB;
1077 case MONO_TYPE_U2:
1078 case MONO_TYPE_I2:
1079 return OP_PSUBW;
1080 case MONO_TYPE_U4:
1081 case MONO_TYPE_I4:
1082 return OP_PSUBD;
1083 case MONO_TYPE_U8:
1084 case MONO_TYPE_I8:
1085 return OP_PSUBQ;
1086 case MONO_TYPE_R4:
1087 return OP_SUBPS;
1088 case MONO_TYPE_R8:
1089 return OP_SUBPD;
1090 default:
1091 break;
1093 return -1;
1096 static int
1097 type_to_pmul_op (MonoType *t)
1099 switch (t->type) {
1100 case MONO_TYPE_U2:
1101 case MONO_TYPE_I2:
1102 return OP_PMULW;
1103 case MONO_TYPE_U4:
1104 case MONO_TYPE_I4:
1105 return OP_PMULD;
1106 case MONO_TYPE_R4:
1107 return OP_MULPS;
1108 case MONO_TYPE_R8:
1109 return OP_MULPD;
1110 case MONO_TYPE_U8:
1111 /* PMULQ multiplies two 32 bit numbers into a 64 bit one */
1112 return -1;
1113 case MONO_TYPE_I8:
1114 return -1;
1115 default:
1116 break;
1118 return -1;
1121 static int
1122 type_to_pdiv_op (MonoType *t)
1124 switch (t->type) {
1125 case MONO_TYPE_R4:
1126 return OP_DIVPS;
1127 case MONO_TYPE_R8:
1128 return OP_DIVPD;
1129 default:
1130 break;
1132 return -1;
1135 static int
1136 type_to_pxor_op (MonoType *t)
1139 * These opcodes have the same semantics, but using the
1140 * correctly typed version is better for performance.
1142 switch (t->type) {
1143 case MONO_TYPE_R4:
1144 return OP_XORPS;
1145 case MONO_TYPE_R8:
1146 return OP_XORPD;
1147 default:
1148 return OP_PXOR;
1152 static int
1153 type_to_pand_op (MonoType *t)
1155 switch (t->type) {
1156 case MONO_TYPE_R4:
1157 return OP_ANDPS;
1158 case MONO_TYPE_R8:
1159 return OP_ANDPD;
1160 default:
1161 return OP_PAND;
1165 static int
1166 type_to_por_op (MonoType *t)
1168 switch (t->type) {
1169 case MONO_TYPE_R4:
1170 return OP_ORPS;
1171 case MONO_TYPE_R8:
1172 return OP_ORPD;
1173 default:
1174 return OP_POR;
1178 static int
1179 type_to_pmin_op (MonoType *t)
1181 switch (t->type) {
1182 case MONO_TYPE_R4:
1183 return OP_MINPS;
1184 case MONO_TYPE_R8:
1185 return OP_MINPD;
1186 case MONO_TYPE_I1:
1187 return OP_PMINB;
1188 case MONO_TYPE_U1:
1189 return OP_PMINB_UN;
1190 case MONO_TYPE_I2:
1191 return OP_PMINW;
1192 case MONO_TYPE_U2:
1193 return OP_PMINW_UN;
1194 case MONO_TYPE_I4:
1195 return OP_PMIND;
1196 case MONO_TYPE_U4:
1197 return OP_PMIND_UN;
1198 default:
1199 return -1;
1203 static int
1204 type_to_pmax_op (MonoType *t)
1206 switch (t->type) {
1207 case MONO_TYPE_R4:
1208 return OP_MAXPS;
1209 case MONO_TYPE_R8:
1210 return OP_MAXPD;
1211 case MONO_TYPE_I1:
1212 return OP_PMAXB;
1213 case MONO_TYPE_U1:
1214 return OP_PMAXB_UN;
1215 case MONO_TYPE_I2:
1216 return OP_PMAXW;
1217 case MONO_TYPE_U2:
1218 return OP_PMAXW_UN;
1219 case MONO_TYPE_I4:
1220 return OP_PMAXD;
1221 case MONO_TYPE_U4:
1222 return OP_PMAXD_UN;
1223 default:
1224 return -1;
1228 static int
1229 get_simd_vreg_or_expanded_scalar (MonoCompile *cfg, MonoClass *klass, MonoType *param_type, MonoInst *src)
1231 MonoInst *ins;
1232 int expand_op;
1234 if (m_class_is_simd_type (mono_class_from_mono_type_internal (param_type)))
1235 return get_simd_vreg (cfg, NULL, src);
1237 expand_op = mono_type_to_expand_op (param_type);
1238 MONO_INST_NEW (cfg, ins, expand_op);
1239 ins->klass = klass;
1240 ins->sreg1 = src->dreg;
1241 ins->type = STACK_VTYPE;
1242 ins->dreg = alloc_ireg (cfg);
1243 MONO_ADD_INS (cfg->cbb, ins);
1245 if (expand_op == OP_EXPAND_R4)
1246 ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
1247 else if (expand_op == OP_EXPAND_R8)
1248 ins->backend.spill_var = get_double_spill_area (cfg);
1250 return ins->dreg;
1254 * simd_intrinsic_emit_binary_op:
1256 * Emit a binary SIMD opcode.
1257 * @LHS/@RHS are the two arguments, they can be either a SIMD type or a scalar one. Scalar arguments are
1258 * expanded to the SIMD type.
1260 static MonoInst*
1261 simd_intrinsic_emit_binary_op (MonoCompile *cfg, int opcode, int flags, MonoClass *klass, MonoType *lhs_type, MonoType *rhs_type, MonoInst *lhs, MonoInst *rhs)
1263 MonoInst* ins;
1264 int left_vreg, right_vreg;
1266 left_vreg = get_simd_vreg_or_expanded_scalar (cfg, klass, lhs_type, lhs);
1267 right_vreg = get_simd_vreg_or_expanded_scalar (cfg, klass, rhs_type, rhs);
1269 MONO_INST_NEW (cfg, ins, opcode);
1270 ins->klass = klass;
1271 ins->sreg1 = left_vreg;
1272 ins->sreg2 = right_vreg;
1273 ins->type = STACK_VTYPE;
1274 ins->dreg = alloc_ireg (cfg);
1275 ins->inst_c0 = flags;
1276 MONO_ADD_INS (cfg->cbb, ins);
1277 return ins;
1280 static MonoInst*
1281 simd_intrinsic_emit_binary (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1283 MonoMethodSignature *sig = mono_method_signature_internal (cmethod);
1285 g_assert (sig->param_count == 2);
1287 return simd_intrinsic_emit_binary_op (cfg, intrinsic->opcode, intrinsic->flags, cmethod->klass, sig->params [0], sig->params [1], args [0], args [1]);
1290 static MonoInst*
1291 simd_intrinsic_emit_unary (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1293 MonoInst* ins;
1294 int vreg;
1296 vreg = get_simd_vreg (cfg, cmethod, args [0]);
1298 MONO_INST_NEW (cfg, ins, intrinsic->opcode);
1299 ins->klass = cmethod->klass;
1300 ins->sreg1 = vreg;
1301 ins->type = STACK_VTYPE;
1302 ins->dreg = alloc_ireg (cfg);
1303 MONO_ADD_INS (cfg->cbb, ins);
1304 return ins;
1307 static int
1308 mono_type_to_extract_op (MonoType *type)
1310 switch (type->type) {
1311 case MONO_TYPE_I1:
1312 return OP_EXTRACT_I1;
1313 case MONO_TYPE_U1:
1314 return OP_EXTRACT_U1;
1315 case MONO_TYPE_I2:
1316 return OP_EXTRACT_I2;
1317 case MONO_TYPE_U2:
1318 return OP_EXTRACT_U2;
1319 case MONO_TYPE_I4:
1320 case MONO_TYPE_U4:
1321 case MONO_TYPE_R4:
1322 return OP_EXTRACT_I4;
1323 default:
1324 g_assert_not_reached ();
1328 /*Returns the amount to shift the element index to get the dword it belongs to*/
1329 static int
1330 mono_type_elements_shift_bits (MonoType *type)
1332 switch (type->type) {
1333 case MONO_TYPE_I1:
1334 case MONO_TYPE_U1:
1335 return 2;
1336 case MONO_TYPE_I2:
1337 case MONO_TYPE_U2:
1338 return 1;
1339 case MONO_TYPE_I4:
1340 case MONO_TYPE_U4:
1341 case MONO_TYPE_R4:
1342 return 0;
1343 default:
1344 g_assert_not_reached ();
1348 static G_GNUC_UNUSED int
1349 mono_type_to_insert_op (MonoType *type)
1351 switch (type->type) {
1352 case MONO_TYPE_I1:
1353 case MONO_TYPE_U1:
1354 return OP_INSERT_I1;
1355 case MONO_TYPE_I2:
1356 case MONO_TYPE_U2:
1357 return OP_INSERT_I2;
1358 case MONO_TYPE_I4:
1359 case MONO_TYPE_U4:
1360 return OP_INSERT_I4;
1361 case MONO_TYPE_I8:
1362 case MONO_TYPE_U8:
1363 return OP_INSERT_I8;
1364 case MONO_TYPE_R4:
1365 return OP_INSERT_R4;
1366 case MONO_TYPE_R8:
1367 return OP_INSERT_R8;
1368 default:
1369 g_assert_not_reached ();
1373 static int
1374 mono_type_to_slow_insert_op (MonoType *type)
1376 switch (type->type) {
1377 case MONO_TYPE_I1:
1378 case MONO_TYPE_U1:
1379 return OP_INSERTX_U1_SLOW;
1380 case MONO_TYPE_I2:
1381 case MONO_TYPE_U2:
1382 return OP_INSERT_I2;
1383 case MONO_TYPE_I4:
1384 case MONO_TYPE_U4:
1385 return OP_INSERTX_I4_SLOW;
1386 case MONO_TYPE_I8:
1387 case MONO_TYPE_U8:
1388 return OP_INSERTX_I8_SLOW;
1389 case MONO_TYPE_R4:
1390 return OP_INSERTX_R4_SLOW;
1391 case MONO_TYPE_R8:
1392 return OP_INSERTX_R8_SLOW;
1393 default:
1394 g_assert_not_reached ();
1398 static MonoInst*
1399 simd_intrinsic_emit_setter (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1401 MonoInst *ins;
1402 MonoMethodSignature *sig = mono_method_signature_internal (cmethod);
1403 int size, align;
1404 gboolean indirect;
1405 int dreg;
1407 size = mono_type_size (sig->params [0], &align);
1409 if (COMPILE_LLVM (cfg)) {
1410 MONO_INST_NEW (cfg, ins, mono_type_to_insert_op (sig->params [0]));
1411 ins->klass = cmethod->klass;
1412 ins->dreg = ins->sreg1 = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);
1413 ins->sreg2 = args [1]->dreg;
1414 ins->inst_c0 = intrinsic->opcode;
1415 MONO_ADD_INS (cfg->cbb, ins);
1416 } else if (size == 2 || size == 4 || size == 8) {
1417 MONO_INST_NEW (cfg, ins, mono_type_to_slow_insert_op (sig->params [0]));
1418 ins->klass = cmethod->klass;
1419 /*This is a partial load so we encode the dependency on the previous value by setting dreg and sreg1 to the same value.*/
1420 ins->dreg = ins->sreg1 = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);
1421 ins->sreg2 = args [1]->dreg;
1422 ins->inst_c0 = intrinsic->opcode;
1423 if (sig->params [0]->type == MONO_TYPE_R4)
1424 ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
1425 else if (sig->params [0]->type == MONO_TYPE_R8)
1426 ins->backend.spill_var = get_double_spill_area (cfg);
1427 MONO_ADD_INS (cfg->cbb, ins);
1428 } else {
1429 int vreg, sreg;
1431 MONO_INST_NEW (cfg, ins, OP_EXTRACTX_U2);
1432 ins->klass = cmethod->klass;
1433 ins->sreg1 = sreg = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);
1434 ins->type = STACK_I4;
1435 ins->dreg = vreg = alloc_ireg (cfg);
1436 ins->inst_c0 = intrinsic->opcode / 2;
1437 MONO_ADD_INS (cfg->cbb, ins);
1439 MONO_INST_NEW (cfg, ins, OP_INSERTX_U1_SLOW);
1440 ins->klass = cmethod->klass;
1441 ins->sreg1 = vreg;
1442 ins->sreg2 = args [1]->dreg;
1443 ins->dreg = sreg;
1444 ins->inst_c0 = intrinsic->opcode;
1445 MONO_ADD_INS (cfg->cbb, ins);
1448 if (indirect) {
1449 MONO_INST_NEW (cfg, ins, OP_STOREX_MEMBASE);
1450 ins->klass = cmethod->klass;
1451 ins->dreg = args [0]->dreg;
1452 ins->sreg1 = dreg;
1453 MONO_ADD_INS (cfg->cbb, ins);
1455 return ins;
1459 * simd_intrinsic_emit_getter_op:
1461 * Emit IR for loading an element of a SIMD value.
1463 * @klass is the simd type, @type is the element type.
1465 static MonoInst*
1466 simd_intrinsic_emit_getter_op (MonoCompile *cfg, int index, MonoClass *klass, MonoType *type, MonoInst *arg)
1468 MonoInst *ins;
1469 int vreg, shift_bits;
1471 vreg = load_simd_vreg_class (cfg, klass, arg, NULL);
1473 if (type->type == MONO_TYPE_I8 || type->type == MONO_TYPE_U8 || type->type == MONO_TYPE_R8) {
1474 MonoInst *ins;
1475 gboolean is_r8 = type->type == MONO_TYPE_R8;
1477 MONO_INST_NEW (cfg, ins, is_r8 ? OP_EXTRACT_R8 : OP_EXTRACT_I8);
1478 ins->klass = klass;
1479 ins->sreg1 = vreg;
1480 ins->inst_c0 = index;
1481 if (is_r8) {
1482 ins->type = STACK_R8;
1483 ins->dreg = alloc_freg (cfg);
1484 ins->backend.spill_var = get_double_spill_area (cfg);
1485 } else {
1486 ins->type = STACK_I8;
1487 ins->dreg = alloc_lreg (cfg);
1489 MONO_ADD_INS (cfg->cbb, ins);
1490 return ins;
1493 shift_bits = mono_type_elements_shift_bits (type);
1495 if ((index >> shift_bits) && !cfg->compile_llvm) {
1496 MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
1497 ins->klass = klass;
1498 ins->sreg1 = vreg;
1499 ins->inst_c0 = index >> shift_bits;
1500 ins->type = STACK_VTYPE;
1501 ins->dreg = vreg = alloc_ireg (cfg);
1502 MONO_ADD_INS (cfg->cbb, ins);
1505 MONO_INST_NEW (cfg, ins, mono_type_to_extract_op (type));
1506 ins->klass = klass;
1507 ins->sreg1 = vreg;
1508 ins->type = STACK_I4;
1509 ins->dreg = vreg = alloc_ireg (cfg);
1510 if (cfg->compile_llvm)
1511 ins->inst_c0 = index;
1512 else
1513 ins->inst_c0 = index & ((1 << shift_bits) - 1);
1514 MONO_ADD_INS (cfg->cbb, ins);
1516 if (type->type == MONO_TYPE_R4) {
1517 MONO_INST_NEW (cfg, ins, cfg->r4fp ? OP_ICONV_TO_R4_RAW : OP_MOVE_I4_TO_F);
1518 ins->klass = mono_defaults.single_class;
1519 ins->sreg1 = vreg;
1520 ins->type = cfg->r4_stack_type;
1521 ins->dreg = alloc_freg (cfg);
1522 ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
1523 MONO_ADD_INS (cfg->cbb, ins);
1525 return ins;
1528 static MonoInst*
1529 simd_intrinsic_emit_getter (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1531 MonoMethodSignature *sig = mono_method_signature_internal (cmethod);
1533 return simd_intrinsic_emit_getter_op (cfg, intrinsic->opcode, cmethod->klass, sig->ret, args [0]);
1536 static MonoInst*
1537 simd_intrinsic_emit_long_getter (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1539 MonoInst *ins;
1540 int vreg;
1541 gboolean is_r8 = mono_method_signature_internal (cmethod)->ret->type == MONO_TYPE_R8;
1543 vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
1545 MONO_INST_NEW (cfg, ins, is_r8 ? OP_EXTRACT_R8 : OP_EXTRACT_I8);
1546 ins->klass = cmethod->klass;
1547 ins->sreg1 = vreg;
1548 ins->inst_c0 = intrinsic->opcode;
1549 if (is_r8) {
1550 ins->type = STACK_R8;
1551 ins->dreg = alloc_freg (cfg);
1552 ins->backend.spill_var = get_double_spill_area (cfg);
1553 } else {
1554 ins->type = STACK_I8;
1555 ins->dreg = alloc_lreg (cfg);
1557 MONO_ADD_INS (cfg->cbb, ins);
1559 return ins;
1562 static MonoInst*
1563 simd_intrinsic_emit_ctor (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1565 MonoInst *ins = NULL;
1566 int i, addr_reg;
1567 gboolean is_ldaddr = (args [0]->opcode == OP_LDADDR && args [0]->inst_left->opcode != OP_ARG);
1568 MonoMethodSignature *sig = mono_method_signature_internal (cmethod);
1569 int store_op = mono_type_to_store_membase (cfg, sig->params [0]);
1570 int arg_size = mono_type_size (sig->params [0], &i);
1571 int opcode;
1573 if (sig->param_count == 1) {
1574 int dreg;
1576 if (is_ldaddr) {
1577 dreg = args [0]->inst_i0->dreg;
1578 NULLIFY_INS (args [0]);
1579 } else {
1580 g_assert (args [0]->type == STACK_MP || args [0]->type == STACK_PTR);
1581 dreg = alloc_ireg (cfg);
1584 if (intrinsic)
1585 opcode = intrinsic->opcode;
1586 else
1587 opcode = mono_type_to_expand_op (sig->params [0]);
1588 MONO_INST_NEW (cfg, ins, opcode);
1589 ins->klass = cmethod->klass;
1590 ins->sreg1 = args [1]->dreg;
1591 ins->type = STACK_VTYPE;
1592 ins->dreg = dreg;
1593 MONO_ADD_INS (cfg->cbb, ins);
1594 if (sig->params [0]->type == MONO_TYPE_R4)
1595 ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
1596 else if (sig->params [0]->type == MONO_TYPE_R8)
1597 ins->backend.spill_var = get_double_spill_area (cfg);
1599 if (!is_ldaddr) {
1600 MONO_INST_NEW (cfg, ins, OP_STOREX_MEMBASE);
1601 ins->dreg = args [0]->dreg;
1602 ins->sreg1 = dreg;
1603 MONO_ADD_INS (cfg->cbb, ins);
1605 return ins;
1608 if (is_ldaddr) {
1609 NEW_VARLOADA (cfg, ins, get_simd_ctor_spill_area (cfg, cmethod->klass), &cmethod->klass->byref_arg);
1610 MONO_ADD_INS (cfg->cbb, ins);
1611 addr_reg = ins->dreg;
1612 } else {
1613 g_assert (args [0]->type == STACK_MP || args [0]->type == STACK_PTR);
1614 addr_reg = args [0]->dreg;
1617 for (i = sig->param_count - 1; i >= 0; --i) {
1618 EMIT_NEW_STORE_MEMBASE (cfg, ins, store_op, addr_reg, i * arg_size, args [i + 1]->dreg);
1621 if (sig->param_count * arg_size < 16) {
1622 /* If there are not enough arguments, fill the rest with 0s */
1623 for (i = sig->param_count; i < 16 / arg_size; ++i) {
1624 switch (arg_size) {
1625 case 4:
1626 MONO_EMIT_NEW_STORE_MEMBASE_IMM (cfg, OP_STOREI4_MEMBASE_IMM, addr_reg, i * arg_size, 0);
1627 break;
1628 default:
1629 g_assert_not_reached ();
1630 break;
1635 if (is_ldaddr) { /*Eliminate LDADDR if it's initing a local var*/
1636 int vreg = ((MonoInst*)args [0]->inst_p0)->dreg;
1637 NULLIFY_INS (args [0]);
1639 MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE);
1640 ins->klass = cmethod->klass;
1641 ins->sreg1 = addr_reg;
1642 ins->type = STACK_VTYPE;
1643 ins->dreg = vreg;
1644 MONO_ADD_INS (cfg->cbb, ins);
1646 return ins;
1649 static MonoInst*
1650 simd_intrinsic_emit_cast (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1652 MonoInst *ins;
1653 MonoClass *klass;
1654 int vreg;
1656 vreg = get_simd_vreg (cfg, cmethod, args [0]);
1658 if (cmethod->is_inflated)
1659 /* Vector<T> */
1660 klass = mono_class_from_mono_type_internal (mono_method_signature_internal (cmethod)->ret);
1661 else
1662 klass = cmethod->klass;
1664 MONO_INST_NEW (cfg, ins, OP_XMOVE);
1665 ins->klass = klass;
1666 ins->type = STACK_VTYPE;
1667 ins->sreg1 = vreg;
1668 ins->dreg = alloc_ireg (cfg);
1669 MONO_ADD_INS (cfg->cbb, ins);
1670 return ins;
1673 static MonoInst*
1674 simd_intrinsic_emit_shift (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1676 MonoInst *ins;
1677 int vreg, vreg2 = -1, opcode = intrinsic->opcode;
1679 vreg = get_simd_vreg (cfg, cmethod, args [0]);
1681 if (args [1]->opcode != OP_ICONST) {
1682 MONO_INST_NEW (cfg, ins, OP_ICONV_TO_X);
1683 ins->klass = mono_defaults.int32_class;
1684 ins->sreg1 = args [1]->dreg;
1685 ins->type = STACK_I4;
1686 ins->dreg = vreg2 = alloc_ireg (cfg);
1687 MONO_ADD_INS (cfg->cbb, ins);
1689 ++opcode; /*The shift_reg version op is always +1 from the regular one.*/
1692 MONO_INST_NEW (cfg, ins, opcode);
1693 ins->klass = cmethod->klass;
1694 ins->sreg1 = vreg;
1695 ins->sreg2 = vreg2;
1697 if (args [1]->opcode == OP_ICONST) {
1698 ins->inst_imm = args [1]->inst_c0;
1699 NULLIFY_INS (args [1]);
1702 ins->type = STACK_VTYPE;
1703 ins->dreg = alloc_ireg (cfg);
1704 MONO_ADD_INS (cfg->cbb, ins);
1705 return ins;
1708 static inline gboolean
1709 mono_op_is_packed_compare (int op)
1711 return op >= OP_PCMPEQB && op <= OP_PCMPEQQ;
1714 static MonoInst*
1715 simd_intrinsic_emit_equality_op (MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args, int opcode, int flags)
1717 MonoInst* ins;
1718 int left_vreg, right_vreg, tmp_vreg;
1720 left_vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
1721 right_vreg = get_simd_vreg (cfg, cmethod, args [1]);
1723 MONO_INST_NEW (cfg, ins, opcode);
1724 ins->klass = cmethod->klass;
1725 ins->sreg1 = left_vreg;
1726 ins->sreg2 = right_vreg;
1727 ins->type = STACK_VTYPE;
1728 ins->klass = cmethod->klass;
1729 ins->dreg = tmp_vreg = alloc_ireg (cfg);
1730 ins->inst_c0 = flags;
1731 MONO_ADD_INS (cfg->cbb, ins);
1733 /*FIXME the next ops are SSE specific*/
1734 MONO_INST_NEW (cfg, ins, OP_EXTRACT_MASK);
1735 ins->klass = cmethod->klass;
1736 ins->sreg1 = tmp_vreg;
1737 ins->type = STACK_I4;
1738 ins->dreg = tmp_vreg = alloc_ireg (cfg);
1739 MONO_ADD_INS (cfg->cbb, ins);
1741 /*FP ops have a not equal instruction, which means that we must test the results with OR semantics.*/
1742 if (mono_op_is_packed_compare (opcode) || flags == SIMD_COMP_EQ) {
1743 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, tmp_vreg, 0xFFFF);
1744 NEW_UNALU (cfg, ins, flags == SIMD_COMP_EQ ? OP_CEQ : OP_CLT_UN, tmp_vreg, -1);
1745 } else {
1746 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, tmp_vreg, 0);
1747 NEW_UNALU (cfg, ins, OP_CGT_UN, tmp_vreg, -1);
1749 MONO_ADD_INS (cfg->cbb, ins);
1750 return ins;
1753 static MonoInst*
1754 simd_intrinsic_emit_equality (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1756 return simd_intrinsic_emit_equality_op (cfg, cmethod, args, intrinsic->opcode, intrinsic->flags);
1759 static MonoInst*
1760 simd_intrinsic_emit_shuffle (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1762 MonoInst *ins;
1763 int vreg, vreg2 = -1;
1764 int param_count = mono_method_signature_internal (cmethod)->param_count;
1766 if (args [param_count - 1]->opcode != OP_ICONST) {
1767 /*TODO Shuffle with non literals is not yet supported */
1768 return NULL;
1771 vreg = get_simd_vreg (cfg, cmethod, args [0]);
1772 if (param_count == 3)
1773 vreg2 = get_simd_vreg (cfg, cmethod, args [1]);
1775 NULLIFY_INS (args [param_count - 1]);
1778 MONO_INST_NEW (cfg, ins, intrinsic->opcode);
1779 ins->klass = cmethod->klass;
1780 ins->sreg1 = vreg;
1781 ins->sreg2 = vreg2;
1782 ins->inst_c0 = args [param_count - 1]->inst_c0;
1783 ins->type = STACK_VTYPE;
1784 ins->dreg = alloc_ireg (cfg);
1785 MONO_ADD_INS (cfg->cbb, ins);
1787 if (param_count == 3 && ins->opcode == OP_PSHUFLED)
1788 ins->opcode = OP_SHUFPS;
1789 return ins;
1792 static MonoInst*
1793 simd_intrinsic_emit_load_aligned (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1795 MonoInst *ins;
1797 MONO_INST_NEW (cfg, ins, OP_LOADX_ALIGNED_MEMBASE);
1798 ins->klass = cmethod->klass;
1799 ins->sreg1 = args [0]->dreg;
1800 ins->type = STACK_VTYPE;
1801 ins->dreg = alloc_ireg (cfg);
1802 MONO_ADD_INS (cfg->cbb, ins);
1803 return ins;
1806 static MonoInst*
1807 simd_intrinsic_emit_store (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1809 MonoInst *ins;
1810 int vreg;
1812 vreg = get_simd_vreg (cfg, cmethod, args [1]);
1814 MONO_INST_NEW (cfg, ins, intrinsic->opcode);
1815 ins->klass = cmethod->klass;
1816 ins->dreg = args [0]->dreg;
1817 ins->sreg1 = vreg;
1818 ins->type = STACK_VTYPE;
1819 MONO_ADD_INS (cfg->cbb, ins);
1820 return ins;
1823 static MonoInst*
1824 simd_intrinsic_emit_extract_mask (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1826 MonoInst *ins;
1827 int vreg;
1829 vreg = get_simd_vreg (cfg, cmethod, args [0]);
1831 MONO_INST_NEW (cfg, ins, OP_EXTRACT_MASK);
1832 ins->klass = cmethod->klass;
1833 ins->sreg1 = vreg;
1834 ins->type = STACK_I4;
1835 ins->dreg = alloc_ireg (cfg);
1836 MONO_ADD_INS (cfg->cbb, ins);
1838 return ins;
1841 static MonoInst*
1842 simd_intrinsic_emit_prefetch (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1844 MonoInst *ins;
1846 MONO_INST_NEW (cfg, ins, OP_PREFETCH_MEMBASE);
1847 ins->klass = cmethod->klass;
1848 ins->sreg1 = args [0]->dreg;
1849 ins->backend.arg_info = intrinsic->flags;
1850 MONO_ADD_INS (cfg->cbb, ins);
1851 return ins;
1854 static MonoInst*
1855 simd_intrinsic_emit_const (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1857 MonoInst *ins;
1859 MONO_INST_NEW (cfg, ins, intrinsic->opcode);
1860 ins->klass = cmethod->klass;
1861 ins->type = STACK_VTYPE;
1862 ins->dreg = alloc_xreg (cfg);
1863 MONO_ADD_INS (cfg->cbb, ins);
1864 return ins;
1867 static const char *
1868 simd_version_name (guint32 version)
1870 switch (version) {
1871 case SIMD_VERSION_SSE1:
1872 return "sse1";
1873 case SIMD_VERSION_SSE2:
1874 return "sse2";
1875 case SIMD_VERSION_SSE3:
1876 return "sse3";
1877 case SIMD_VERSION_SSSE3:
1878 return "ssse3";
1879 case SIMD_VERSION_SSE41:
1880 return "sse41";
1881 case SIMD_VERSION_SSE42:
1882 return "sse42";
1883 case SIMD_VERSION_SSE4a:
1884 return "sse4a";
1886 return "n/a";
1889 static MonoInst*
1890 emit_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args, const SimdIntrinsic *intrinsics, guint32 size)
1892 const SimdIntrinsic *result = (const SimdIntrinsic *)mono_binary_search (cmethod->name, intrinsics, size, sizeof (SimdIntrinsic), &simd_intrinsic_compare_by_name);
1893 if (!result) {
1894 DEBUG (printf ("function doesn't have a simd intrinsic %s::%s/%d\n", m_class_get_name (cmethod->klass), cmethod->name, fsig->param_count));
1895 return NULL;
1897 if (IS_DEBUG_ON (cfg)) {
1898 int i, max;
1899 printf ("found call to intrinsic %s::%s/%d -> %s\n", m_class_get_name (cmethod->klass), cmethod->name, fsig->param_count, method_name (result->name));
1900 max = fsig->param_count + fsig->hasthis;
1901 for (i = 0; i < max; ++i) {
1902 printf ("param %d: ", i);
1903 mono_print_ins (args [i]);
1906 if (result->simd_version_flags && !(result->simd_version_flags & simd_supported_versions)) {
1907 if (IS_DEBUG_ON (cfg)) {
1908 int x;
1909 printf ("function %s::%s/%d requires one of unsuported SIMD instruction set(s): ", m_class_get_name (cmethod->klass), cmethod->name, fsig->param_count);
1910 for (x = 1; x <= SIMD_VERSION_INDEX_END; x++)
1911 if (result->simd_version_flags & (1 << x))
1912 printf ("%s ", simd_version_name (1 << x));
1914 printf ("\n");
1916 return NULL;
1919 switch (result->simd_emit_mode) {
1920 case SIMD_EMIT_BINARY:
1921 return simd_intrinsic_emit_binary (result, cfg, cmethod, args);
1922 case SIMD_EMIT_UNARY:
1923 return simd_intrinsic_emit_unary (result, cfg, cmethod, args);
1924 case SIMD_EMIT_SETTER:
1925 return simd_intrinsic_emit_setter (result, cfg, cmethod, args);
1926 case SIMD_EMIT_GETTER:
1927 return simd_intrinsic_emit_getter (result, cfg, cmethod, args);
1928 case SIMD_EMIT_GETTER_QWORD:
1929 return simd_intrinsic_emit_long_getter (result, cfg, cmethod, args);
1930 case SIMD_EMIT_CTOR:
1931 return simd_intrinsic_emit_ctor (result, cfg, cmethod, args);
1932 case SIMD_EMIT_CAST:
1933 return simd_intrinsic_emit_cast (result, cfg, cmethod, args);
1934 case SIMD_EMIT_SHUFFLE:
1935 return simd_intrinsic_emit_shuffle (result, cfg, cmethod, args);
1936 case SIMD_EMIT_SHIFT:
1937 return simd_intrinsic_emit_shift (result, cfg, cmethod, args);
1938 case SIMD_EMIT_EQUALITY:
1939 return simd_intrinsic_emit_equality (result, cfg, cmethod, args);
1940 case SIMD_EMIT_LOAD_ALIGNED:
1941 return simd_intrinsic_emit_load_aligned (result, cfg, cmethod, args);
1942 case SIMD_EMIT_STORE:
1943 return simd_intrinsic_emit_store (result, cfg, cmethod, args);
1944 case SIMD_EMIT_EXTRACT_MASK:
1945 return simd_intrinsic_emit_extract_mask (result, cfg, cmethod, args);
1946 case SIMD_EMIT_PREFETCH:
1947 return simd_intrinsic_emit_prefetch (result, cfg, cmethod, args);
1949 g_assert_not_reached ();
1952 static int
1953 mono_emit_vector_ldelema (MonoCompile *cfg, MonoType *array_type, MonoInst *arr, MonoInst *index, gboolean check_bounds)
1955 MonoInst *ins;
1956 guint32 size;
1957 int mult_reg, add_reg, array_reg, index_reg, index2_reg, index3_reg;
1959 size = mono_array_element_size (mono_class_from_mono_type_internal (array_type));
1960 mult_reg = alloc_preg (cfg);
1961 array_reg = arr->dreg;
1962 index_reg = index->dreg;
1964 #if TARGET_SIZEOF_VOID_P == 8
1965 /* The array reg is 64 bits but the index reg is only 32 */
1966 index2_reg = alloc_preg (cfg);
1967 MONO_EMIT_NEW_UNALU (cfg, OP_SEXT_I4, index2_reg, index_reg);
1968 #else
1969 index2_reg = index_reg;
1970 #endif
1971 index3_reg = alloc_preg (cfg);
1973 if (check_bounds) {
1974 MONO_EMIT_BOUNDS_CHECK (cfg, array_reg, MonoArray, max_length, index2_reg);
1975 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_PADD_IMM, index3_reg, index2_reg, 16 / size - 1);
1976 MONO_EMIT_BOUNDS_CHECK (cfg, array_reg, MonoArray, max_length, index3_reg);
1979 add_reg = alloc_preg (cfg);
1981 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_MUL_IMM, mult_reg, index2_reg, size);
1982 MONO_EMIT_NEW_BIALU (cfg, OP_PADD, add_reg, array_reg, mult_reg);
1983 NEW_BIALU_IMM (cfg, ins, OP_PADD_IMM, add_reg, add_reg, MONO_STRUCT_OFFSET (MonoArray, vector));
1984 ins->type = STACK_PTR;
1985 MONO_ADD_INS (cfg->cbb, ins);
1987 return add_reg;
1990 static MonoInst*
1991 emit_array_extension_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
1993 if ((!strcmp ("GetVector", cmethod->name) || !strcmp ("GetVectorAligned", cmethod->name)) && fsig->param_count == 2) {
1994 MonoInst *load;
1995 int addr = mono_emit_vector_ldelema (cfg, fsig->params [0], args [0], args [1], TRUE);
1997 MONO_INST_NEW (cfg, load, !strcmp ("GetVectorAligned", cmethod->name) ? OP_LOADX_ALIGNED_MEMBASE : OP_LOADX_MEMBASE );
1998 load->klass = cmethod->klass;
1999 load->sreg1 = addr;
2000 load->type = STACK_VTYPE;
2001 load->dreg = alloc_ireg (cfg);
2002 MONO_ADD_INS (cfg->cbb, load);
2004 return load;
2006 if ((!strcmp ("SetVector", cmethod->name) || !strcmp ("SetVectorAligned", cmethod->name)) && fsig->param_count == 3) {
2007 MonoInst *store;
2008 int vreg = get_simd_vreg (cfg, cmethod, args [1]);
2009 int addr = mono_emit_vector_ldelema (cfg, fsig->params [0], args [0], args [2], TRUE);
2011 MONO_INST_NEW (cfg, store, !strcmp ("SetVectorAligned", cmethod->name) ? OP_STOREX_ALIGNED_MEMBASE_REG : OP_STOREX_MEMBASE);
2012 store->klass = cmethod->klass;
2013 store->dreg = addr;
2014 store->sreg1 = vreg;
2015 MONO_ADD_INS (cfg->cbb, store);
2017 return store;
2019 if (!strcmp ("IsAligned", cmethod->name) && fsig->param_count == 2) {
2020 MonoInst *ins;
2021 int addr = mono_emit_vector_ldelema (cfg, fsig->params [0], args [0], args [1], FALSE);
2023 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_AND_IMM, addr, addr, 15);
2024 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, addr, 0);
2025 NEW_UNALU (cfg, ins, OP_CEQ, addr, -1);
2026 MONO_ADD_INS (cfg->cbb, ins);
2028 return ins;
2030 return NULL;
2033 static MonoInst*
2034 emit_simd_runtime_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2036 if (!strcmp ("get_AccelMode", cmethod->name) && fsig->param_count == 0) {
2037 MonoInst *ins;
2038 EMIT_NEW_ICONST (cfg, ins, simd_supported_versions);
2039 return ins;
2041 return NULL;
2044 static gboolean
2045 is_sys_numerics_assembly (MonoAssembly *assembly)
2047 return !strcmp ("System.Numerics", assembly->aname.name);
2050 static gboolean
2051 is_sys_numerics_vectors_assembly (MonoAssembly *assembly)
2053 return !strcmp ("System.Numerics.Vectors", assembly->aname.name);
2056 MonoInst*
2057 mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2059 const char *class_name;
2060 MonoInst *simd_inst = NULL;
2062 if (is_sys_numerics_assembly (m_class_get_image (cmethod->klass)->assembly)) {
2063 simd_inst = emit_sys_numerics_intrinsics (cfg, cmethod, fsig, args);
2064 goto on_exit;
2067 if (is_sys_numerics_vectors_assembly (m_class_get_image (cmethod->klass)->assembly)) {
2068 simd_inst = emit_sys_numerics_vectors_intrinsics (cfg, cmethod, fsig, args);
2069 goto on_exit;
2072 if (strcmp ("Mono.Simd", m_class_get_image (cmethod->klass)->assembly->aname.name) ||
2073 strcmp ("Mono.Simd", m_class_get_name_space (cmethod->klass))) {
2074 goto on_exit;
2077 class_name = m_class_get_name (cmethod->klass);
2078 if (!strcmp ("SimdRuntime", class_name)) {
2079 simd_inst = emit_simd_runtime_intrinsics (cfg, cmethod, fsig, args);
2080 goto on_exit;
2083 if (!strcmp ("ArrayExtensions", class_name)) {
2084 simd_inst = emit_array_extension_intrinsics (cfg, cmethod, fsig, args);
2085 goto on_exit;
2088 if (!strcmp ("VectorOperations", class_name)) {
2089 if (!(cmethod->flags & METHOD_ATTRIBUTE_STATIC))
2090 goto on_exit;
2091 class_name = m_class_get_name (mono_class_from_mono_type_internal (mono_method_signature_internal (cmethod)->params [0]));
2092 } else if (!m_class_is_simd_type (cmethod->klass))
2093 goto on_exit;
2095 cfg->uses_simd_intrinsics |= MONO_CFG_USES_SIMD_INTRINSICS_SIMPLIFY_INDIRECTION;
2096 if (!strcmp ("Vector2d", class_name)) {
2097 simd_inst = emit_intrinsics (cfg, cmethod, fsig, args, vector2d_intrinsics, sizeof (vector2d_intrinsics) / sizeof (SimdIntrinsic));
2098 goto on_exit;
2100 if (!strcmp ("Vector4f", class_name)) {
2101 simd_inst = emit_intrinsics (cfg, cmethod, fsig, args, vector4f_intrinsics, sizeof (vector4f_intrinsics) / sizeof (SimdIntrinsic));
2102 goto on_exit;
2104 if (!strcmp ("Vector2ul", class_name)) {
2105 simd_inst = emit_intrinsics (cfg, cmethod, fsig, args, vector2ul_intrinsics, sizeof (vector2ul_intrinsics) / sizeof (SimdIntrinsic));
2106 goto on_exit;
2108 if (!strcmp ("Vector2l", class_name)) {
2109 simd_inst = emit_intrinsics (cfg, cmethod, fsig, args, vector2l_intrinsics, sizeof (vector2l_intrinsics) / sizeof (SimdIntrinsic));
2110 goto on_exit;
2112 if (!strcmp ("Vector4ui", class_name)) {
2113 simd_inst = emit_intrinsics (cfg, cmethod, fsig, args, vector4ui_intrinsics, sizeof (vector4ui_intrinsics) / sizeof (SimdIntrinsic));
2114 goto on_exit;
2116 if (!strcmp ("Vector4i", class_name)) {
2117 simd_inst = emit_intrinsics (cfg, cmethod, fsig, args, vector4i_intrinsics, sizeof (vector4i_intrinsics) / sizeof (SimdIntrinsic));
2118 goto on_exit;
2120 if (!strcmp ("Vector8us", class_name)) {
2121 simd_inst = emit_intrinsics (cfg, cmethod, fsig, args, vector8us_intrinsics, sizeof (vector8us_intrinsics) / sizeof (SimdIntrinsic));
2122 goto on_exit;
2124 if (!strcmp ("Vector8s", class_name)) {
2125 simd_inst = emit_intrinsics (cfg, cmethod, fsig, args, vector8s_intrinsics, sizeof (vector8s_intrinsics) / sizeof (SimdIntrinsic));
2126 goto on_exit;
2128 if (!strcmp ("Vector16b", class_name)) {
2129 simd_inst = emit_intrinsics (cfg, cmethod, fsig, args, vector16b_intrinsics, sizeof (vector16b_intrinsics) / sizeof (SimdIntrinsic));
2130 goto on_exit;
2132 if (!strcmp ("Vector16sb", class_name)) {
2133 simd_inst = emit_intrinsics (cfg, cmethod, fsig, args, vector16sb_intrinsics, sizeof (vector16sb_intrinsics) / sizeof (SimdIntrinsic));
2134 goto on_exit;
2137 on_exit:
2138 if (simd_inst != NULL) {
2139 cfg->uses_simd_intrinsics |= MONO_CFG_USES_SIMD_INTRINSICS;
2140 cfg->uses_simd_intrinsics |= MONO_CFG_USES_SIMD_INTRINSICS_DECOMPOSE_VTYPE;
2143 return simd_inst;
2146 static void
2147 assert_handled (MonoCompile *cfg, MonoMethod *method)
2149 MonoCustomAttrInfo *cattr;
2150 ERROR_DECL (error);
2152 if (cfg->verbose_level > 1) {
2153 cattr = mono_custom_attrs_from_method_checked (method, error);
2155 if (cattr) {
2156 gboolean has_attr = FALSE;
2157 for (int i = 0; i < cattr->num_attrs; ++i)
2158 if (cattr->attrs [i].ctor && (!strcmp (m_class_get_name (cattr->attrs [i].ctor->klass), "JitIntrinsicAttribute")))
2159 has_attr = TRUE;
2160 if (has_attr) {
2161 printf ("SIMD intrinsic unhandled: %s\n", mono_method_get_name_full (method, TRUE, TRUE, MONO_TYPE_NAME_FORMAT_IL));
2162 fflush (stdout);
2163 //g_assert_not_reached ();
2165 mono_custom_attrs_free (cattr);
/*
 * Intrinsic table shared by System.Numerics.Vector2/Vector3/Vector4.
 * emit_vector_intrinsics () searches it with mono_binary_search () and
 * simd_intrinsic_compare_by_name, then dispatches on the name of the entry
 * it found. Entries listing fewer initializers than the struct has fields
 * leave the remaining fields zero initialized.
 */
2170 // The entries should be ordered by name
2171 // System.Numerics.Vector2/Vector3/Vector4
2172 static const SimdIntrinsic vector2_intrinsics[] = {
2173 { SN_ctor, OP_EXPAND_R4 },
2174 { SN_Abs },
2175 { SN_Dot, OP_DPPS },
2176 { SN_Equals, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
2177 { SN_Max, OP_MAXPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
2178 { SN_Min, OP_MINPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
2179 { SN_SquareRoot, OP_SQRTPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
2180 { SN_op_Addition, OP_ADDPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
2181 { SN_op_Division, OP_DIVPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
2182 { SN_op_Multiply, OP_MULPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
2183 { SN_op_Subtraction, OP_SUBPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
2186 static MonoInst*
2187 emit_vector_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2189 const SimdIntrinsic *intrins;
2190 MonoMethodSignature *sig = mono_method_signature_internal (cmethod);
2191 MonoType *type = m_class_get_byval_arg (cmethod->klass);
2193 if (!m_class_is_simd_type (cmethod->klass))
2194 return NULL;
2197 * Vector2/3/4 are handled the same way, since the underlying SIMD type is the same (4 * r4).
2199 intrins = (const SimdIntrinsic*)mono_binary_search (cmethod->name, vector2_intrinsics, sizeof (vector2_intrinsics) / sizeof (SimdIntrinsic), sizeof (SimdIntrinsic), &simd_intrinsic_compare_by_name);
2200 if (!intrins) {
2201 assert_handled (cfg, cmethod);
2202 return NULL;
2205 if (cfg->verbose_level > 1) {
2206 char *name = mono_method_full_name (cmethod, TRUE);
2207 printf (" SIMD intrinsic %s\n", name);
2208 g_free (name);
2211 switch (intrins->name) {
2212 case SN_ctor: {
2213 gboolean match = TRUE;
2214 for (int i = 0; i < fsig->param_count; ++i)
2215 if (fsig->params [i]->type != MONO_TYPE_R4)
2216 match = FALSE;
2217 if (!match)
2218 break;
2219 return simd_intrinsic_emit_ctor (intrins, cfg, cmethod, args);
2221 case SN_Equals:
2222 if (!(fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && fsig->params [0] == type))
2223 break;
2224 return simd_intrinsic_emit_equality (intrins, cfg, cmethod, args);
2225 case SN_SquareRoot:
2226 if (!(fsig->param_count == 1 && fsig->ret == type && fsig->params [0] == type))
2227 break;
2228 return simd_intrinsic_emit_unary (intrins, cfg, cmethod, args);
2229 case SN_Dot:
2230 if (!(fsig->param_count == 2 && fsig->ret->type == MONO_TYPE_R4 && fsig->params [0] == type && fsig->params [1] == type))
2231 break;
2232 if (COMPILE_LLVM (cfg)) {
2233 MonoInst *ins;
2235 ins = simd_intrinsic_emit_binary (intrins, cfg, cmethod, args);
2236 /* The end result is in the lowest element */
2237 return simd_intrinsic_emit_getter_op (cfg, 0, cmethod->klass, mono_method_signature_internal (cmethod)->ret, ins);
2239 break;
2240 case SN_Abs: {
2241 // abs(x) = max(x, sub(0,x))
2242 MonoInst *sub;
2243 MonoInst *zero;
2245 if (!(fsig->param_count == 1 && fsig->ret == type && fsig->params [0] == type))
2246 break;
2248 MONO_INST_NEW (cfg, zero, OP_XZERO);
2249 zero->dreg = alloc_xreg (cfg);
2250 zero->klass = cmethod->klass;
2251 MONO_ADD_INS (cfg->cbb, zero);
2253 sub = simd_intrinsic_emit_binary_op (cfg, OP_SUBPS, 0, cmethod->klass, sig->params [0], sig->params [0], zero, args [0]);
2254 return simd_intrinsic_emit_binary_op (cfg, OP_MAXPS, 0, cmethod->klass, sig->params [0], sig->params [0], args [0], sub);
2256 case SN_Max:
2257 case SN_Min:
2258 case SN_op_Addition:
2259 case SN_op_Division:
2260 case SN_op_Multiply:
2261 case SN_op_Subtraction:
2262 if (!(fsig->param_count == 2 && fsig->ret == type && (fsig->params [0] == type || fsig->params [0]->type == MONO_TYPE_R4) && (fsig->params [1] == type || fsig->params [1]->type == MONO_TYPE_R4)))
2263 break;
2264 return simd_intrinsic_emit_binary (intrins, cfg, cmethod, args);
2265 default:
2266 break;
2269 assert_handled (cfg, cmethod);
2271 if (cfg->verbose_level > 1) {
2272 char *name = mono_method_full_name (cmethod, TRUE);
2273 printf (" SIMD method %s not handled.\n", name);
2274 g_free (name);
2276 return NULL;
2279 static MonoInst*
2280 emit_vector_is_hardware_accelerated_intrinsic (MonoCompile *cfg)
2282 MonoInst *ins;
2284 if (simd_supported_versions)
2285 EMIT_NEW_ICONST (cfg, ins, 1);
2286 else
2287 EMIT_NEW_ICONST (cfg, ins, 0);
2288 ins->type = STACK_I4;
2289 return ins;
/*
 * Names of the Vector<T> methods handled by emit_vector_t_intrinsics (),
 * which searches this table with mono_binary_search () and
 * simd_intrinsic_compare_by_name and then dispatches on the entry name.
 * Only get_AllOnes and get_Zero carry an opcode; all other fields are left
 * zero initialized.
 */
2292 /* These should be ordered by name */
2293 static const SimdIntrinsic vector_t_intrinsics[] = {
2294 { SN_ctor },
2295 { SN_Abs },
2296 { SN_CopyTo },
2297 { SN_Equals },
2298 { SN_GreaterThan },
2299 { SN_GreaterThanOrEqual },
2300 { SN_LessThan },
2301 { SN_LessThanOrEqual },
2302 { SN_Max },
2303 { SN_Min },
2304 { SN_get_AllOnes, OP_XONES },
2305 { SN_get_Count },
2306 { SN_get_Item },
2307 { SN_get_Zero, OP_XZERO },
2308 { SN_op_Addition },
2309 { SN_op_BitwiseAnd },
2310 { SN_op_BitwiseOr },
2311 { SN_op_Division },
2312 { SN_op_ExclusiveOr },
2313 { SN_op_Explicit },
2314 { SN_op_Multiply },
2315 { SN_op_Subtraction }
2318 static MonoInst*
2319 emit_vector_t_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2321 const SimdIntrinsic *intrins;
2322 MonoType *type, *etype;
2323 MonoInst *ins;
2324 int size, len, index;
2326 intrins = (const SimdIntrinsic*)mono_binary_search (cmethod->name, vector_t_intrinsics, sizeof (vector_t_intrinsics) / sizeof (SimdIntrinsic), sizeof (SimdIntrinsic), &simd_intrinsic_compare_by_name);
2327 if (!intrins) {
2328 assert_handled (cfg, cmethod);
2329 return NULL;
2332 type = m_class_get_byval_arg (cmethod->klass);
2333 etype = mono_class_get_context (cmethod->klass)->class_inst->type_argv [0];
2334 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
2335 g_assert (size);
2336 len = 16 / size;
2338 if (!MONO_TYPE_IS_PRIMITIVE (etype))
2339 return NULL;
2341 if (cfg->verbose_level > 1) {
2342 char *name = mono_method_full_name (cmethod, TRUE);
2343 printf (" SIMD intrinsic %s\n", name);
2344 g_free (name);
2347 switch (intrins->name) {
2348 case SN_get_Count:
2349 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
2350 break;
2351 EMIT_NEW_ICONST (cfg, ins, len);
2352 return ins;
2353 case SN_get_AllOnes:
2354 case SN_get_Zero:
2355 if (!(fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type)))
2356 break;
2357 return simd_intrinsic_emit_const (intrins, cfg, cmethod, args);
2358 case SN_get_Item:
2359 g_assert (fsig->param_count == 1);
2360 if (args [1]->opcode != OP_ICONST)
2361 return NULL;
2362 index = args [1]->inst_c0;
2363 if (index < 0 || index >= len)
2364 return NULL;
2365 return simd_intrinsic_emit_getter_op (cfg, index, cmethod->klass, etype, args [0]);
2366 case SN_ctor:
2367 if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype))
2368 return simd_intrinsic_emit_ctor (NULL, cfg, cmethod, args);
2369 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
2370 MonoInst *array_ins = args [1];
2371 MonoInst *index_ins;
2372 MonoInst *ldelema_ins;
2373 MonoInst *var;
2374 int end_index_reg;
2376 if (args [0]->opcode != OP_LDADDR)
2377 return NULL;
2379 /* .ctor (T[]) or .ctor (T[], index) */
2381 if (fsig->param_count == 2) {
2382 index_ins = args [2];
2383 } else {
2384 EMIT_NEW_ICONST (cfg, index_ins, 0);
2387 /* Emit index check for the end (index + len - 1 < array length) */
2388 end_index_reg = alloc_ireg (cfg);
2389 EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1);
2390 MONO_EMIT_BOUNDS_CHECK (cfg, array_ins->dreg, MonoArray, max_length, end_index_reg);
2392 /* Load the array slice into the simd reg */
2393 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, TRUE);
2394 g_assert (args [0]->opcode == OP_LDADDR);
2395 var = (MonoInst*)args [0]->inst_p0;
2396 EMIT_NEW_LOAD_MEMBASE (cfg, ins, OP_LOADX_MEMBASE, var->dreg, ldelema_ins->dreg, 0);
2397 ins->klass = cmethod->klass;
2398 return args [0];
2400 break;
2401 case SN_op_Explicit:
2402 return simd_intrinsic_emit_cast (intrins, cfg, cmethod, args);
2403 case SN_Equals:
2404 if (fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && mono_metadata_type_equal (fsig->params [0], type))
2405 return simd_intrinsic_emit_equality_op (cfg, cmethod, args, type_to_comp_op (etype), SIMD_COMP_EQ);
2406 if (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type))
2407 return simd_intrinsic_emit_binary_op (cfg, type_to_comp_op (etype), 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
2408 break;
2410 case SN_GreaterThan:
2411 case SN_GreaterThanOrEqual:
2412 case SN_LessThan:
2413 case SN_LessThanOrEqual: {
2414 MonoInst *cmp1, *cmp2;
2415 int eq_op, gt_op;
2417 switch (etype->type) {
2418 case MONO_TYPE_I1:
2419 case MONO_TYPE_I2:
2420 case MONO_TYPE_I4:
2421 case MONO_TYPE_I8:
2422 break;
2423 default:
2424 return NULL;
2427 eq_op = type_to_comp_op (etype);
2428 gt_op = type_to_gt_op (etype);
2430 switch (intrins->name) {
2431 case SN_GreaterThan:
2432 return simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
2433 case SN_LessThan:
2434 return simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]);
2435 case SN_LessThanOrEqual:
2436 cmp1 = simd_intrinsic_emit_binary_op (cfg, eq_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]);
2437 cmp2 = simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]);
2438 return simd_intrinsic_emit_binary_op (cfg, OP_POR, 0, cmethod->klass, fsig->params [0], fsig->params [1], cmp1, cmp2);
2439 case SN_GreaterThanOrEqual:
2440 cmp1 = simd_intrinsic_emit_binary_op (cfg, eq_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
2441 cmp2 = simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
2442 return simd_intrinsic_emit_binary_op (cfg, OP_POR, 0, cmethod->klass, fsig->params [0], fsig->params [1], cmp1, cmp2);
2443 default:
2444 g_assert_not_reached ();
2445 break;
2448 case SN_Abs:
2449 /* Vector<T>.Abs */
2450 switch (etype->type) {
2451 case MONO_TYPE_U1:
2452 case MONO_TYPE_U2:
2453 case MONO_TYPE_U4:
2454 case MONO_TYPE_U8: {
2455 MonoInst *ins;
2457 /* No-op */
2458 MONO_INST_NEW (cfg, ins, OP_XMOVE);
2459 ins->klass = cmethod->klass;
2460 ins->type = STACK_VTYPE;
2461 ins->sreg1 = args [0]->dreg;
2462 ins->dreg = alloc_xreg (cfg);
2463 MONO_ADD_INS (cfg->cbb, ins);
2464 return ins;
2466 default:
2467 break;
2469 break;
2470 case SN_op_Addition:
2471 case SN_op_Subtraction:
2472 case SN_op_Multiply:
2473 case SN_op_Division:
2474 case SN_op_ExclusiveOr:
2475 case SN_op_BitwiseAnd:
2476 case SN_op_BitwiseOr:
2477 case SN_Max:
2478 case SN_Min: {
2479 if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, fsig->params [0]) && mono_metadata_type_equal (fsig->params [0], fsig->params [1])))
2480 break;
2481 int op = 0;
2482 switch (intrins->name) {
2483 case SN_op_Addition:
2484 op = type_to_padd_op (etype);
2485 break;
2486 case SN_op_Subtraction:
2487 op = type_to_psub_op (etype);
2488 break;
2489 case SN_op_Multiply:
2490 op = type_to_pmul_op (etype);
2491 break;
2492 case SN_op_Division:
2493 op = type_to_pdiv_op (etype);
2494 break;
2495 case SN_op_ExclusiveOr:
2496 op = type_to_pxor_op (etype);
2497 break;
2498 case SN_op_BitwiseAnd:
2499 op = type_to_pand_op (etype);
2500 break;
2501 case SN_op_BitwiseOr:
2502 op = type_to_por_op (etype);
2503 break;
2504 case SN_Min:
2505 op = type_to_pmin_op (etype);
2506 break;
2507 case SN_Max:
2508 op = type_to_pmax_op (etype);
2509 break;
2510 default:
2511 g_assert_not_reached ();
2513 if (op != -1)
2514 return simd_intrinsic_emit_binary_op (cfg, op, 0, cmethod->klass, fsig->params [0], fsig->params [0], args [0], args [1]);
2515 break;
2517 case SN_CopyTo: {
2518 MonoInst *array_ins = args [1];
2519 MonoInst *index_ins = args [2];
2520 MonoInst *ldelema_ins;
2521 MonoInst *var;
2522 int end_index_reg;
2524 if (args [0]->opcode != OP_LDADDR)
2525 return NULL;
2527 /* Emit index check for the end (index + len - 1 < array length) */
2528 end_index_reg = alloc_ireg (cfg);
2529 EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1);
2531 int length_reg = alloc_ireg (cfg);
2532 MONO_EMIT_NEW_LOAD_MEMBASE_OP_FAULT (cfg, OP_LOADI4_MEMBASE, length_reg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length));
2533 MONO_EMIT_NEW_BIALU (cfg, OP_COMPARE, -1, length_reg, end_index_reg);
2534 MONO_EMIT_NEW_COND_EXC (cfg, LE_UN, "ArgumentException");
2536 /* Load the simd reg into the array slice */
2537 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, TRUE);
2538 g_assert (args [0]->opcode == OP_LDADDR);
2539 var = (MonoInst*)args [0]->inst_p0;
2540 EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, ldelema_ins->dreg, 0, var->dreg);
2541 ins->klass = cmethod->klass;
2542 return args [0];
2543 break;
2545 default:
2546 break;
2549 assert_handled (cfg, cmethod);
2551 if (cfg->verbose_level > 1) {
2552 char *name = mono_method_full_name (cmethod, TRUE);
2553 printf (" SIMD method %s not handled.\n", name);
2554 g_free (name);
2557 return NULL;
2561 * emit_sys_numerics_intrinsics:
2563 * Emit intrinsics for the System.Numerics assembly.
2565 static MonoInst*
2566 emit_sys_numerics_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2568 const char *nspace = m_class_get_name_space (cmethod->klass);
2569 const char *class_name = m_class_get_name (cmethod->klass);
2571 if (!strcmp ("Vector2", class_name) || !strcmp ("Vector4", class_name) || !strcmp ("Vector3", class_name))
2572 return emit_vector_intrinsics (cfg, cmethod, fsig, args);
2574 if (!strcmp ("System.Numerics", nspace) && !strcmp ("Vector", class_name)) {
2575 if (!strcmp (cmethod->name, "get_IsHardwareAccelerated"))
2576 return emit_vector_is_hardware_accelerated_intrinsic (cfg);
2579 return NULL;
2582 static MonoInst*
2583 emit_sys_numerics_vectors_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2585 const char *nspace = m_class_get_name_space (cmethod->klass);
2586 const char *class_name = m_class_get_name (cmethod->klass);
2588 if (!strcmp (class_name, "Vector`1"))
2589 return emit_vector_t_intrinsics (cfg, cmethod, fsig, args);
2591 if (!strcmp ("System.Numerics", nspace) && !strcmp ("Vector", class_name)) {
2592 if (!strcmp (cmethod->name, "get_IsHardwareAccelerated"))
2593 return emit_vector_is_hardware_accelerated_intrinsic (cfg);
2596 return NULL;
2599 MonoInst*
2600 mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr)
2602 MonoInst * simd_inst = NULL;
2604 if (is_sys_numerics_assembly (m_class_get_image (field->parent)->assembly)) {
2605 int index = -1;
2607 const char *parent_name = m_class_get_name (field->parent);
2608 if (!strcmp (parent_name, "Vector2") ||
2609 !strcmp (parent_name, "Vector3") ||
2610 !strcmp (parent_name, "Vector4")) {
2611 if (!strcmp (field->name, "X"))
2612 index = 0;
2613 else if (!strcmp (field->name, "Y"))
2614 index = 1;
2615 else if (!strcmp (field->name, "Z"))
2616 index = 2;
2617 else if (!strcmp (field->name, "W"))
2618 index = 3;
2621 if (index != -1) {
2622 if (cfg->verbose_level > 1)
2623 printf (" SIMD intrinsic field access: %s\n", field->name);
2625 simd_inst = simd_intrinsic_emit_getter_op (cfg, index, field->parent, mono_field_get_type_internal (field), addr);
2626 goto on_exit;
2630 on_exit:
2632 if (simd_inst != NULL) {
2633 cfg->uses_simd_intrinsics |= MONO_CFG_USES_SIMD_INTRINSICS;
2634 cfg->uses_simd_intrinsics |= MONO_CFG_USES_SIMD_INTRINSICS_DECOMPOSE_VTYPE;
2637 return simd_inst;
2640 #endif /* DISABLE_JIT */
2641 #endif /* MONO_ARCH_SIMD_INTRINSICS */