2 * simd-intrinsics.c: simd support for intrinsics
5 * Rodrigo Kumpera (rkumpera@novell.com)
7 * (C) 2008 Novell, Inc.
15 #include "mono/utils/bsearch.h"
16 #include <mono/metadata/abi-details.h>
19 General notes on SIMD intrinsics
21 TODO handle operands with non SIMD args, such as op_Addition (Vector4f, float)
22 TODO optimize r4const in .ctor so it doesn't go into the FP stack first
23 TODO extend op_to_op_dest_membase to handle simd ops
24 TODO add support for indexed versions of simd ops
25 TODO do an amd64 port and figure out how to properly handle extractors/.ctor
26 TODO make sure locals, arguments and spills are properly aligned.
27 TODO add support for fusing a XMOVE into a simd op in mono_spill_global_vars.
28 TODO add stuff to man pages
29 TODO document this under /docs
30 TODO make passing a xmm as argument not cause it to be LDADDR'ed (introduce an OP_XPUSH)
31 TODO revamp the .ctor sequence as it looks very fragile, maybe use a var just like move_i4_to_f. (or just pinst sse ops)
32 TODO figure out what's wrong with OP_STOREX_MEMBASE_REG and OP_STOREX_MEMBASE (the 2nd is for imm operands)
33 TODO maybe add SSE3 emulation on top of SSE2, or just implement the corresponding functions using SSE2 intrinsics.
34 TODO pass simd arguments in registers or, at least, add SSE support for pushing large (>=16) valuetypes
35 TODO passing simd args byval to a non-intrinsic method causes some useless local var load/store to happen.
36 TODO check if we need to init the SSE control word with better precision.
37 TODO add support for 3 reg sources in mini without slowing the common path. Or find a way to make MASKMOVDQU work.
38 TODO make SimdRuntime.get_AccelMode work under AOT
39 TODO patterns such as "a ^= b" generate slower code as the LDADDR op will be copied to a tmp first. Look at adding a indirection reduction pass after the dce pass.
40 TODO extend bounds checking code to support range checking.
42 General notes for SIMD intrinsics.
44 -Bad extractor and constructor performance
45 Extracting a float from a XMM is a complete disaster if you are passing it as an argument.
46 It will be loaded in the FP stack just to be pushed on the call stack.
48 A similar thing happens with the Vector4f constructor, which requires float vars to be
50 The fix for this issue is similar to the one required for r4const as method args. Avoiding the
51 trip to the FP stack is desirable.
53 -Extractor and constructor code doesn't make sense under amd64. Both currently assume separate banks
57 -Promote OP_EXTRACT_I4 to a STORE op
58 The advantage of this change is that it could have a _membase version and promote further optimizations.
60 -Create a MONO_INST_DONT_REGALLOC and use it in all places that MONO_INST_INDIRECT is used
64 #if defined (MONO_ARCH_SIMD_INTRINSICS)
66 #if defined (DISABLE_JIT)
69 mono_simd_intrinsics_init (void)
75 //#define IS_DEBUG_ON(cfg) (0)
77 #define IS_DEBUG_ON(cfg) ((cfg)->verbose_level >= 3)
78 #define DEBUG(a) do { if (IS_DEBUG_ON(cfg)) { a; } } while (0)
84 SIMD_EMIT_GETTER_QWORD
,
90 SIMD_EMIT_LOAD_ALIGNED
,
92 SIMD_EMIT_EXTRACT_MASK
,
96 #ifdef HAVE_ARRAY_ELEM_INIT
97 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
98 #define MSGSTRFIELD1(line) str##line
99 static const struct msgstr_t
{
100 #define SIMD_METHOD(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
101 #include "simd-methods.h"
104 #define SIMD_METHOD(str,name) str,
105 #include "simd-methods.h"
110 #define SIMD_METHOD(str,name) name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
111 #include "simd-methods.h"
113 #define method_name(idx) ((const char*)&method_names + (idx))
116 #define SIMD_METHOD(str,name) str,
117 static const char * const method_names
[] = {
118 #include "simd-methods.h"
122 #define SIMD_METHOD(str,name) name,
124 #include "simd-methods.h"
128 #define method_name(idx) (method_names [(idx)])
135 guint8 simd_version_flags
;
136 guint8 simd_emit_mode
: 4;
140 static const SimdIntrinsic vector4f_intrinsics
[] = {
141 { SN_ctor
, OP_EXPAND_R4
, SIMD_VERSION_SSE1
, SIMD_EMIT_CTOR
},
142 { SN_AddSub
, OP_ADDSUBPS
, SIMD_VERSION_SSE3
, SIMD_EMIT_BINARY
},
143 { SN_AndNot
, OP_ANDNPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
144 { SN_CompareEqual
, OP_COMPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_EQ
},
145 { SN_CompareLessEqual
, OP_COMPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_LE
},
146 { SN_CompareLessThan
, OP_COMPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_LT
},
147 { SN_CompareNotEqual
, OP_COMPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_NEQ
},
148 { SN_CompareNotLessEqual
, OP_COMPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_NLE
},
149 { SN_CompareNotLessThan
, OP_COMPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_NLT
},
150 { SN_CompareOrdered
, OP_COMPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_ORD
},
151 { SN_CompareUnordered
, OP_COMPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_UNORD
},
152 { SN_ConvertToDouble
, OP_CVTPS2PD
, SIMD_VERSION_SSE2
, SIMD_EMIT_UNARY
},
153 { SN_ConvertToInt
, OP_CVTPS2DQ
, SIMD_VERSION_SSE2
, SIMD_EMIT_UNARY
},
154 { SN_ConvertToIntTruncated
, OP_CVTTPS2DQ
, SIMD_VERSION_SSE2
, SIMD_EMIT_UNARY
},
155 { SN_DuplicateHigh
, OP_DUPPS_HIGH
, SIMD_VERSION_SSE3
, SIMD_EMIT_UNARY
},
156 { SN_DuplicateLow
, OP_DUPPS_LOW
, SIMD_VERSION_SSE3
, SIMD_EMIT_UNARY
},
157 { SN_HorizontalAdd
, OP_HADDPS
, SIMD_VERSION_SSE3
, SIMD_EMIT_BINARY
},
158 { SN_HorizontalSub
, OP_HSUBPS
, SIMD_VERSION_SSE3
, SIMD_EMIT_BINARY
},
159 { SN_InterleaveHigh
, OP_UNPACK_HIGHPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
160 { SN_InterleaveLow
, OP_UNPACK_LOWPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
161 { SN_InvSqrt
, OP_RSQRTPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_UNARY
},
162 { SN_LoadAligned
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_LOAD_ALIGNED
},
163 { SN_Max
, OP_MAXPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
164 { SN_Min
, OP_MINPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
165 { SN_PrefetchTemporalAllCacheLevels
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_0
},
166 { SN_PrefetchTemporal1stLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_1
},
167 { SN_PrefetchTemporal2ndLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_2
},
168 { SN_PrefetchNonTemporal
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_NTA
},
169 { SN_Reciprocal
, OP_RCPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_UNARY
},
170 { SN_Shuffle
, OP_PSHUFLED
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHUFFLE
},
171 { SN_Sqrt
, OP_SQRTPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_UNARY
},
172 { SN_StoreAligned
, OP_STOREX_ALIGNED_MEMBASE_REG
, SIMD_VERSION_SSE1
, SIMD_EMIT_STORE
},
173 { SN_StoreNonTemporal
, OP_STOREX_NTA_MEMBASE_REG
, SIMD_VERSION_SSE1
, SIMD_EMIT_STORE
},
174 { SN_get_W
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
175 { SN_get_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
176 { SN_get_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
177 { SN_get_Z
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
178 { SN_op_Addition
, OP_ADDPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
179 { SN_op_BitwiseAnd
, OP_ANDPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
180 { SN_op_BitwiseOr
, OP_ORPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
181 { SN_op_Division
, OP_DIVPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
182 { SN_op_Equality
, OP_COMPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_EQ
},
183 { SN_op_ExclusiveOr
, OP_XORPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
184 { SN_op_Explicit
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_CAST
},
185 { SN_op_Inequality
, OP_COMPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_NEQ
},
186 { SN_op_Multiply
, OP_MULPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
187 { SN_op_Subtraction
, OP_SUBPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
188 { SN_set_W
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
189 { SN_set_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
190 { SN_set_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
191 { SN_set_Z
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
}
194 static const SimdIntrinsic vector2d_intrinsics
[] = {
195 { SN_ctor
, OP_EXPAND_R8
, SIMD_VERSION_SSE1
, SIMD_EMIT_CTOR
},
196 { SN_AddSub
, OP_ADDSUBPD
, SIMD_VERSION_SSE3
, SIMD_EMIT_BINARY
,},
197 { SN_AndNot
, OP_ANDNPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
198 { SN_CompareEqual
, OP_COMPPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_EQ
},
199 { SN_CompareLessEqual
, OP_COMPPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_LE
},
200 { SN_CompareLessThan
, OP_COMPPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_LT
},
201 { SN_CompareNotEqual
, OP_COMPPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_NEQ
},
202 { SN_CompareNotLessEqual
, OP_COMPPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_NLE
},
203 { SN_CompareNotLessThan
, OP_COMPPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_NLT
},
204 { SN_CompareOrdered
, OP_COMPPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_ORD
},
205 { SN_CompareUnordered
, OP_COMPPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_COMP_UNORD
},
206 { SN_ConvertToFloat
, OP_CVTPD2PS
, SIMD_VERSION_SSE2
, SIMD_EMIT_UNARY
},
207 { SN_ConvertToInt
, OP_CVTPD2DQ
, SIMD_VERSION_SSE2
, SIMD_EMIT_UNARY
},
208 { SN_ConvertToIntTruncated
, OP_CVTTPD2DQ
, SIMD_VERSION_SSE2
, SIMD_EMIT_UNARY
},
209 { SN_Duplicate
, OP_DUPPD
, SIMD_VERSION_SSE3
, SIMD_EMIT_UNARY
},
210 { SN_HorizontalAdd
, OP_HADDPD
, SIMD_VERSION_SSE3
, SIMD_EMIT_BINARY
},
211 { SN_HorizontalSub
, OP_HSUBPD
, SIMD_VERSION_SSE3
, SIMD_EMIT_BINARY
},
212 { SN_InterleaveHigh
, OP_UNPACK_HIGHPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
213 { SN_InterleaveLow
, OP_UNPACK_LOWPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
214 { SN_LoadAligned
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_LOAD_ALIGNED
},
215 { SN_Max
, OP_MAXPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
216 { SN_Min
, OP_MINPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
217 { SN_PrefetchTemporalAllCacheLevels
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_0
},
218 { SN_PrefetchTemporal1stLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_1
},
219 { SN_PrefetchTemporal2ndLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_2
},
220 { SN_PrefetchNonTemporal
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_NTA
},
221 { SN_Shuffle
, OP_SHUFPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHUFFLE
},
222 { SN_Sqrt
, OP_SQRTPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_UNARY
},
223 { SN_StoreAligned
, OP_STOREX_ALIGNED_MEMBASE_REG
, SIMD_VERSION_SSE1
, SIMD_EMIT_STORE
},
224 { SN_get_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER_QWORD
},
225 { SN_get_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER_QWORD
},
226 { SN_op_Addition
, OP_ADDPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
227 { SN_op_BitwiseAnd
, OP_ANDPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
228 { SN_op_BitwiseOr
, OP_ORPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
229 { SN_op_Division
, OP_DIVPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
230 { SN_op_ExclusiveOr
, OP_XORPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
231 { SN_op_Explicit
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_CAST
},
232 { SN_op_Multiply
, OP_MULPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
233 { SN_op_Subtraction
, OP_SUBPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
234 { SN_set_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
235 { SN_set_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
238 static const SimdIntrinsic vector2ul_intrinsics
[] = {
239 { SN_ctor
, OP_EXPAND_I8
, SIMD_VERSION_SSE1
, SIMD_EMIT_CTOR
},
240 { SN_CompareEqual
, OP_PCMPEQQ
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
241 { SN_LoadAligned
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_LOAD_ALIGNED
},
242 { SN_PrefetchTemporalAllCacheLevels
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_0
},
243 { SN_PrefetchTemporal1stLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_1
},
244 { SN_PrefetchTemporal2ndLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_2
},
245 { SN_PrefetchNonTemporal
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_NTA
},
246 { SN_Shuffle
, OP_SHUFPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHUFFLE
},
247 { SN_StoreAligned
, OP_STOREX_ALIGNED_MEMBASE_REG
, SIMD_VERSION_SSE1
, SIMD_EMIT_STORE
},
248 { SN_UnpackHigh
, OP_UNPACK_HIGHQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
249 { SN_UnpackLow
, OP_UNPACK_LOWQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
250 { SN_get_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER_QWORD
},
251 { SN_get_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER_QWORD
},
252 { SN_op_Addition
, OP_PADDQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
253 { SN_op_BitwiseAnd
, OP_PAND
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
254 { SN_op_BitwiseOr
, OP_POR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
255 { SN_op_ExclusiveOr
, OP_PXOR
, SIMD_EMIT_BINARY
, SIMD_VERSION_SSE1
},
256 { SN_op_Explicit
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_CAST
},
257 { SN_op_LeftShift
, OP_PSHLQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
258 { SN_op_Multiply
, OP_PMULQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
259 { SN_op_RightShift
, OP_PSHRQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
260 { SN_op_Subtraction
, OP_PSUBQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
261 { SN_set_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
262 { SN_set_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
265 static const SimdIntrinsic vector2l_intrinsics
[] = {
266 { SN_ctor
, OP_EXPAND_I8
, SIMD_VERSION_SSE1
, SIMD_EMIT_CTOR
},
267 { SN_CompareEqual
, OP_PCMPEQQ
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
268 { SN_CompareGreaterThan
, OP_PCMPGTQ
, SIMD_VERSION_SSE42
, SIMD_EMIT_BINARY
},
269 { SN_LoadAligned
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_LOAD_ALIGNED
},
270 { SN_LogicalRightShift
, OP_PSHRQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
271 { SN_PrefetchTemporalAllCacheLevels
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_0
},
272 { SN_PrefetchTemporal1stLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_1
},
273 { SN_PrefetchTemporal2ndLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_2
},
274 { SN_PrefetchNonTemporal
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_NTA
},
275 { SN_Shuffle
, OP_SHUFPD
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHUFFLE
},
276 { SN_StoreAligned
, OP_STOREX_ALIGNED_MEMBASE_REG
, SIMD_VERSION_SSE1
, SIMD_EMIT_STORE
},
277 { SN_UnpackHigh
, OP_UNPACK_HIGHQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
278 { SN_UnpackLow
, OP_UNPACK_LOWQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
279 { SN_get_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER_QWORD
},
280 { SN_get_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER_QWORD
},
281 { SN_op_Addition
, OP_PADDQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
282 { SN_op_BitwiseAnd
, OP_PAND
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
283 { SN_op_BitwiseOr
, OP_POR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
284 { SN_op_ExclusiveOr
, OP_PXOR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
285 { SN_op_Explicit
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_CAST
},
286 { SN_op_LeftShift
, OP_PSHLQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
287 { SN_op_Multiply
, OP_PMULQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
288 { SN_op_Subtraction
, OP_PSUBQ
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
289 { SN_set_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
290 { SN_set_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
293 static const SimdIntrinsic vector4ui_intrinsics
[] = {
294 { SN_ctor
, OP_EXPAND_I4
, SIMD_VERSION_SSE1
, SIMD_EMIT_CTOR
},
295 { SN_ArithmeticRightShift
, OP_PSARD
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
296 { SN_CompareEqual
, OP_PCMPEQD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
297 { SN_LoadAligned
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_LOAD_ALIGNED
},
298 { SN_Max
, OP_PMAXD_UN
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
299 { SN_Min
, OP_PMIND_UN
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
300 { SN_PrefetchTemporalAllCacheLevels
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_0
},
301 { SN_PrefetchTemporal1stLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_1
},
302 { SN_PrefetchTemporal2ndLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_2
},
303 { SN_PrefetchNonTemporal
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_NTA
},
304 { SN_Shuffle
, OP_PSHUFLED
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHUFFLE
},
305 { SN_SignedPackWithSignedSaturation
, OP_PACKD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
306 { SN_SignedPackWithUnsignedSaturation
, OP_PACKD_UN
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
307 { SN_StoreAligned
, OP_STOREX_ALIGNED_MEMBASE_REG
, SIMD_VERSION_SSE1
, SIMD_EMIT_STORE
},
308 { SN_UnpackHigh
, OP_UNPACK_HIGHD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
309 { SN_UnpackLow
, OP_UNPACK_LOWD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
310 { SN_get_W
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
311 { SN_get_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
312 { SN_get_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
313 { SN_get_Z
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
314 { SN_op_Addition
, OP_PADDD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
315 { SN_op_BitwiseAnd
, OP_PAND
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
316 { SN_op_BitwiseOr
, OP_POR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
317 { SN_op_Equality
, OP_PCMPEQD
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_EQ
},
318 { SN_op_ExclusiveOr
, OP_PXOR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
319 { SN_op_Explicit
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_CAST
},
320 { SN_op_Inequality
, OP_PCMPEQD
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_NEQ
},
321 { SN_op_LeftShift
, OP_PSHLD
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
322 { SN_op_Multiply
, OP_PMULD
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
323 { SN_op_RightShift
, OP_PSHRD
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
324 { SN_op_Subtraction
, OP_PSUBD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
325 { SN_set_W
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
326 { SN_set_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
327 { SN_set_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
328 { SN_set_Z
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
331 static const SimdIntrinsic vector4i_intrinsics
[] = {
332 { SN_ctor
, OP_EXPAND_I4
, SIMD_VERSION_SSE1
, SIMD_EMIT_CTOR
},
333 { SN_CompareEqual
, OP_PCMPEQD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
334 { SN_CompareGreaterThan
, OP_PCMPGTD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
335 { SN_ConvertToDouble
, OP_CVTDQ2PD
, SIMD_VERSION_SSE2
, SIMD_EMIT_UNARY
},
336 { SN_ConvertToFloat
, OP_CVTDQ2PS
, SIMD_VERSION_SSE2
, SIMD_EMIT_UNARY
},
337 { SN_LoadAligned
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_LOAD_ALIGNED
},
338 { SN_LogicalRightShift
, OP_PSHRD
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
339 { SN_Max
, OP_PMAXD
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
340 { SN_Min
, OP_PMIND
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
341 { SN_PackWithSignedSaturation
, OP_PACKD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
342 { SN_PackWithUnsignedSaturation
, OP_PACKD_UN
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
343 { SN_PrefetchTemporalAllCacheLevels
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_0
},
344 { SN_PrefetchTemporal1stLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_1
},
345 { SN_PrefetchTemporal2ndLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_2
},
346 { SN_PrefetchNonTemporal
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_NTA
},
347 { SN_Shuffle
, OP_PSHUFLED
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHUFFLE
},
348 { SN_StoreAligned
, OP_STOREX_ALIGNED_MEMBASE_REG
, SIMD_VERSION_SSE1
, SIMD_EMIT_STORE
},
349 { SN_UnpackHigh
, OP_UNPACK_HIGHD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
350 { SN_UnpackLow
, OP_UNPACK_LOWD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
351 { SN_get_W
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
352 { SN_get_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
353 { SN_get_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
354 { SN_get_Z
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
355 { SN_op_Addition
, OP_PADDD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
356 { SN_op_BitwiseAnd
, OP_PAND
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
357 { SN_op_BitwiseOr
, OP_POR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
358 { SN_op_Equality
, OP_PCMPEQD
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_EQ
},
359 { SN_op_ExclusiveOr
, OP_PXOR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
360 { SN_op_Explicit
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_CAST
},
361 { SN_op_Inequality
, OP_PCMPEQD
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_NEQ
},
362 { SN_op_LeftShift
, OP_PSHLD
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
363 { SN_op_Multiply
, OP_PMULD
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
364 { SN_op_RightShift
, OP_PSARD
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
365 { SN_op_Subtraction
, OP_PSUBD
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
366 { SN_set_W
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
367 { SN_set_X
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
368 { SN_set_Y
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
369 { SN_set_Z
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
372 static const SimdIntrinsic vector8us_intrinsics
[] = {
373 { SN_ctor
, OP_EXPAND_I2
, SIMD_VERSION_SSE1
, SIMD_EMIT_CTOR
},
374 { SN_AddWithSaturation
, OP_PADDW_SAT_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
375 { SN_ArithmeticRightShift
, OP_PSARW
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
376 { SN_Average
, OP_PAVGW_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
377 { SN_CompareEqual
, OP_PCMPEQW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
, SIMD_VERSION_SSE1
},
378 { SN_LoadAligned
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_LOAD_ALIGNED
},
379 { SN_Max
, OP_PMAXW_UN
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
380 { SN_Min
, OP_PMINW_UN
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
381 { SN_MultiplyStoreHigh
, OP_PMULW_HIGH_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
382 { SN_PrefetchTemporalAllCacheLevels
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_0
},
383 { SN_PrefetchTemporal1stLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_1
},
384 { SN_PrefetchTemporal2ndLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_2
},
385 { SN_PrefetchNonTemporal
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_NTA
},
386 { SN_ShuffleHigh
, OP_PSHUFLEW_HIGH
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHUFFLE
},
387 { SN_ShuffleLow
, OP_PSHUFLEW_LOW
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHUFFLE
},
388 { SN_SignedPackWithSignedSaturation
, OP_PACKW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
389 { SN_SignedPackWithUnsignedSaturation
, OP_PACKW_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
390 { SN_StoreAligned
, OP_STOREX_ALIGNED_MEMBASE_REG
, SIMD_VERSION_SSE1
, SIMD_EMIT_STORE
},
391 { SN_SubtractWithSaturation
, OP_PSUBW_SAT_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
392 { SN_UnpackHigh
, OP_UNPACK_HIGHW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
393 { SN_UnpackLow
, OP_UNPACK_LOWW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
394 { SN_get_V0
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
395 { SN_get_V1
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
396 { SN_get_V2
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
397 { SN_get_V3
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
398 { SN_get_V4
, 4, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
399 { SN_get_V5
, 5, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
400 { SN_get_V6
, 6, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
401 { SN_get_V7
, 7, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
402 { SN_op_Addition
, OP_PADDW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
403 { SN_op_BitwiseAnd
, OP_PAND
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
404 { SN_op_BitwiseOr
, OP_POR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
405 { SN_op_Equality
, OP_PCMPEQW
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_EQ
},
406 { SN_op_ExclusiveOr
, OP_PXOR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
407 { SN_op_Explicit
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_CAST
},
408 { SN_op_Inequality
, OP_PCMPEQW
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_NEQ
},
409 { SN_op_LeftShift
, OP_PSHLW
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
410 { SN_op_Multiply
, OP_PMULW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
411 { SN_op_RightShift
, OP_PSHRW
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
412 { SN_op_Subtraction
, OP_PSUBW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
413 { SN_set_V0
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
414 { SN_set_V1
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
415 { SN_set_V2
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
416 { SN_set_V3
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
417 { SN_set_V4
, 4, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
418 { SN_set_V5
, 5, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
419 { SN_set_V6
, 6, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
420 { SN_set_V7
, 7, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
423 static const SimdIntrinsic vector8s_intrinsics
[] = {
424 { SN_ctor
, OP_EXPAND_I2
, SIMD_VERSION_SSE1
, SIMD_EMIT_CTOR
},
425 { SN_AddWithSaturation
, OP_PADDW_SAT
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
426 { SN_CompareEqual
, OP_PCMPEQW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
427 { SN_CompareGreaterThan
, OP_PCMPGTW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
428 { SN_LoadAligned
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_LOAD_ALIGNED
},
429 { SN_LogicalRightShift
, OP_PSHRW
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
430 { SN_Max
, OP_PMAXW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
431 { SN_Min
, OP_PMINW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
432 { SN_MultiplyStoreHigh
, OP_PMULW_HIGH
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
433 { SN_PackWithSignedSaturation
, OP_PACKW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
434 { SN_PackWithUnsignedSaturation
, OP_PACKW_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
435 { SN_PrefetchTemporalAllCacheLevels
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_0
},
436 { SN_PrefetchTemporal1stLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_1
},
437 { SN_PrefetchTemporal2ndLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_2
},
438 { SN_PrefetchNonTemporal
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_NTA
},
439 { SN_ShuffleHigh
, OP_PSHUFLEW_HIGH
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHUFFLE
},
440 { SN_ShuffleLow
, OP_PSHUFLEW_LOW
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHUFFLE
},
441 { SN_StoreAligned
, OP_STOREX_ALIGNED_MEMBASE_REG
, SIMD_VERSION_SSE1
, SIMD_EMIT_STORE
},
442 { SN_SubtractWithSaturation
, OP_PSUBW_SAT_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
443 { SN_UnpackHigh
, OP_UNPACK_HIGHW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
444 { SN_UnpackLow
, OP_UNPACK_LOWW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
445 { SN_get_V0
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
446 { SN_get_V1
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
447 { SN_get_V2
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
448 { SN_get_V3
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
449 { SN_get_V4
, 4, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
450 { SN_get_V5
, 5, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
451 { SN_get_V6
, 6, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
452 { SN_get_V7
, 7, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
453 { SN_op_Addition
, OP_PADDW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
454 { SN_op_BitwiseAnd
, OP_PAND
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
455 { SN_op_BitwiseOr
, OP_POR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
456 { SN_op_Equality
, OP_PCMPEQW
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_EQ
},
457 { SN_op_ExclusiveOr
, OP_PXOR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
458 { SN_op_Explicit
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_CAST
},
459 { SN_op_Inequality
, OP_PCMPEQW
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_NEQ
},
460 { SN_op_LeftShift
, OP_PSHLW
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
461 { SN_op_Multiply
, OP_PMULW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
462 { SN_op_RightShift
, OP_PSARW
, SIMD_VERSION_SSE1
, SIMD_EMIT_SHIFT
},
463 { SN_op_Subtraction
, OP_PSUBW
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
464 { SN_set_V0
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
465 { SN_set_V1
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
466 { SN_set_V2
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
467 { SN_set_V3
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
468 { SN_set_V4
, 4, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
469 { SN_set_V5
, 5, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
470 { SN_set_V6
, 6, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
471 { SN_set_V7
, 7, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
474 static const SimdIntrinsic vector16b_intrinsics
[] = {
475 { SN_ctor
, OP_EXPAND_I1
, SIMD_VERSION_SSE1
, SIMD_EMIT_CTOR
},
476 { SN_AddWithSaturation
, OP_PADDB_SAT_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
477 { SN_Average
, OP_PAVGB_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
478 { SN_CompareEqual
, OP_PCMPEQB
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
479 { SN_ExtractByteMask
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_EXTRACT_MASK
},
480 { SN_LoadAligned
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_LOAD_ALIGNED
},
481 { SN_Max
, OP_PMAXB_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
482 { SN_Min
, OP_PMINB_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
483 { SN_PrefetchTemporalAllCacheLevels
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_0
},
484 { SN_PrefetchTemporal1stLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_1
},
485 { SN_PrefetchTemporal2ndLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_2
},
486 { SN_PrefetchNonTemporal
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_NTA
},
487 { SN_StoreAligned
, OP_STOREX_ALIGNED_MEMBASE_REG
, SIMD_VERSION_SSE1
, SIMD_EMIT_STORE
},
488 { SN_SubtractWithSaturation
, OP_PSUBB_SAT_UN
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
489 { SN_SumOfAbsoluteDifferences
, OP_PSUM_ABS_DIFF
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
490 { SN_UnpackHigh
, OP_UNPACK_HIGHB
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
491 { SN_UnpackLow
, OP_UNPACK_LOWB
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
492 { SN_get_V0
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
493 { SN_get_V1
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
494 { SN_get_V10
, 10, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
495 { SN_get_V11
, 11, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
496 { SN_get_V12
, 12, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
497 { SN_get_V13
, 13, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
498 { SN_get_V14
, 14, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
499 { SN_get_V15
, 15, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
500 { SN_get_V2
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
501 { SN_get_V3
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
502 { SN_get_V4
, 4, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
503 { SN_get_V5
, 5, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
504 { SN_get_V6
, 6, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
505 { SN_get_V7
, 7, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
506 { SN_get_V8
, 8, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
507 { SN_get_V9
, 9, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
508 { SN_op_Addition
, OP_PADDB
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
509 { SN_op_BitwiseAnd
, OP_PAND
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
510 { SN_op_BitwiseOr
, OP_POR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
511 { SN_op_Equality
, OP_PCMPEQB
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_EQ
},
512 { SN_op_ExclusiveOr
, OP_PXOR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
513 { SN_op_Explicit
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_CAST
},
514 { SN_op_Inequality
, OP_PCMPEQB
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_NEQ
},
515 { SN_op_Subtraction
, OP_PSUBB
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
516 { SN_set_V0
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
517 { SN_set_V1
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
518 { SN_set_V10
, 10, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
519 { SN_set_V11
, 11, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
520 { SN_set_V12
, 12, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
521 { SN_set_V13
, 13, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
522 { SN_set_V14
, 14, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
523 { SN_set_V15
, 15, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
524 { SN_set_V2
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
525 { SN_set_V3
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
526 { SN_set_V4
, 4, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
527 { SN_set_V5
, 5, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
528 { SN_set_V6
, 6, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
529 { SN_set_V7
, 7, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
530 { SN_set_V8
, 8, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
531 { SN_set_V9
, 9, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
538 static const SimdIntrinsic vector16sb_intrinsics
[] = {
539 { SN_ctor
, OP_EXPAND_I1
, SIMD_VERSION_SSE1
, SIMD_EMIT_CTOR
},
540 { SN_AddWithSaturation
, OP_PADDB_SAT
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
541 { SN_CompareEqual
, OP_PCMPEQB
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
542 { SN_CompareGreaterThan
, OP_PCMPGTB
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
543 { SN_ExtractByteMask
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_EXTRACT_MASK
},
544 { SN_LoadAligned
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_LOAD_ALIGNED
},
545 { SN_Max
, OP_PMAXB
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
546 { SN_Min
, OP_PMINB
, SIMD_VERSION_SSE41
, SIMD_EMIT_BINARY
},
547 { SN_PrefetchTemporalAllCacheLevels
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_0
},
548 { SN_PrefetchTemporal1stLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_1
},
549 { SN_PrefetchTemporal2ndLevelCache
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_2
},
550 { SN_PrefetchNonTemporal
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_PREFETCH
, SIMD_PREFETCH_MODE_NTA
},
551 { SN_StoreAligned
, OP_STOREX_ALIGNED_MEMBASE_REG
, SIMD_VERSION_SSE1
, SIMD_EMIT_STORE
},
552 { SN_SubtractWithSaturation
, OP_PSUBB_SAT
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
553 { SN_UnpackHigh
, OP_UNPACK_HIGHB
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
554 { SN_UnpackLow
, OP_UNPACK_LOWB
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
555 { SN_get_V0
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
556 { SN_get_V1
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
557 { SN_get_V10
, 10, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
558 { SN_get_V11
, 11, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
559 { SN_get_V12
, 12, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
560 { SN_get_V13
, 13, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
561 { SN_get_V14
, 14, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
562 { SN_get_V15
, 15, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
563 { SN_get_V2
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
564 { SN_get_V3
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
565 { SN_get_V4
, 4, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
566 { SN_get_V5
, 5, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
567 { SN_get_V6
, 6, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
568 { SN_get_V7
, 7, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
569 { SN_get_V8
, 8, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
570 { SN_get_V9
, 9, SIMD_VERSION_SSE1
, SIMD_EMIT_GETTER
},
571 { SN_op_Addition
, OP_PADDB
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
572 { SN_op_BitwiseAnd
, OP_PAND
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
573 { SN_op_BitwiseOr
, OP_POR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
574 { SN_op_Equality
, OP_PCMPEQB
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_EQ
},
575 { SN_op_ExclusiveOr
, OP_PXOR
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
576 { SN_op_Explicit
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_CAST
},
577 { SN_op_Inequality
, OP_PCMPEQB
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_NEQ
},
578 { SN_op_Subtraction
, OP_PSUBB
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
579 { SN_set_V0
, 0, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
580 { SN_set_V1
, 1, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
581 { SN_set_V10
, 10, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
582 { SN_set_V11
, 11, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
583 { SN_set_V12
, 12, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
584 { SN_set_V13
, 13, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
585 { SN_set_V14
, 14, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
586 { SN_set_V15
, 15, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
587 { SN_set_V2
, 2, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
588 { SN_set_V3
, 3, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
589 { SN_set_V4
, 4, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
590 { SN_set_V5
, 5, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
591 { SN_set_V6
, 6, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
592 { SN_set_V7
, 7, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
593 { SN_set_V8
, 8, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
594 { SN_set_V9
, 9, SIMD_VERSION_SSE1
, SIMD_EMIT_SETTER
},
597 static guint32 simd_supported_versions
;
599 static MonoInst
* emit_sys_numerics_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
);
600 static MonoInst
* emit_sys_numerics_vectors_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
);
602 /*TODO match using number of parameters as well*/
604 simd_intrinsic_compare_by_name (const void *key
, const void *value
)
606 return strcmp (key
, method_name (((SimdIntrinsic
*)value
)->name
));
/* Per-vreg flags used by mono_simd_simplify_indirection ().
 * NOTE(review): the enum opener and the first constant were lost in
 * extraction; VREG_USED = 0x01 is implied by the 0x02..0x10 sequence and by
 * the assignment `vreg_flags [var->dreg] = VREG_USED` below — confirm. */
enum {
	VREG_USED		= 0x01, /* var is a simd candidate for the pass */
	VREG_HAS_XZERO_BB0	= 0x02, /* first bb zeroes the vreg with OP_XZERO */
	VREG_HAS_OTHER_OP_BB0	= 0x04, /* first bb touches the vreg otherwise */
	VREG_SINGLE_BB_USE	= 0x08, /* used by exactly one later bb */
	VREG_MANY_BB_USE	= 0x10, /* used by more than one later bb */
};
618 mono_simd_intrinsics_init (void)
620 simd_supported_versions
= mono_arch_cpu_enumerate_simd_versions ();
621 /*TODO log the supported flags*/
624 static inline gboolean
625 apply_vreg_first_block_interference (MonoCompile
*cfg
, MonoInst
*ins
, int reg
, int max_vreg
, char *vreg_flags
)
627 if (reg
!= -1 && reg
<= max_vreg
&& vreg_flags
[reg
]) {
628 vreg_flags
[reg
] &= ~VREG_HAS_XZERO_BB0
;
629 vreg_flags
[reg
] |= VREG_HAS_OTHER_OP_BB0
;
630 DEBUG (printf ("[simd-simplify] R%d used: ", reg
); mono_print_ins(ins
));
636 static inline gboolean
637 apply_vreg_following_block_interference (MonoCompile
*cfg
, MonoInst
*ins
, int reg
, MonoBasicBlock
*bb
, int max_vreg
, char *vreg_flags
, MonoBasicBlock
**target_bb
)
639 if (reg
== -1 || reg
> max_vreg
|| !(vreg_flags
[reg
] & VREG_HAS_XZERO_BB0
) || target_bb
[reg
] == bb
)
642 if (vreg_flags
[reg
] & VREG_SINGLE_BB_USE
) {
643 vreg_flags
[reg
] &= ~VREG_SINGLE_BB_USE
;
644 vreg_flags
[reg
] |= VREG_MANY_BB_USE
;
645 DEBUG (printf ("[simd-simplify] R%d used by many bb: ", reg
); mono_print_ins(ins
));
647 } else if (!(vreg_flags
[reg
] & VREG_MANY_BB_USE
)) {
648 vreg_flags
[reg
] |= VREG_SINGLE_BB_USE
;
649 target_bb
[reg
] = bb
;
650 DEBUG (printf ("[simd-simplify] R%d first used by: ", reg
); mono_print_ins(ins
));
657 This pass recalculate which vars need MONO_INST_INDIRECT.
659 We cannot do this for non SIMD vars since code like mono_get_vtable_var
660 uses MONO_INST_INDIRECT to signal that the variable must be stack allocated.
663 mono_simd_simplify_indirection (MonoCompile
*cfg
)
666 MonoBasicBlock
*bb
, *first_bb
= NULL
, **target_bb
;
670 for (i
= 0; i
< cfg
->num_varinfo
; i
++) {
671 MonoInst
*var
= cfg
->varinfo
[i
];
672 if (var
->klass
->simd_type
) {
673 var
->flags
&= ~MONO_INST_INDIRECT
;
674 max_vreg
= MAX (var
->dreg
, max_vreg
);
678 for (bb
= cfg
->bb_entry
; bb
; bb
= bb
->next_bb
) {
679 if (!first_bb
&& bb
->code
)
681 for (ins
= bb
->code
; ins
; ins
= ins
->next
) {
682 if (ins
->opcode
== OP_LDADDR
) {
683 MonoInst
*var
= (MonoInst
*)ins
->inst_p0
;
684 if (var
->klass
->simd_type
) {
685 var
->flags
|= MONO_INST_INDIRECT
;
691 DEBUG (printf ("[simd-simplify] max vreg is %d\n", max_vreg
));
692 vreg_flags
= (char *)g_malloc0 (max_vreg
+ 1);
693 target_bb
= g_new0 (MonoBasicBlock
*, max_vreg
+ 1);
695 for (i
= 0; i
< cfg
->num_varinfo
; i
++) {
696 MonoInst
*var
= cfg
->varinfo
[i
];
697 if (var
->klass
->simd_type
&& !(var
->flags
& (MONO_INST_INDIRECT
|MONO_INST_VOLATILE
))) {
698 vreg_flags
[var
->dreg
] = VREG_USED
;
699 DEBUG (printf ("[simd-simplify] processing var %d with vreg %d\n", i
, var
->dreg
));
703 /*Scan the first basic block looking xzeros not used*/
704 for (ins
= first_bb
->code
; ins
; ins
= ins
->next
) {
706 int sregs
[MONO_MAX_SRC_REGS
];
708 if (ins
->opcode
== OP_XZERO
) {
709 if (!(vreg_flags
[ins
->dreg
] & VREG_HAS_OTHER_OP_BB0
)) {
710 DEBUG (printf ("[simd-simplify] R%d has vzero: ", ins
->dreg
); mono_print_ins(ins
));
711 vreg_flags
[ins
->dreg
] |= VREG_HAS_XZERO_BB0
;
715 if (ins
->opcode
== OP_LDADDR
&& apply_vreg_first_block_interference (cfg
, ins
, ((MonoInst
*)ins
->inst_p0
)->dreg
, max_vreg
, vreg_flags
))
717 if (apply_vreg_first_block_interference (cfg
, ins
, ins
->dreg
, max_vreg
, vreg_flags
))
719 num_sregs
= mono_inst_get_src_registers (ins
, sregs
);
720 for (i
= 0; i
< num_sregs
; ++i
) {
721 if (apply_vreg_first_block_interference (cfg
, ins
, sregs
[i
], max_vreg
, vreg_flags
))
726 if (IS_DEBUG_ON (cfg
)) {
727 for (i
= 0; i
< cfg
->num_varinfo
; i
++) {
728 MonoInst
*var
= cfg
->varinfo
[i
];
729 if (var
->klass
->simd_type
) {
730 if ((vreg_flags
[var
->dreg
] & VREG_HAS_XZERO_BB0
))
731 DEBUG (printf ("[simd-simplify] R%d has xzero only\n", var
->dreg
));
732 if ((vreg_flags
[var
->dreg
] & VREG_HAS_OTHER_OP_BB0
))
733 DEBUG (printf ("[simd-simplify] R%d has other ops on bb0\n", var
->dreg
));
738 /*TODO stop here if no var is xzero only*/
741 Scan all other bb and check if it has only one other use
742 Ideally this would be done after an extended bb formation pass
744 FIXME This pass could use dominator information to properly
745 place the XZERO on the bb that dominates all uses of the var,
746 but this will have zero effect with the current local reg alloc
748 TODO simply the use of flags.
751 for (bb
= first_bb
->next_bb
; bb
; bb
= bb
->next_bb
) {
752 for (ins
= bb
->code
; ins
; ins
= ins
->next
) {
754 int sregs
[MONO_MAX_SRC_REGS
];
756 if (ins
->opcode
== OP_LDADDR
&& apply_vreg_following_block_interference (cfg
, ins
, ((MonoInst
*)ins
->inst_p0
)->dreg
, bb
, max_vreg
, vreg_flags
, target_bb
))
758 if (apply_vreg_following_block_interference (cfg
, ins
, ins
->dreg
, bb
, max_vreg
, vreg_flags
, target_bb
))
760 num_sregs
= mono_inst_get_src_registers (ins
, sregs
);
761 for (i
= 0; i
< num_sregs
; ++i
) {
762 if (apply_vreg_following_block_interference (cfg
, ins
, sregs
[i
], bb
,
763 max_vreg
, vreg_flags
, target_bb
))
769 for (i
= 0; i
< cfg
->num_varinfo
; i
++) {
770 MonoInst
*var
= cfg
->varinfo
[i
];
771 if (!var
->klass
->simd_type
)
773 if ((vreg_flags
[var
->dreg
] & VREG_SINGLE_BB_USE
))
774 DEBUG (printf ("[simd-simplify] R%d has single bb use\n", var
->dreg
));
775 if ((vreg_flags
[var
->dreg
] & VREG_MANY_BB_USE
))
776 DEBUG (printf ("[simd-simplify] R%d has many bb in use\n", var
->dreg
));
778 if (!(vreg_flags
[var
->dreg
] & VREG_SINGLE_BB_USE
))
780 for (ins
= target_bb
[var
->dreg
]->code
; ins
; ins
= ins
->next
) {
782 int sregs
[MONO_MAX_SRC_REGS
];
783 gboolean found
= FALSE
;
785 num_sregs
= mono_inst_get_src_registers (ins
, sregs
);
786 for (j
= 0; j
< num_sregs
; ++j
) {
787 if (sregs
[j
] == var
->dreg
)
790 /*We can avoid inserting the XZERO if the first use doesn't depend on the zero'ed value.*/
791 if (ins
->dreg
== var
->dreg
&& !found
) {
792 DEBUG (printf ("[simd-simplify] INGORING R%d on BB %d because first op is a def", i
, target_bb
[var
->dreg
]->block_num
););
795 DEBUG (printf ("[simd-simplify] Adding XZERO for R%d on BB %d: ", i
, target_bb
[var
->dreg
]->block_num
); );
797 MONO_INST_NEW (cfg
, tmp
, OP_XZERO
);
798 tmp
->dreg
= var
->dreg
;
799 tmp
->type
= STACK_VTYPE
;
800 tmp
->klass
= var
->klass
;
801 mono_bblock_insert_before_ins (target_bb
[var
->dreg
], ins
, tmp
);
807 for (ins
= first_bb
->code
; ins
; ins
= ins
->next
) {
808 if (ins
->opcode
== OP_XZERO
&& (vreg_flags
[ins
->dreg
] & VREG_SINGLE_BB_USE
)) {
809 DEBUG (printf ("[simd-simplify] Nullify %d on first BB: ", ins
->dreg
); mono_print_ins(ins
));
819 * This function expect that src be a value.
822 get_simd_vreg (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
*src
)
824 const char *spec
= INS_INFO (src
->opcode
);
826 if (src
->opcode
== OP_XMOVE
) {
828 } else if (spec
[MONO_INST_DEST
] == 'x') {
830 } else if (src
->opcode
== OP_VCALL
) {
834 g_warning ("get_simd_vreg:: could not infer source simd vreg for op");
835 mono_print_ins (src
);
836 g_assert_not_reached ();
840 * This function will load the value if needed.
843 load_simd_vreg_class (MonoCompile
*cfg
, MonoClass
*klass
, MonoInst
*src
, gboolean
*indirect
)
845 const char *spec
= INS_INFO (src
->opcode
);
849 if (src
->opcode
== OP_XMOVE
) {
851 } else if (src
->opcode
== OP_LDADDR
) {
852 int res
= ((MonoInst
*)src
->inst_p0
)->dreg
;
855 } else if (spec
[MONO_INST_DEST
] == 'x') {
857 } else if (src
->type
== STACK_PTR
|| src
->type
== STACK_MP
) {
862 MONO_INST_NEW (cfg
, ins
, OP_LOADX_MEMBASE
);
864 ins
->sreg1
= src
->dreg
;
865 ins
->type
= STACK_VTYPE
;
866 ins
->dreg
= alloc_ireg (cfg
);
867 MONO_ADD_INS (cfg
->cbb
, ins
);
870 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src
->type
);
871 mono_print_ins (src
);
872 g_assert_not_reached ();
876 load_simd_vreg (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
*src
, gboolean
*indirect
)
878 return load_simd_vreg_class (cfg
, cmethod
->klass
, src
, indirect
);
881 /*We share the var with fconv_to_r8_x to save some stack space.*/
883 get_double_spill_area (MonoCompile
*cfg
)
885 if (!cfg
->fconv_to_r8_x_var
) {
886 cfg
->fconv_to_r8_x_var
= mono_compile_create_var (cfg
, &mono_defaults
.double_class
->byval_arg
, OP_LOCAL
);
887 cfg
->fconv_to_r8_x_var
->flags
|= MONO_INST_VOLATILE
; /*FIXME, use the don't regalloc flag*/
889 return cfg
->fconv_to_r8_x_var
;
892 get_simd_ctor_spill_area (MonoCompile
*cfg
, MonoClass
*avector_klass
)
894 if (!cfg
->simd_ctor_var
) {
895 cfg
->simd_ctor_var
= mono_compile_create_var (cfg
, &avector_klass
->byval_arg
, OP_LOCAL
);
896 cfg
->simd_ctor_var
->flags
|= MONO_INST_VOLATILE
; /*FIXME, use the don't regalloc flag*/
898 return cfg
->simd_ctor_var
;
902 mono_type_to_expand_op (MonoType
*type
)
904 switch (type
->type
) {
922 g_assert_not_reached ();
927 type_to_comp_op (MonoType
*t
)
947 g_assert_not_reached ();
953 type_to_gt_op (MonoType
*t
)
970 type_to_padd_op (MonoType
*t
)
996 type_to_psub_op (MonoType
*t
)
1022 type_to_pmul_op (MonoType
*t
)
1045 type_to_pdiv_op (MonoType
*t
)
1059 get_simd_vreg_or_expanded_scalar (MonoCompile
*cfg
, MonoClass
*klass
, MonoType
*param_type
, MonoInst
*src
)
1064 if (mono_class_from_mono_type (param_type
)->simd_type
)
1065 return get_simd_vreg (cfg
, NULL
, src
);
1067 expand_op
= mono_type_to_expand_op (param_type
);
1068 MONO_INST_NEW (cfg
, ins
, expand_op
);
1070 ins
->sreg1
= src
->dreg
;
1071 ins
->type
= STACK_VTYPE
;
1072 ins
->dreg
= alloc_ireg (cfg
);
1073 MONO_ADD_INS (cfg
->cbb
, ins
);
1075 if (expand_op
== OP_EXPAND_R4
)
1076 ins
->backend
.spill_var
= mini_get_int_to_float_spill_area (cfg
);
1077 else if (expand_op
== OP_EXPAND_R8
)
1078 ins
->backend
.spill_var
= get_double_spill_area (cfg
);
1084 * simd_intrinsic_emit_binary_op:
1086 * Emit a binary SIMD opcode.
1087 * @LHS/@RHS are the two arguments, they can be either a SIMD type or a scalar one. Scalar arguments are
1088 * expanded to the SIMD type.
1091 simd_intrinsic_emit_binary_op (MonoCompile
*cfg
, int opcode
, int flags
, MonoClass
*klass
, MonoType
*lhs_type
, MonoType
*rhs_type
, MonoInst
*lhs
, MonoInst
*rhs
)
1094 int left_vreg
, right_vreg
;
1096 left_vreg
= get_simd_vreg_or_expanded_scalar (cfg
, klass
, lhs_type
, lhs
);
1097 right_vreg
= get_simd_vreg_or_expanded_scalar (cfg
, klass
, rhs_type
, rhs
);
1099 MONO_INST_NEW (cfg
, ins
, opcode
);
1101 ins
->sreg1
= left_vreg
;
1102 ins
->sreg2
= right_vreg
;
1103 ins
->type
= STACK_VTYPE
;
1104 ins
->dreg
= alloc_ireg (cfg
);
1105 ins
->inst_c0
= flags
;
1106 MONO_ADD_INS (cfg
->cbb
, ins
);
1111 simd_intrinsic_emit_binary (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1113 MonoMethodSignature
*sig
= mono_method_signature (cmethod
);
1115 g_assert (sig
->param_count
== 2);
1117 return simd_intrinsic_emit_binary_op (cfg
, intrinsic
->opcode
, intrinsic
->flags
, cmethod
->klass
, sig
->params
[0], sig
->params
[1], args
[0], args
[1]);
1121 simd_intrinsic_emit_unary (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1126 vreg
= get_simd_vreg (cfg
, cmethod
, args
[0]);
1128 MONO_INST_NEW (cfg
, ins
, intrinsic
->opcode
);
1129 ins
->klass
= cmethod
->klass
;
1131 ins
->type
= STACK_VTYPE
;
1132 ins
->dreg
= alloc_ireg (cfg
);
1133 MONO_ADD_INS (cfg
->cbb
, ins
);
1138 mono_type_to_extract_op (MonoType
*type
)
1140 switch (type
->type
) {
1142 return OP_EXTRACT_I1
;
1144 return OP_EXTRACT_U1
;
1146 return OP_EXTRACT_I2
;
1148 return OP_EXTRACT_U2
;
1152 return OP_EXTRACT_I4
;
1154 g_assert_not_reached ();
1158 /*Returns the amount to shift the element index to get the dword it belongs to*/
1160 mono_type_elements_shift_bits (MonoType
*type
)
1162 switch (type
->type
) {
1174 g_assert_not_reached ();
1178 static G_GNUC_UNUSED
int
1179 mono_type_to_insert_op (MonoType
*type
)
1181 switch (type
->type
) {
1184 return OP_INSERT_I1
;
1187 return OP_INSERT_I2
;
1190 return OP_INSERT_I4
;
1193 return OP_INSERT_I8
;
1195 return OP_INSERT_R4
;
1197 return OP_INSERT_R8
;
1199 g_assert_not_reached ();
1204 mono_type_to_slow_insert_op (MonoType
*type
)
1206 switch (type
->type
) {
1209 return OP_INSERTX_U1_SLOW
;
1212 return OP_INSERT_I2
;
1215 return OP_INSERTX_I4_SLOW
;
1218 return OP_INSERTX_I8_SLOW
;
1220 return OP_INSERTX_R4_SLOW
;
1222 return OP_INSERTX_R8_SLOW
;
1224 g_assert_not_reached ();
1229 simd_intrinsic_emit_setter (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1232 MonoMethodSignature
*sig
= mono_method_signature (cmethod
);
1237 size
= mono_type_size (sig
->params
[0], &align
);
1239 if (COMPILE_LLVM (cfg
)) {
1240 MONO_INST_NEW (cfg
, ins
, mono_type_to_insert_op (sig
->params
[0]));
1241 ins
->klass
= cmethod
->klass
;
1242 ins
->dreg
= ins
->sreg1
= dreg
= load_simd_vreg (cfg
, cmethod
, args
[0], &indirect
);
1243 ins
->sreg2
= args
[1]->dreg
;
1244 ins
->inst_c0
= intrinsic
->opcode
;
1245 MONO_ADD_INS (cfg
->cbb
, ins
);
1246 } else if (size
== 2 || size
== 4 || size
== 8) {
1247 MONO_INST_NEW (cfg
, ins
, mono_type_to_slow_insert_op (sig
->params
[0]));
1248 ins
->klass
= cmethod
->klass
;
1249 /*This is a partial load so we encode the dependency on the previous value by setting dreg and sreg1 to the same value.*/
1250 ins
->dreg
= ins
->sreg1
= dreg
= load_simd_vreg (cfg
, cmethod
, args
[0], &indirect
);
1251 ins
->sreg2
= args
[1]->dreg
;
1252 ins
->inst_c0
= intrinsic
->opcode
;
1253 if (sig
->params
[0]->type
== MONO_TYPE_R4
)
1254 ins
->backend
.spill_var
= mini_get_int_to_float_spill_area (cfg
);
1255 else if (sig
->params
[0]->type
== MONO_TYPE_R8
)
1256 ins
->backend
.spill_var
= get_double_spill_area (cfg
);
1257 MONO_ADD_INS (cfg
->cbb
, ins
);
1261 MONO_INST_NEW (cfg
, ins
, OP_EXTRACTX_U2
);
1262 ins
->klass
= cmethod
->klass
;
1263 ins
->sreg1
= sreg
= dreg
= load_simd_vreg (cfg
, cmethod
, args
[0], &indirect
);
1264 ins
->type
= STACK_I4
;
1265 ins
->dreg
= vreg
= alloc_ireg (cfg
);
1266 ins
->inst_c0
= intrinsic
->opcode
/ 2;
1267 MONO_ADD_INS (cfg
->cbb
, ins
);
1269 MONO_INST_NEW (cfg
, ins
, OP_INSERTX_U1_SLOW
);
1270 ins
->klass
= cmethod
->klass
;
1272 ins
->sreg2
= args
[1]->dreg
;
1274 ins
->inst_c0
= intrinsic
->opcode
;
1275 MONO_ADD_INS (cfg
->cbb
, ins
);
1279 MONO_INST_NEW (cfg
, ins
, OP_STOREX_MEMBASE
);
1280 ins
->klass
= cmethod
->klass
;
1281 ins
->dreg
= args
[0]->dreg
;
1283 MONO_ADD_INS (cfg
->cbb
, ins
);
1289 * simd_intrinsic_emit_getter_op:
1291 * Emit IR for loading an element of a SIMD value.
1293 * @klass is the simd type, @type is the element type.
1296 simd_intrinsic_emit_getter_op (MonoCompile
*cfg
, int index
, MonoClass
*klass
, MonoType
*type
, MonoInst
*arg
)
1299 int vreg
, shift_bits
;
1301 vreg
= load_simd_vreg_class (cfg
, klass
, arg
, NULL
);
1303 if (type
->type
== MONO_TYPE_I8
|| type
->type
== MONO_TYPE_U8
|| type
->type
== MONO_TYPE_R8
) {
1305 gboolean is_r8
= type
->type
== MONO_TYPE_R8
;
1307 MONO_INST_NEW (cfg
, ins
, is_r8
? OP_EXTRACT_R8
: OP_EXTRACT_I8
);
1310 ins
->inst_c0
= index
;
1312 ins
->type
= STACK_R8
;
1313 ins
->dreg
= alloc_freg (cfg
);
1314 ins
->backend
.spill_var
= get_double_spill_area (cfg
);
1316 ins
->type
= STACK_I8
;
1317 ins
->dreg
= alloc_lreg (cfg
);
1319 MONO_ADD_INS (cfg
->cbb
, ins
);
1323 shift_bits
= mono_type_elements_shift_bits (type
);
1325 if ((index
>> shift_bits
) && !cfg
->compile_llvm
) {
1326 MONO_INST_NEW (cfg
, ins
, OP_PSHUFLED
);
1329 ins
->inst_c0
= index
>> shift_bits
;
1330 ins
->type
= STACK_VTYPE
;
1331 ins
->dreg
= vreg
= alloc_ireg (cfg
);
1332 MONO_ADD_INS (cfg
->cbb
, ins
);
1335 MONO_INST_NEW (cfg
, ins
, mono_type_to_extract_op (type
));
1338 ins
->type
= STACK_I4
;
1339 ins
->dreg
= vreg
= alloc_ireg (cfg
);
1340 if (cfg
->compile_llvm
)
1341 ins
->inst_c0
= index
;
1343 ins
->inst_c0
= index
& ((1 << shift_bits
) - 1);
1344 MONO_ADD_INS (cfg
->cbb
, ins
);
1346 if (type
->type
== MONO_TYPE_R4
) {
1347 MONO_INST_NEW (cfg
, ins
, cfg
->r4fp
? OP_ICONV_TO_R4_RAW
: OP_MOVE_I4_TO_F
);
1348 ins
->klass
= mono_defaults
.single_class
;
1350 ins
->type
= cfg
->r4_stack_type
;
1351 ins
->dreg
= alloc_freg (cfg
);
1352 ins
->backend
.spill_var
= mini_get_int_to_float_spill_area (cfg
);
1353 MONO_ADD_INS (cfg
->cbb
, ins
);
1359 simd_intrinsic_emit_getter (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1361 MonoMethodSignature
*sig
= mono_method_signature (cmethod
);
1363 return simd_intrinsic_emit_getter_op (cfg
, intrinsic
->opcode
, cmethod
->klass
, sig
->ret
, args
[0]);
1367 simd_intrinsic_emit_long_getter (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1371 gboolean is_r8
= mono_method_signature (cmethod
)->ret
->type
== MONO_TYPE_R8
;
1373 vreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
1375 MONO_INST_NEW (cfg
, ins
, is_r8
? OP_EXTRACT_R8
: OP_EXTRACT_I8
);
1376 ins
->klass
= cmethod
->klass
;
1378 ins
->inst_c0
= intrinsic
->opcode
;
1380 ins
->type
= STACK_R8
;
1381 ins
->dreg
= alloc_freg (cfg
);
1382 ins
->backend
.spill_var
= get_double_spill_area (cfg
);
1384 ins
->type
= STACK_I8
;
1385 ins
->dreg
= alloc_lreg (cfg
);
1387 MONO_ADD_INS (cfg
->cbb
, ins
);
1393 simd_intrinsic_emit_ctor (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1395 MonoInst
*ins
= NULL
;
1397 gboolean is_ldaddr
= args
[0]->opcode
== OP_LDADDR
;
1398 MonoMethodSignature
*sig
= mono_method_signature (cmethod
);
1399 int store_op
= mono_type_to_store_membase (cfg
, sig
->params
[0]);
1400 int arg_size
= mono_type_size (sig
->params
[0], &i
);
1403 if (sig
->param_count
== 1) {
1407 dreg
= args
[0]->inst_i0
->dreg
;
1408 NULLIFY_INS (args
[0]);
1410 g_assert (args
[0]->type
== STACK_MP
|| args
[0]->type
== STACK_PTR
);
1411 dreg
= alloc_ireg (cfg
);
1415 opcode
= intrinsic
->opcode
;
1417 opcode
= mono_type_to_expand_op (sig
->params
[0]);
1418 MONO_INST_NEW (cfg
, ins
, opcode
);
1419 ins
->klass
= cmethod
->klass
;
1420 ins
->sreg1
= args
[1]->dreg
;
1421 ins
->type
= STACK_VTYPE
;
1424 MONO_ADD_INS (cfg
->cbb
, ins
);
1425 if (sig
->params
[0]->type
== MONO_TYPE_R4
)
1426 ins
->backend
.spill_var
= mini_get_int_to_float_spill_area (cfg
);
1427 else if (sig
->params
[0]->type
== MONO_TYPE_R8
)
1428 ins
->backend
.spill_var
= get_double_spill_area (cfg
);
1431 MONO_INST_NEW (cfg
, ins
, OP_STOREX_MEMBASE
);
1432 ins
->dreg
= args
[0]->dreg
;
1434 MONO_ADD_INS (cfg
->cbb
, ins
);
1440 NEW_VARLOADA (cfg
, ins
, get_simd_ctor_spill_area (cfg
, cmethod
->klass
), &cmethod
->klass
->byref_arg
);
1441 MONO_ADD_INS (cfg
->cbb
, ins
);
1442 addr_reg
= ins
->dreg
;
1444 g_assert (args
[0]->type
== STACK_MP
|| args
[0]->type
== STACK_PTR
);
1445 addr_reg
= args
[0]->dreg
;
1448 for (i
= sig
->param_count
- 1; i
>= 0; --i
) {
1449 EMIT_NEW_STORE_MEMBASE (cfg
, ins
, store_op
, addr_reg
, i
* arg_size
, args
[i
+ 1]->dreg
);
1452 if (sig
->param_count
* arg_size
< 16) {
1453 /* If there are not enough arguments, fill the rest with 0s */
1454 for (i
= sig
->param_count
; i
< 16 / arg_size
; ++i
) {
1457 MONO_EMIT_NEW_STORE_MEMBASE_IMM (cfg
, OP_STOREI4_MEMBASE_IMM
, addr_reg
, i
* arg_size
, 0);
1460 g_assert_not_reached ();
1466 if (is_ldaddr
) { /*Eliminate LDADDR if it's initing a local var*/
1467 int vreg
= ((MonoInst
*)args
[0]->inst_p0
)->dreg
;
1468 NULLIFY_INS (args
[0]);
1470 MONO_INST_NEW (cfg
, ins
, OP_LOADX_MEMBASE
);
1471 ins
->klass
= cmethod
->klass
;
1472 ins
->sreg1
= addr_reg
;
1473 ins
->type
= STACK_VTYPE
;
1475 MONO_ADD_INS (cfg
->cbb
, ins
);
1481 simd_intrinsic_emit_cast (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1487 vreg
= get_simd_vreg (cfg
, cmethod
, args
[0]);
1489 if (cmethod
->is_inflated
)
1491 klass
= mono_class_from_mono_type (mono_method_signature (cmethod
)->ret
);
1493 klass
= cmethod
->klass
;
1495 MONO_INST_NEW (cfg
, ins
, OP_XMOVE
);
1497 ins
->type
= STACK_VTYPE
;
1499 ins
->dreg
= alloc_ireg (cfg
);
1500 MONO_ADD_INS (cfg
->cbb
, ins
);
1505 simd_intrinsic_emit_shift (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1508 int vreg
, vreg2
= -1, opcode
= intrinsic
->opcode
;
1510 vreg
= get_simd_vreg (cfg
, cmethod
, args
[0]);
1512 if (args
[1]->opcode
!= OP_ICONST
) {
1513 MONO_INST_NEW (cfg
, ins
, OP_ICONV_TO_X
);
1514 ins
->klass
= mono_defaults
.int32_class
;
1515 ins
->sreg1
= args
[1]->dreg
;
1516 ins
->type
= STACK_I4
;
1517 ins
->dreg
= vreg2
= alloc_ireg (cfg
);
1518 MONO_ADD_INS (cfg
->cbb
, ins
);
1520 ++opcode
; /*The shift_reg version op is always +1 from the regular one.*/
1523 MONO_INST_NEW (cfg
, ins
, opcode
);
1524 ins
->klass
= cmethod
->klass
;
1528 if (args
[1]->opcode
== OP_ICONST
) {
1529 ins
->inst_imm
= args
[1]->inst_c0
;
1530 NULLIFY_INS (args
[1]);
1533 ins
->type
= STACK_VTYPE
;
1534 ins
->dreg
= alloc_ireg (cfg
);
1535 MONO_ADD_INS (cfg
->cbb
, ins
);
1539 static inline gboolean
1540 mono_op_is_packed_compare (int op
)
1542 return op
>= OP_PCMPEQB
&& op
<= OP_PCMPEQQ
;
1546 simd_intrinsic_emit_equality_op (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
, int opcode
, int flags
)
1549 int left_vreg
, right_vreg
, tmp_vreg
;
1551 left_vreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
1552 right_vreg
= get_simd_vreg (cfg
, cmethod
, args
[1]);
1554 MONO_INST_NEW (cfg
, ins
, opcode
);
1555 ins
->klass
= cmethod
->klass
;
1556 ins
->sreg1
= left_vreg
;
1557 ins
->sreg2
= right_vreg
;
1558 ins
->type
= STACK_VTYPE
;
1559 ins
->klass
= cmethod
->klass
;
1560 ins
->dreg
= tmp_vreg
= alloc_ireg (cfg
);
1561 ins
->inst_c0
= flags
;
1562 MONO_ADD_INS (cfg
->cbb
, ins
);
1564 /*FIXME the next ops are SSE specific*/
1565 MONO_INST_NEW (cfg
, ins
, OP_EXTRACT_MASK
);
1566 ins
->klass
= cmethod
->klass
;
1567 ins
->sreg1
= tmp_vreg
;
1568 ins
->type
= STACK_I4
;
1569 ins
->dreg
= tmp_vreg
= alloc_ireg (cfg
);
1570 MONO_ADD_INS (cfg
->cbb
, ins
);
1572 /*FP ops have a not equal instruction, which means that we must test the results with OR semantics.*/
1573 if (mono_op_is_packed_compare (opcode
) || flags
== SIMD_COMP_EQ
) {
1574 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, tmp_vreg
, 0xFFFF);
1575 NEW_UNALU (cfg
, ins
, flags
== SIMD_COMP_EQ
? OP_CEQ
: OP_CLT_UN
, tmp_vreg
, -1);
1577 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, tmp_vreg
, 0);
1578 NEW_UNALU (cfg
, ins
, OP_CGT_UN
, tmp_vreg
, -1);
1580 MONO_ADD_INS (cfg
->cbb
, ins
);
1585 simd_intrinsic_emit_equality (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1587 return simd_intrinsic_emit_equality_op (cfg
, cmethod
, args
, intrinsic
->opcode
, intrinsic
->flags
);
1591 simd_intrinsic_emit_shuffle (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1594 int vreg
, vreg2
= -1;
1595 int param_count
= mono_method_signature (cmethod
)->param_count
;
1597 if (args
[param_count
- 1]->opcode
!= OP_ICONST
) {
1598 /*TODO Shuffle with non literals is not yet supported */
1602 vreg
= get_simd_vreg (cfg
, cmethod
, args
[0]);
1603 if (param_count
== 3)
1604 vreg2
= get_simd_vreg (cfg
, cmethod
, args
[1]);
1606 NULLIFY_INS (args
[param_count
- 1]);
1609 MONO_INST_NEW (cfg
, ins
, intrinsic
->opcode
);
1610 ins
->klass
= cmethod
->klass
;
1613 ins
->inst_c0
= args
[param_count
- 1]->inst_c0
;
1614 ins
->type
= STACK_VTYPE
;
1615 ins
->dreg
= alloc_ireg (cfg
);
1616 MONO_ADD_INS (cfg
->cbb
, ins
);
1618 if (param_count
== 3 && ins
->opcode
== OP_PSHUFLED
)
1619 ins
->opcode
= OP_SHUFPS
;
1624 simd_intrinsic_emit_load_aligned (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1628 MONO_INST_NEW (cfg
, ins
, OP_LOADX_ALIGNED_MEMBASE
);
1629 ins
->klass
= cmethod
->klass
;
1630 ins
->sreg1
= args
[0]->dreg
;
1631 ins
->type
= STACK_VTYPE
;
1632 ins
->dreg
= alloc_ireg (cfg
);
1633 MONO_ADD_INS (cfg
->cbb
, ins
);
1638 simd_intrinsic_emit_store (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1643 vreg
= get_simd_vreg (cfg
, cmethod
, args
[1]);
1645 MONO_INST_NEW (cfg
, ins
, intrinsic
->opcode
);
1646 ins
->klass
= cmethod
->klass
;
1647 ins
->dreg
= args
[0]->dreg
;
1649 ins
->type
= STACK_VTYPE
;
1650 MONO_ADD_INS (cfg
->cbb
, ins
);
1655 simd_intrinsic_emit_extract_mask (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1660 vreg
= get_simd_vreg (cfg
, cmethod
, args
[0]);
1662 MONO_INST_NEW (cfg
, ins
, OP_EXTRACT_MASK
);
1663 ins
->klass
= cmethod
->klass
;
1665 ins
->type
= STACK_I4
;
1666 ins
->dreg
= alloc_ireg (cfg
);
1667 MONO_ADD_INS (cfg
->cbb
, ins
);
1673 simd_intrinsic_emit_prefetch (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1677 MONO_INST_NEW (cfg
, ins
, OP_PREFETCH_MEMBASE
);
1678 ins
->klass
= cmethod
->klass
;
1679 ins
->sreg1
= args
[0]->dreg
;
1680 ins
->backend
.arg_info
= intrinsic
->flags
;
1681 MONO_ADD_INS (cfg
->cbb
, ins
);
1686 simd_intrinsic_emit_const (const SimdIntrinsic
*intrinsic
, MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
**args
)
1690 MONO_INST_NEW (cfg
, ins
, intrinsic
->opcode
);
1691 ins
->klass
= cmethod
->klass
;
1692 ins
->type
= STACK_VTYPE
;
1693 ins
->dreg
= alloc_xreg (cfg
);
1694 MONO_ADD_INS (cfg
->cbb
, ins
);
1699 simd_version_name (guint32 version
)
1702 case SIMD_VERSION_SSE1
:
1704 case SIMD_VERSION_SSE2
:
1706 case SIMD_VERSION_SSE3
:
1708 case SIMD_VERSION_SSSE3
:
1710 case SIMD_VERSION_SSE41
:
1712 case SIMD_VERSION_SSE42
:
1714 case SIMD_VERSION_SSE4a
:
/*
 * emit_intrinsics:
 * Looks up cmethod->name in a name-sorted SimdIntrinsic table via binary
 * search, checks that the required SIMD instruction-set level is supported,
 * then dispatches on simd_emit_mode to the matching emitter.
 * NOTE(review): garbled fragment — the "not found" early-return, the
 * unsupported-version bail-out and several closing braces are elided.
 */
1721 emit_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
, const SimdIntrinsic
*intrinsics
, guint32 size
)
/* Table must be sorted by name for mono_binary_search to work. */
1723 const SimdIntrinsic
*result
= (const SimdIntrinsic
*)mono_binary_search (cmethod
->name
, intrinsics
, size
, sizeof (SimdIntrinsic
), &simd_intrinsic_compare_by_name
);
1725 DEBUG (printf ("function doesn't have a simd intrinsic %s::%s/%d\n", cmethod
->klass
->name
, cmethod
->name
, fsig
->param_count
));
/* Debug trace: dump the matched intrinsic and its arguments. */
1728 if (IS_DEBUG_ON (cfg
)) {
1730 printf ("found call to intrinsic %s::%s/%d -> %s\n", cmethod
->klass
->name
, cmethod
->name
, fsig
->param_count
, method_name (result
->name
));
1731 max
= fsig
->param_count
+ fsig
->hasthis
;
1732 for (i
= 0; i
< max
; ++i
) {
1733 printf ("param %d: ", i
);
1734 mono_print_ins (args
[i
]);
/* Reject intrinsics whose required SIMD version is not available on this CPU. */
1737 if (result
->simd_version_flags
&& !(result
->simd_version_flags
& simd_supported_versions
)) {
1738 if (IS_DEBUG_ON (cfg
)) {
1740 printf ("function %s::%s/%d requires one of unsuported SIMD instruction set(s): ", cmethod
->klass
->name
, cmethod
->name
, fsig
->param_count
);
1741 for (x
= 1; x
<= SIMD_VERSION_INDEX_END
; x
++)
1742 if (result
->simd_version_flags
& (1 << x
))
1743 printf ("%s ", simd_version_name (1 << x
));
/* Dispatch to the per-shape emitter based on the table entry. */
1750 switch (result
->simd_emit_mode
) {
1751 case SIMD_EMIT_BINARY
:
1752 return simd_intrinsic_emit_binary (result
, cfg
, cmethod
, args
);
1753 case SIMD_EMIT_UNARY
:
1754 return simd_intrinsic_emit_unary (result
, cfg
, cmethod
, args
);
1755 case SIMD_EMIT_SETTER
:
1756 return simd_intrinsic_emit_setter (result
, cfg
, cmethod
, args
);
1757 case SIMD_EMIT_GETTER
:
1758 return simd_intrinsic_emit_getter (result
, cfg
, cmethod
, args
);
1759 case SIMD_EMIT_GETTER_QWORD
:
1760 return simd_intrinsic_emit_long_getter (result
, cfg
, cmethod
, args
);
1761 case SIMD_EMIT_CTOR
:
1762 return simd_intrinsic_emit_ctor (result
, cfg
, cmethod
, args
);
1763 case SIMD_EMIT_CAST
:
1764 return simd_intrinsic_emit_cast (result
, cfg
, cmethod
, args
);
1765 case SIMD_EMIT_SHUFFLE
:
1766 return simd_intrinsic_emit_shuffle (result
, cfg
, cmethod
, args
);
1767 case SIMD_EMIT_SHIFT
:
1768 return simd_intrinsic_emit_shift (result
, cfg
, cmethod
, args
);
1769 case SIMD_EMIT_EQUALITY
:
1770 return simd_intrinsic_emit_equality (result
, cfg
, cmethod
, args
);
1771 case SIMD_EMIT_LOAD_ALIGNED
:
1772 return simd_intrinsic_emit_load_aligned (result
, cfg
, cmethod
, args
);
1773 case SIMD_EMIT_STORE
:
1774 return simd_intrinsic_emit_store (result
, cfg
, cmethod
, args
);
1775 case SIMD_EMIT_EXTRACT_MASK
:
1776 return simd_intrinsic_emit_extract_mask (result
, cfg
, cmethod
, args
);
1777 case SIMD_EMIT_PREFETCH
:
1778 return simd_intrinsic_emit_prefetch (result
, cfg
, cmethod
, args
);
/* Every simd_emit_mode must be handled above. */
1780 g_assert_not_reached ();
/*
 * mono_emit_vector_ldelema:
 * Computes the address of a 16-byte vector slice inside a managed array:
 * addr = arr + index * element_size + MONO_STRUCT_OFFSET (MonoArray, vector).
 * When check_bounds is set it bounds-checks both the first and the last
 * element of the slice (index and index + 16/size - 1).
 * NOTE(review): garbled fragment — the return-type line, the #else/#endif of
 * the 64-bit index widening, the check_bounds conditional braces and the
 * final return are elided.
 */
1784 mono_emit_vector_ldelema (MonoCompile
*cfg
, MonoType
*array_type
, MonoInst
*arr
, MonoInst
*index
, gboolean check_bounds
)
1788 int mult_reg
, add_reg
, array_reg
, index_reg
, index2_reg
, index3_reg
;
1790 size
= mono_array_element_size (mono_class_from_mono_type (array_type
));
1791 mult_reg
= alloc_preg (cfg
);
1792 array_reg
= arr
->dreg
;
1793 index_reg
= index
->dreg
;
1795 #if SIZEOF_VOID_P == 8
1796 /* The array reg is 64 bits but the index reg is only 32 */
1797 index2_reg
= alloc_preg (cfg
);
1798 MONO_EMIT_NEW_UNALU (cfg
, OP_SEXT_I4
, index2_reg
, index_reg
);
/* NOTE(review): presumably the 32-bit #else branch — confirm against upstream. */
1800 index2_reg
= index_reg
;
1802 index3_reg
= alloc_preg (cfg
);
/* Check the first element of the slice... */
1805 MONO_EMIT_BOUNDS_CHECK (cfg
, array_reg
, MonoArray
, max_length
, index2_reg
);
/* ...and the last element: index + (16 / element_size) - 1. */
1806 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_PADD_IMM
, index3_reg
, index2_reg
, 16 / size
- 1);
1807 MONO_EMIT_BOUNDS_CHECK (cfg
, array_reg
, MonoArray
, max_length
, index3_reg
);
1810 add_reg
= alloc_preg (cfg
);
/* addr = array + index * size + offsetof (MonoArray, vector). */
1812 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_MUL_IMM
, mult_reg
, index2_reg
, size
);
1813 MONO_EMIT_NEW_BIALU (cfg
, OP_PADD
, add_reg
, array_reg
, mult_reg
);
1814 NEW_BIALU_IMM (cfg
, ins
, OP_PADD_IMM
, add_reg
, add_reg
, MONO_STRUCT_OFFSET (MonoArray
, vector
));
1815 ins
->type
= STACK_PTR
;
1816 MONO_ADD_INS (cfg
->cbb
, ins
);
/*
 * emit_array_extension_intrinsics:
 * Intrinsifies Mono.Simd.ArrayExtensions methods:
 *   GetVector/GetVectorAligned (arr, index)      -> vector load from array,
 *   SetVector/SetVectorAligned (arr, value, idx) -> vector store into array,
 *   IsAligned (arr, index)                       -> (addr & 15) == 0.
 * NOTE(review): garbled fragment — return statements, some inst_basereg
 * assignments and closing braces are elided.
 */
1822 emit_array_extension_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
/* GetVector / GetVectorAligned: bounds-checked vector load. */
1824 if ((!strcmp ("GetVector", cmethod
->name
) || !strcmp ("GetVectorAligned", cmethod
->name
)) && fsig
->param_count
== 2) {
1826 int addr
= mono_emit_vector_ldelema (cfg
, fsig
->params
[0], args
[0], args
[1], TRUE
);
/* Aligned variant can use the (faster) aligned load opcode. */
1828 MONO_INST_NEW (cfg
, load
, !strcmp ("GetVectorAligned", cmethod
->name
) ? OP_LOADX_ALIGNED_MEMBASE
: OP_LOADX_MEMBASE
);
1829 load
->klass
= cmethod
->klass
;
1831 load
->type
= STACK_VTYPE
;
1832 load
->dreg
= alloc_ireg (cfg
);
1833 MONO_ADD_INS (cfg
->cbb
, load
);
/* SetVector / SetVectorAligned: bounds-checked vector store. */
1837 if ((!strcmp ("SetVector", cmethod
->name
) || !strcmp ("SetVectorAligned", cmethod
->name
)) && fsig
->param_count
== 3) {
1839 int vreg
= get_simd_vreg (cfg
, cmethod
, args
[1]);
1840 int addr
= mono_emit_vector_ldelema (cfg
, fsig
->params
[0], args
[0], args
[2], TRUE
);
1842 MONO_INST_NEW (cfg
, store
, !strcmp ("SetVectorAligned", cmethod
->name
) ? OP_STOREX_ALIGNED_MEMBASE_REG
: OP_STOREX_MEMBASE
);
1843 store
->klass
= cmethod
->klass
;
1845 store
->sreg1
= vreg
;
1846 MONO_ADD_INS (cfg
->cbb
, store
);
/* IsAligned: true iff the element address is 16-byte aligned. */
1850 if (!strcmp ("IsAligned", cmethod
->name
) && fsig
->param_count
== 2) {
/* No bounds check needed just to test alignment. */
1852 int addr
= mono_emit_vector_ldelema (cfg
, fsig
->params
[0], args
[0], args
[1], FALSE
);
1854 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_AND_IMM
, addr
, addr
, 15);
1855 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, addr
, 0);
1856 NEW_UNALU (cfg
, ins
, OP_CEQ
, addr
, -1);
1857 MONO_ADD_INS (cfg
->cbb
, ins
);
/*
 * emit_simd_runtime_intrinsics:
 * Intrinsifies Mono.Simd.SimdRuntime.get_AccelMode by emitting the
 * CPU-detected simd_supported_versions bitmask as an integer constant.
 * NOTE(review): garbled fragment — the return and fall-through NULL are
 * elided; see also the TODO about AOT in the file header.
 */
1865 emit_simd_runtime_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
1867 if (!strcmp ("get_AccelMode", cmethod
->name
) && fsig
->param_count
== 0) {
1869 EMIT_NEW_ICONST (cfg
, ins
, simd_supported_versions
);
/* Returns whether ASSEMBLY is the System.Numerics assembly (by name). */
1876 is_sys_numerics_assembly (MonoAssembly
*assembly
)
1878 return !strcmp ("System.Numerics", assembly
->aname
.name
);
/* Returns whether ASSEMBLY is the System.Numerics.Vectors assembly (by name). */
1882 is_sys_numerics_vectors_assembly (MonoAssembly
*assembly
)
1884 return !strcmp ("System.Numerics.Vectors", assembly
->aname
.name
);
/*
 * mono_emit_simd_intrinsics:
 * Top-level SIMD intrinsic dispatcher. Routes by assembly first
 * (System.Numerics, System.Numerics.Vectors), then requires the call site to
 * be in the Mono.Simd assembly/namespace, and finally dispatches on the class
 * name to the matching per-type SimdIntrinsic table.
 * For VectorOperations (the extension-method class) the first parameter's
 * type name replaces the class name before dispatch.
 * NOTE(review): garbled fragment — early "return NULL" lines and closing
 * braces are elided throughout.
 */
1888 mono_emit_simd_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
1890 const char *class_name
;
/* System.Numerics.* goes through its own emitters. */
1892 if (is_sys_numerics_assembly (cmethod
->klass
->image
->assembly
))
1893 return emit_sys_numerics_intrinsics (cfg
, cmethod
, fsig
, args
);
1895 if (is_sys_numerics_vectors_assembly (cmethod
->klass
->image
->assembly
))
1896 return emit_sys_numerics_vectors_intrinsics (cfg
, cmethod
, fsig
, args
);
/* Everything below only applies to the Mono.Simd assembly + namespace. */
1898 if (strcmp ("Mono.Simd", cmethod
->klass
->image
->assembly
->aname
.name
) ||
1899 strcmp ("Mono.Simd", cmethod
->klass
->name_space
))
1902 class_name
= cmethod
->klass
->name
;
1903 if (!strcmp ("SimdRuntime", class_name
))
1904 return emit_simd_runtime_intrinsics (cfg
, cmethod
, fsig
, args
);
1906 if (!strcmp ("ArrayExtensions", class_name
))
1907 return emit_array_extension_intrinsics (cfg
, cmethod
, fsig
, args
);
/* VectorOperations holds static extension methods: dispatch on the type of
 * the first parameter instead of the class name. */
1909 if (!strcmp ("VectorOperations", class_name
)) {
1910 if (!(cmethod
->flags
& METHOD_ATTRIBUTE_STATIC
))
1912 class_name
= mono_class_from_mono_type (mono_method_signature (cmethod
)->params
[0])->name
;
1913 } else if (!cmethod
->klass
->simd_type
)
/* Mark the method so later passes know SIMD registers are in play. */
1916 cfg
->uses_simd_intrinsics
= 1;
1917 if (!strcmp ("Vector2d", class_name
))
1918 return emit_intrinsics (cfg
, cmethod
, fsig
, args
, vector2d_intrinsics
, sizeof (vector2d_intrinsics
) / sizeof (SimdIntrinsic
));
1919 if (!strcmp ("Vector4f", class_name
))
1920 return emit_intrinsics (cfg
, cmethod
, fsig
, args
, vector4f_intrinsics
, sizeof (vector4f_intrinsics
) / sizeof (SimdIntrinsic
));
1921 if (!strcmp ("Vector2ul", class_name
))
1922 return emit_intrinsics (cfg
, cmethod
, fsig
, args
, vector2ul_intrinsics
, sizeof (vector2ul_intrinsics
) / sizeof (SimdIntrinsic
));
1923 if (!strcmp ("Vector2l", class_name
))
1924 return emit_intrinsics (cfg
, cmethod
, fsig
, args
, vector2l_intrinsics
, sizeof (vector2l_intrinsics
) / sizeof (SimdIntrinsic
));
1925 if (!strcmp ("Vector4ui", class_name
))
1926 return emit_intrinsics (cfg
, cmethod
, fsig
, args
, vector4ui_intrinsics
, sizeof (vector4ui_intrinsics
) / sizeof (SimdIntrinsic
));
1927 if (!strcmp ("Vector4i", class_name
))
1928 return emit_intrinsics (cfg
, cmethod
, fsig
, args
, vector4i_intrinsics
, sizeof (vector4i_intrinsics
) / sizeof (SimdIntrinsic
));
1929 if (!strcmp ("Vector8us", class_name
))
1930 return emit_intrinsics (cfg
, cmethod
, fsig
, args
, vector8us_intrinsics
, sizeof (vector8us_intrinsics
) / sizeof (SimdIntrinsic
));
1931 if (!strcmp ("Vector8s", class_name
))
1932 return emit_intrinsics (cfg
, cmethod
, fsig
, args
, vector8s_intrinsics
, sizeof (vector8s_intrinsics
) / sizeof (SimdIntrinsic
));
1933 if (!strcmp ("Vector16b", class_name
))
1934 return emit_intrinsics (cfg
, cmethod
, fsig
, args
, vector16b_intrinsics
, sizeof (vector16b_intrinsics
) / sizeof (SimdIntrinsic
));
1935 if (!strcmp ("Vector16sb", class_name
))
1936 return emit_intrinsics (cfg
, cmethod
, fsig
, args
, vector16sb_intrinsics
, sizeof (vector16sb_intrinsics
) / sizeof (SimdIntrinsic
));
/*
 * Intrinsic table shared by System.Numerics Vector2/Vector3/Vector4 (all are
 * backed by the same 4 x r4 SIMD representation). Entries must stay sorted by
 * name — emit_vector_intrinsics binary-searches this table.
 */
1941 // The entries should be ordered by name
1942 // System.Numerics.Vector2/Vector3/Vector4
1943 static const SimdIntrinsic vector2_intrinsics
[] = {
1944 { SN_ctor
, OP_EXPAND_R4
},
1946 { SN_Dot
, OP_DPPS
},
1947 { SN_Equals
, OP_COMPPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_EQUALITY
, SIMD_COMP_EQ
},
1948 { SN_Max
, OP_MAXPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
1949 { SN_Min
, OP_MINPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
1950 { SN_SquareRoot
, OP_SQRTPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_UNARY
},
1951 { SN_op_Addition
, OP_ADDPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
1952 { SN_op_Division
, OP_DIVPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
1953 { SN_op_Multiply
, OP_MULPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
1954 { SN_op_Subtraction
, OP_SUBPS
, SIMD_VERSION_SSE1
, SIMD_EMIT_BINARY
},
/*
 * emit_vector_intrinsics:
 * Intrinsic emitter for System.Numerics Vector2/Vector3/Vector4 — all share
 * the same 4 x r4 SIMD layout, so one table (vector2_intrinsics) serves all.
 * Handles .ctor, Equals, SquareRoot, Dot (LLVM-only, via DPPS + extract of
 * the lowest lane), Abs (max(x, 0 - x)), and the four arithmetic operators.
 * NOTE(review): garbled fragment — case labels for several switch arms, the
 * intrins == NULL bail-out and some closing braces are elided.
 */
1958 emit_vector_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
1960 const SimdIntrinsic
*intrins
;
1961 MonoMethodSignature
*sig
= mono_method_signature (cmethod
);
1964 * Vector2/3/4 are handled the same way, since the underlying SIMD type is the same (4 * r4).
1966 intrins
= (const SimdIntrinsic
*)mono_binary_search (cmethod
->name
, vector2_intrinsics
, sizeof (vector2_intrinsics
) / sizeof (SimdIntrinsic
), sizeof (SimdIntrinsic
), &simd_intrinsic_compare_by_name
);
1968 //printf ("%s\n", mono_method_full_name (cmethod, 1));
1972 if (cfg
->verbose_level
> 1) {
1973 char *name
= mono_method_full_name (cmethod
, TRUE
);
1974 printf (" SIMD intrinsic %s\n", name
);
1978 switch (intrins
->name
) {
1980 return simd_intrinsic_emit_ctor (intrins
, cfg
, cmethod
, args
);
1983 return simd_intrinsic_emit_equality (intrins
, cfg
, cmethod
, args
);
1986 return simd_intrinsic_emit_unary (intrins
, cfg
, cmethod
, args
);
/* Dot product: only emitted under LLVM (DPPS), result in the lowest lane. */
1989 if (COMPILE_LLVM (cfg
)) {
1992 ins
= simd_intrinsic_emit_binary (intrins
, cfg
, cmethod
, args
);
1993 /* The end result is in the lowest element */
1994 return simd_intrinsic_emit_getter_op (cfg
, 0, cmethod
->klass
, mono_method_signature (cmethod
)->ret
, ins
);
1998 // abs(x) = max(x, sub(0,x))
2002 MONO_INST_NEW (cfg
, zero
, OP_XZERO
);
2003 zero
->dreg
= alloc_xreg (cfg
);
2004 zero
->klass
= cmethod
->klass
;
2005 MONO_ADD_INS (cfg
->cbb
, zero
);
/* sub = 0 - x, then abs = max (x, sub). */
2007 sub
= simd_intrinsic_emit_binary_op (cfg
, OP_SUBPS
, 0, cmethod
->klass
, sig
->params
[0], sig
->params
[0], zero
, args
[0]);
2008 return simd_intrinsic_emit_binary_op (cfg
, OP_MAXPS
, 0, cmethod
->klass
, sig
->params
[0], sig
->params
[0], args
[0], sub
);
2012 case SN_op_Addition
:
2013 case SN_op_Division
:
2014 case SN_op_Multiply
:
2015 case SN_op_Subtraction
:
2016 return simd_intrinsic_emit_binary (intrins
, cfg
, cmethod
, args
);
/*
 * Intrinsic table for the generic System.Numerics.Vector<T>. Entries must
 * stay sorted by name for the binary search in emit_vector_t_intrinsics.
 * NOTE(review): several entries (ctor, comparisons, operators) are elided
 * from this garbled view — only a subset of rows survives.
 */
2024 static const SimdIntrinsic vector_t_intrinsics
[] = {
2029 { SN_GreaterThanOrEqual
},
2031 { SN_LessThanOrEqual
},
2032 { SN_get_AllOnes
, OP_XONES
},
2035 { SN_get_Zero
, OP_XZERO
},
2040 { SN_op_Subtraction
}
/*
 * emit_vector_t_intrinsics:
 * Intrinsic emitter for the generic System.Numerics.Vector<T>. Extracts the
 * element type T from the generic context, requires it to be primitive, then
 * dispatches on the matched table entry:
 *   get_Count / get_Zero / get_AllOnes, indexer get_Item (constant index
 *   only), .ctor from scalar or from T[] (+ optional index, with an explicit
 *   end-of-slice bounds check), op_Explicit casts, Equals, the ordered
 *   comparisons (GreaterThan et al. composed from per-type eq/gt compare ops,
 *   with LessThan* obtained by swapping operands), a raw-bits XMOVE copy,
 *   and the +, -, *, / operators via type_to_p{add,sub,mul,div}_op.
 * NOTE(review): heavily garbled — unsupported-type bail-outs, several case
 * labels, returns and closing braces are elided from this view.
 */
2044 emit_vector_t_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
2046 const SimdIntrinsic
*intrins
;
2049 int size
, len
, index
;
2051 intrins
= (const SimdIntrinsic
*)mono_binary_search (cmethod
->name
, vector_t_intrinsics
, sizeof (vector_t_intrinsics
) / sizeof (SimdIntrinsic
), sizeof (SimdIntrinsic
), &simd_intrinsic_compare_by_name
);
2053 //printf ("%s\n", mono_method_full_name (cmethod, 1));
/* T = first generic type argument of Vector<T>. */
2057 etype
= mono_class_get_context (cmethod
->klass
)->class_inst
->type_argv
[0];
2058 size
= mono_class_value_size (mono_class_from_mono_type (etype
), NULL
);
/* Only primitive element types are intrinsified. */
2062 if (!MONO_TYPE_IS_PRIMITIVE (etype
))
2065 if (cfg
->verbose_level
> 1) {
2066 char *name
= mono_method_full_name (cmethod
, TRUE
);
2067 printf (" SIMD intrinsic %s\n", name
);
2071 switch (intrins
->name
) {
2073 EMIT_NEW_ICONST (cfg
, ins
, len
);
2075 case SN_get_AllOnes
:
2077 return simd_intrinsic_emit_const (intrins
, cfg
, cmethod
, args
);
/* Indexer: only constant, in-range indices can be intrinsified. */
2079 g_assert (fsig
->param_count
== 1);
2080 if (args
[1]->opcode
!= OP_ICONST
)
2082 index
= args
[1]->inst_c0
;
2083 if (index
< 0 || index
>= len
)
2085 return simd_intrinsic_emit_getter_op (cfg
, index
, cmethod
->klass
, etype
, args
[0]);
/* .ctor (T scalar): broadcast/expand. */
2087 if (fsig
->param_count
== 1 && mono_metadata_type_equal (fsig
->params
[0], etype
))
2088 return simd_intrinsic_emit_ctor (NULL
, cfg
, cmethod
, args
);
/* .ctor (T[]) or .ctor (T[], index): load a slice of the array. */
2089 if ((fsig
->param_count
== 1 || fsig
->param_count
== 2) && (fsig
->params
[0]->type
== MONO_TYPE_SZARRAY
)) {
2090 MonoInst
*array_ins
= args
[1];
2091 MonoInst
*index_ins
;
2092 MonoInst
*ldelema_ins
;
/* Destination must be an addressable local. */
2096 if (args
[0]->opcode
!= OP_LDADDR
)
2099 /* .ctor (T[]) or .ctor (T[], index) */
2101 if (fsig
->param_count
== 2) {
2102 index_ins
= args
[2];
2104 EMIT_NEW_ICONST (cfg
, index_ins
, 0);
2107 /* Emit index check for the end (index + len - 1 < array length) */
2108 end_index_reg
= alloc_ireg (cfg
);
2109 EMIT_NEW_BIALU_IMM (cfg
, ins
, OP_IADD_IMM
, end_index_reg
, index_ins
->dreg
, len
- 1);
2110 MONO_EMIT_BOUNDS_CHECK (cfg
, array_ins
->dreg
, MonoArray
, max_length
, end_index_reg
);
2112 /* Load the array slice into the simd reg */
2113 ldelema_ins
= mini_emit_ldelema_1_ins (cfg
, mono_class_from_mono_type (etype
), array_ins
, index_ins
, TRUE
);
2114 g_assert (args
[0]->opcode
== OP_LDADDR
);
2115 var
= args
[0]->inst_p0
;
2116 EMIT_NEW_LOAD_MEMBASE (cfg
, ins
, OP_LOADX_MEMBASE
, var
->dreg
, ldelema_ins
->dreg
, 0);
2117 ins
->klass
= cmethod
->klass
;
2121 case SN_op_Explicit
:
2122 return simd_intrinsic_emit_cast (intrins
, cfg
, cmethod
, args
);
/* Equals: instance form (1 arg) vs static/operator form (2 args). */
2124 if (fsig
->param_count
== 1)
2125 return simd_intrinsic_emit_equality_op (cfg
, cmethod
, args
, type_to_comp_op (etype
), SIMD_COMP_EQ
);
2126 if (fsig
->param_count
== 2)
2127 return simd_intrinsic_emit_binary_op (cfg
, type_to_comp_op (etype
), 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[1], args
[0], args
[1]);
2130 case SN_GreaterThan
:
2131 case SN_GreaterThanOrEqual
:
2133 MonoInst
*cmp1
, *cmp2
;
2136 switch (etype
->type
) {
/* Compose ordered comparisons from the per-element-type eq/gt opcodes. */
2146 eq_op
= type_to_comp_op (etype
);
2147 gt_op
= type_to_gt_op (etype
);
2149 switch (intrins
->name
) {
2150 case SN_GreaterThan
:
2151 return simd_intrinsic_emit_binary_op (cfg
, gt_op
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[1], args
[0], args
[1]);
/* a <= b  ==  b > a (operands swapped). */
2152 case SN_LessThanOrEqual
:
2153 return simd_intrinsic_emit_binary_op (cfg
, gt_op
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[1], args
[1], args
[0]);
/* a >= b  ==  (a == b) | (a > b). */
2154 case SN_GreaterThanOrEqual
:
2155 cmp1
= simd_intrinsic_emit_binary_op (cfg
, eq_op
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[1], args
[0], args
[1]);
2156 cmp2
= simd_intrinsic_emit_binary_op (cfg
, gt_op
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[1], args
[0], args
[1]);
2157 return simd_intrinsic_emit_binary_op (cfg
, OP_POR
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[1], cmp1
, cmp2
);
/* NOTE(review): presumably SN_LessThan — (b == a) | (b > a); label elided. */
2159 cmp1
= simd_intrinsic_emit_binary_op (cfg
, eq_op
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[1], args
[1], args
[0]);
2160 cmp2
= simd_intrinsic_emit_binary_op (cfg
, gt_op
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[1], args
[1], args
[0]);
2161 return simd_intrinsic_emit_binary_op (cfg
, OP_POR
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[1], cmp1
, cmp2
);
2163 g_assert_not_reached ();
2169 switch (etype
->type
) {
2173 case MONO_TYPE_U8
: {
/* Raw bit copy between same-sized vector types. */
2177 MONO_INST_NEW (cfg
, ins
, OP_XMOVE
);
2178 ins
->klass
= cmethod
->klass
;
2179 ins
->type
= STACK_VTYPE
;
2180 ins
->sreg1
= args
[0]->dreg
;
2181 ins
->dreg
= alloc_xreg (cfg
);
2182 MONO_ADD_INS (cfg
->cbb
, ins
);
2189 case SN_op_Addition
: {
2190 int op
= type_to_padd_op (etype
);
2192 return simd_intrinsic_emit_binary_op (cfg
, op
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[0], args
[0], args
[1]);
2195 case SN_op_Subtraction
: {
2196 int op
= type_to_psub_op (etype
);
2198 return simd_intrinsic_emit_binary_op (cfg
, op
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[0], args
[0], args
[1]);
2201 case SN_op_Multiply
: {
2202 int op
= type_to_pmul_op (etype
);
2204 return simd_intrinsic_emit_binary_op (cfg
, op
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[0], args
[0], args
[1]);
2207 case SN_op_Division
: {
2208 int op
= type_to_pdiv_op (etype
);
2210 return simd_intrinsic_emit_binary_op (cfg
, op
, 0, cmethod
->klass
, fsig
->params
[0], fsig
->params
[0], args
[0], args
[1]);
/*
 * NOTE(review): garbled fragment — the comment-block delimiters, the return
 * type, early returns and closing braces are elided from this view.
 */
2221 * emit_sys_numerics_intrinsics:
2223 * Emit intrinsics for the System.Numerics assembly.
2226 emit_sys_numerics_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
2228 const char *nspace
= cmethod
->klass
->name_space
;
2229 const char *class_name
= cmethod
->klass
->name
;
/* Vector2/3/4 share one emitter (same 4 x r4 backing type). */
2235 if (!strcmp ("Vector2", class_name
) || !strcmp ("Vector4", class_name
) || !strcmp ("Vector3", class_name
))
2236 return emit_vector_intrinsics (cfg
, cmethod
, fsig
, args
);
2238 if (!strcmp ("Vector`1", class_name
))
2239 return emit_vector_t_intrinsics (cfg
, cmethod
, fsig
, args
);
/* Vector.get_IsHardwareAccelerated: constant-fold from CPU detection. */
2241 if (!strcmp ("System.Numerics", nspace
) && !strcmp ("Vector", class_name
)) {
2242 if (!strcmp (cmethod
->name
, "get_IsHardwareAccelerated")) {
2245 if (simd_supported_versions
)
2246 EMIT_NEW_ICONST (cfg
, ins
, 1);
2248 EMIT_NEW_ICONST (cfg
, ins
, 0);
2249 ins
->type
= STACK_I4
;
/*
 * emit_sys_numerics_vectors_intrinsics:
 * Emit intrinsics for the System.Numerics.Vectors assembly — only the
 * generic Vector`1 is handled here.
 * NOTE(review): garbled fragment — the fall-through return is elided.
 */
2258 emit_sys_numerics_vectors_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
2260 const char *class_name
= cmethod
->klass
->name
;
2266 if (!strcmp (class_name
, "Vector`1"))
2267 return emit_vector_t_intrinsics (cfg
, cmethod
, fsig
, args
);
/*
 * mono_emit_simd_field_load:
 * Intrinsifies loads of the X/Y/Z/W fields of System.Numerics
 * Vector2/Vector3/Vector4 as lane-extraction ops on the SIMD value at ADDR,
 * mapping the field name to a lane index.
 * NOTE(review): garbled fragment — the index assignments for each field,
 * some early returns and closing braces are elided.
 */
2272 mono_emit_simd_field_load (MonoCompile
*cfg
, MonoClassField
*field
, MonoInst
*addr
)
2278 if (is_sys_numerics_assembly (field
->parent
->image
->assembly
)) {
2281 if (!strcmp (field
->parent
->name
, "Vector2") ||
2282 !strcmp (field
->parent
->name
, "Vector3") ||
2283 !strcmp (field
->parent
->name
, "Vector4")) {
/* Field name -> lane index (X=0, Y=1, Z=2, W=3 by position). */
2284 if (!strcmp (field
->name
, "X"))
2286 else if (!strcmp (field
->name
, "Y"))
2288 else if (!strcmp (field
->name
, "Z"))
2290 else if (!strcmp (field
->name
, "W"))
2295 if (cfg
->verbose_level
> 1)
2296 printf (" SIMD intrinsic field access: %s\n", field
->name
);
2298 return simd_intrinsic_emit_getter_op (cfg
, index
, field
->parent
, mono_field_get_type (field
), addr
);
2304 #endif /* DISABLE_JIT */
/*
 * Stub version of mono_emit_simd_field_load compiled when the JIT (or SIMD
 * support) is disabled; keeps the symbol available to callers.
 * NOTE(review): the stub body (presumably "return NULL;") is elided from
 * this garbled view.
 */
2309 mono_emit_simd_field_load (MonoCompile
*cfg
, MonoClassField
*field
, MonoInst
*addr
)
2314 #endif /* MONO_ARCH_SIMD_INTRINSICS */