Apply changes from https://github.com/dotnet/runtime/commit/eb1756e97d23df13bc6fe798e...
[mono-project.git] / mono / mini / simd-intrinsics-netcore.c
blob41dffe4c50d8063cf8b3857bb76bcfa3589ef23e
1 /**
2 * SIMD Intrinsics support for netcore.
3 * Only LLVM is supported as a backend.
4 */
6 #include <config.h>
7 #include <mono/utils/mono-compiler.h>
8 #include <mono/metadata/icall-decl.h>
9 #include "mini.h"
10 #include "mini-runtime.h"
11 #include "ir-emit.h"
12 #ifdef ENABLE_LLVM
13 #include "mini-llvm.h"
14 #endif
15 #include "mono/utils/bsearch.h"
16 #include <mono/metadata/abi-details.h>
17 #include <mono/metadata/reflection-internals.h>
18 #include <mono/utils/mono-hwcap.h>
20 #if defined (MONO_ARCH_SIMD_INTRINSICS) && defined(ENABLE_NETCORE)
22 #if defined(DISABLE_JIT)
/* Variant built when DISABLE_JIT is defined: the initializer has nothing to do. */
void
mono_simd_intrinsics_init (void)
{
}
29 #else
/*
 * Method name table.
 *
 * simd-methods-netcore.h is included three times, each time with different
 * definitions of METHOD/METHOD2:
 *   1. to declare one char array field per entry in struct msgstr_t, sized to
 *      hold the entry's name string (the field is named str<__LINE__>, so each
 *      entry gets a field named after the line it is expanded on),
 *   2. to initialize those fields with the name strings,
 *   3. to define one SN_<name> enum constant per entry whose value is the byte
 *      offset of the entry's field inside struct msgstr_t.
 * METHOD(name) uses the stringified identifier as the name, METHOD2(str,name)
 * uses STR as the name and NAME only for the SN_ constant.
 */
31 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
32 #define MSGSTRFIELD1(line) str##line
/* Pass 1 and 2: layout and contents of the packed name strings */
33 static const struct msgstr_t {
34 #define METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
35 #define METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
36 #include "simd-methods-netcore.h"
37 #undef METHOD
38 #undef METHOD2
39 } method_names = {
40 #define METHOD(name) #name,
41 #define METHOD2(str,name) str,
42 #include "simd-methods-netcore.h"
43 #undef METHOD
44 #undef METHOD2
/* Pass 3: SN_ constants are offsets into method_names */
47 enum {
48 #define METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
49 #define METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
50 #include "simd-methods-netcore.h"
/* Map an SN_ constant (a byte offset) back to its name string */
52 #define method_name(idx) ((const char*)&method_names + (idx))
54 static int register_size;
56 typedef struct {
57 // One of the SN_ constants
58 guint16 id;
59 // ins->opcode
60 int op;
61 // ins->inst_c0
62 int instc0;
63 } SimdIntrinsic;
65 void
66 mono_simd_intrinsics_init (void)
68 register_size = 16;
69 #if FALSE
70 if ((mini_get_cpu_features () & MONO_CPU_X86_AVX) != 0)
71 register_size = 32;
72 #endif
73 /* Tell the class init code the size of the System.Numerics.Register type */
74 mono_simd_register_size = register_size;
77 MonoInst*
78 mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr)
80 return NULL;
83 static int
84 simd_intrinsic_compare_by_name (const void *key, const void *value)
86 return strcmp ((const char*)key, method_name (*(guint16*)value));
89 static int
90 simd_intrinsic_info_compare_by_name (const void *key, const void *value)
92 SimdIntrinsic *info = (SimdIntrinsic*)value;
93 return strcmp ((const char*)key, method_name (info->id));
96 static int
97 lookup_intrins (guint16 *intrinsics, int size, MonoMethod *cmethod)
99 const guint16 *result = (const guint16 *)mono_binary_search (cmethod->name, intrinsics, size / sizeof (guint16), sizeof (guint16), &simd_intrinsic_compare_by_name);
101 if (result == NULL)
102 return -1;
103 else
104 return (int)*result;
107 static SimdIntrinsic*
108 lookup_intrins_info (SimdIntrinsic *intrinsics, int size, MonoMethod *cmethod)
110 #if 0
111 for (int i = 0; i < (size / sizeof (SimdIntrinsic)) - 1; ++i) {
112 const char *n1 = method_name (intrinsics [i].id);
113 const char *n2 = method_name (intrinsics [i + 1].id);
114 int len1 = strlen (n1);
115 int len2 = strlen (n2);
116 for (int j = 0; j < len1 && j < len2; ++j) {
117 if (n1 [j] > n2 [j]) {
118 printf ("%s %s\n", n1, n2);
119 g_assert_not_reached ();
120 } else if (n1 [j] < n2 [j]) {
121 break;
125 #endif
127 return (SimdIntrinsic *)mono_binary_search (cmethod->name, intrinsics, size / sizeof (SimdIntrinsic), sizeof (SimdIntrinsic), &simd_intrinsic_info_compare_by_name);
131 * Return a simd vreg for the simd value represented by SRC.
132 * SRC is the 'this' argument to methods.
133 * Set INDIRECT to TRUE if the value was loaded from memory.
135 static int
136 load_simd_vreg_class (MonoCompile *cfg, MonoClass *klass, MonoInst *src, gboolean *indirect)
138 const char *spec = INS_INFO (src->opcode);
140 if (indirect)
141 *indirect = FALSE;
142 if (src->opcode == OP_XMOVE) {
143 return src->sreg1;
144 } else if (src->opcode == OP_LDADDR) {
145 int res = ((MonoInst*)src->inst_p0)->dreg;
146 return res;
147 } else if (spec [MONO_INST_DEST] == 'x') {
148 return src->dreg;
149 } else if (src->type == STACK_PTR || src->type == STACK_MP) {
150 MonoInst *ins;
151 if (indirect)
152 *indirect = TRUE;
154 MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE);
155 ins->klass = klass;
156 ins->sreg1 = src->dreg;
157 ins->type = STACK_VTYPE;
158 ins->dreg = alloc_ireg (cfg);
159 MONO_ADD_INS (cfg->cbb, ins);
160 return ins->dreg;
162 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src->type);
163 mono_print_ins (src);
164 g_assert_not_reached ();
167 static int
168 load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean *indirect)
170 return load_simd_vreg_class (cfg, cmethod->klass, src, indirect);
173 /* Create and emit a SIMD instruction, dreg is auto-allocated */
174 static MonoInst*
175 emit_simd_ins (MonoCompile *cfg, MonoClass *klass, int opcode, int sreg1, int sreg2)
177 const char *spec = INS_INFO (opcode);
178 MonoInst *ins;
180 MONO_INST_NEW (cfg, ins, opcode);
181 if (spec [MONO_INST_DEST] == 'x') {
182 ins->dreg = alloc_xreg (cfg);
183 ins->type = STACK_VTYPE;
184 } else if (spec [MONO_INST_DEST] == 'i') {
185 ins->dreg = alloc_ireg (cfg);
186 ins->type = STACK_I4;
187 } else if (spec [MONO_INST_DEST] == 'l') {
188 ins->dreg = alloc_lreg (cfg);
189 ins->type = STACK_I8;
190 } else if (spec [MONO_INST_DEST] == 'f') {
191 ins->dreg = alloc_freg (cfg);
192 ins->type = STACK_R8;
194 ins->sreg1 = sreg1;
195 ins->sreg2 = sreg2;
196 ins->klass = klass;
197 MONO_ADD_INS (cfg->cbb, ins);
198 return ins;
201 static MonoInst*
202 emit_simd_ins_for_sig (MonoCompile *cfg, MonoClass *klass, int opcode, int instc0, int instc1, MonoMethodSignature *fsig, MonoInst **args)
204 g_assert (fsig->param_count <= 3);
205 MonoInst* ins = emit_simd_ins (cfg, klass, opcode,
206 fsig->param_count > 0 ? args [0]->dreg : -1,
207 fsig->param_count > 1 ? args [1]->dreg : -1);
208 if (instc0 != -1)
209 ins->inst_c0 = instc0;
210 if (instc1 != -1)
211 ins->inst_c1 = instc1;
212 if (fsig->param_count == 3)
213 ins->sreg3 = args [2]->dreg;
214 return ins;
217 static gboolean
218 is_hw_intrinsics_class (MonoClass *klass, const char *name, gboolean *is_64bit)
220 const char *class_name = m_class_get_name (klass);
221 if ((!strcmp (class_name, "X64") || !strcmp (class_name, "Arm64")) && m_class_get_nested_in (klass)) {
222 *is_64bit = TRUE;
223 return !strcmp (m_class_get_name (m_class_get_nested_in (klass)), name);
224 } else {
225 *is_64bit = FALSE;
226 return !strcmp (class_name, name);
230 static MonoTypeEnum
231 get_underlying_type (MonoType* type)
233 MonoClass* klass = mono_class_from_mono_type_internal (type);
234 if (type->type == MONO_TYPE_PTR) // e.g. int* => MONO_TYPE_I4
235 return m_class_get_byval_arg (m_class_get_element_class (klass))->type;
236 else if (type->type == MONO_TYPE_GENERICINST) // e.g. Vector128<int> => MONO_TYPE_I4
237 return mono_class_get_context (klass)->class_inst->type_argv [0]->type;
238 else
239 return type->type;
242 static MonoInst*
243 emit_xcompare (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum etype, MonoInst *arg1, MonoInst *arg2)
245 MonoInst *ins;
246 gboolean is_fp = etype == MONO_TYPE_R4 || etype == MONO_TYPE_R8;
248 ins = emit_simd_ins (cfg, klass, is_fp ? OP_XCOMPARE_FP : OP_XCOMPARE, arg1->dreg, arg2->dreg);
249 ins->inst_c0 = CMP_EQ;
250 ins->inst_c1 = etype;
251 return ins;
254 static MonoType*
255 get_vector_t_elem_type (MonoType *vector_type)
257 MonoClass *klass;
258 MonoType *etype;
260 g_assert (vector_type->type == MONO_TYPE_GENERICINST);
261 klass = mono_class_from_mono_type_internal (vector_type);
262 g_assert (
263 !strcmp (m_class_get_name (klass), "Vector`1") ||
264 !strcmp (m_class_get_name (klass), "Vector128`1") ||
265 !strcmp (m_class_get_name (klass), "Vector256`1"));
266 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
267 return etype;
270 #ifdef TARGET_AMD64
272 static int
273 type_to_expand_op (MonoType *type)
275 switch (type->type) {
276 case MONO_TYPE_I1:
277 case MONO_TYPE_U1:
278 return OP_EXPAND_I1;
279 case MONO_TYPE_I2:
280 case MONO_TYPE_U2:
281 return OP_EXPAND_I2;
282 case MONO_TYPE_I4:
283 case MONO_TYPE_U4:
284 return OP_EXPAND_I4;
285 case MONO_TYPE_I8:
286 case MONO_TYPE_U8:
287 return OP_EXPAND_I8;
288 case MONO_TYPE_R4:
289 return OP_EXPAND_R4;
290 case MONO_TYPE_R8:
291 return OP_EXPAND_R8;
292 default:
293 g_assert_not_reached ();
/*
 * Ids of the methods looked up by emit_sys_numerics_vector ().
 * The table is searched with lookup_intrins (), a binary search comparing
 * method names with strcmp (), so entries must stay ordered by name.
 */
297 static guint16 vector_methods [] = {
298 SN_ConvertToDouble,
299 SN_ConvertToInt32,
300 SN_ConvertToInt64,
301 SN_ConvertToSingle,
302 SN_ConvertToUInt32,
303 SN_ConvertToUInt64,
304 SN_Narrow,
305 SN_Widen,
306 SN_get_IsHardwareAccelerated,
309 static MonoInst*
310 emit_sys_numerics_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
312 MonoInst *ins;
313 gboolean supported = FALSE;
314 int id;
315 MonoType *etype;
317 id = lookup_intrins (vector_methods, sizeof (vector_methods), cmethod);
318 if (id == -1)
319 return NULL;
321 //printf ("%s\n", mono_method_full_name (cmethod, 1));
323 #ifdef MONO_ARCH_SIMD_INTRINSICS
324 supported = TRUE;
325 #endif
327 if (cfg->verbose_level > 1) {
328 char *name = mono_method_full_name (cmethod, TRUE);
329 printf (" SIMD intrinsic %s\n", name);
330 g_free (name);
333 switch (id) {
334 case SN_get_IsHardwareAccelerated:
335 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
336 ins->type = STACK_I4;
337 return ins;
338 case SN_ConvertToInt32:
339 etype = get_vector_t_elem_type (fsig->params [0]);
340 g_assert (etype->type == MONO_TYPE_R4);
341 return emit_simd_ins (cfg, mono_class_from_mono_type_internal (fsig->ret), OP_CVTPS2DQ, args [0]->dreg, -1);
342 case SN_ConvertToSingle:
343 etype = get_vector_t_elem_type (fsig->params [0]);
344 g_assert (etype->type == MONO_TYPE_I4 || etype->type == MONO_TYPE_U4);
345 // FIXME:
346 if (etype->type == MONO_TYPE_U4)
347 return NULL;
348 return emit_simd_ins (cfg, mono_class_from_mono_type_internal (fsig->ret), OP_CVTDQ2PS, args [0]->dreg, -1);
349 case SN_ConvertToDouble:
350 case SN_ConvertToInt64:
351 case SN_ConvertToUInt32:
352 case SN_ConvertToUInt64:
353 case SN_Narrow:
354 case SN_Widen:
355 // FIXME:
356 break;
357 default:
358 break;
361 return NULL;
/*
 * Ids of the methods looked up by emit_sys_numerics_vector_t ().
 * The table is searched with lookup_intrins (), a binary search comparing
 * method names with strcmp (), so entries must stay ordered by name.
 * NOTE(review): SN_ctor being first presumably relies on its name string
 * sorting before the capitalized names; its definition is in
 * simd-methods-netcore.h, verify there.
 */
364 static guint16 vector_t_methods [] = {
365 SN_ctor,
366 SN_CopyTo,
367 SN_Equals,
368 SN_GreaterThan,
369 SN_GreaterThanOrEqual,
370 SN_LessThan,
371 SN_LessThanOrEqual,
372 SN_Max,
373 SN_Min,
374 SN_get_AllBitsSet,
375 SN_get_Count,
376 SN_get_Item,
377 SN_get_One,
378 SN_get_Zero,
379 SN_op_Addition,
380 SN_op_BitwiseAnd,
381 SN_op_BitwiseOr,
382 SN_op_Division,
383 SN_op_Equality,
384 SN_op_ExclusiveOr,
385 SN_op_Explicit,
386 SN_op_Inequality,
387 SN_op_Multiply,
388 SN_op_Subtraction
/*
 * Emit IR for the methods listed in vector_t_methods, called on the generic
 * vector class that declares CMETHOD.
 * FSIG is the signature of the call and ARGS its arguments; for instance
 * methods handled here ARGS [0] is used as the 'this' argument.
 * Returns the emitted instruction, or NULL when the method is not in the
 * table, the element type is not supported, or the signature does not match
 * one of the handled shapes.
 */
391 static MonoInst*
392 emit_sys_numerics_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
394 MonoInst *ins;
395 MonoType *type, *etype;
396 MonoClass *klass;
397 int size, len, id;
398 gboolean is_unsigned;
/* Constants referenced by address from the R4/R8 constant instructions of get_One */
400 static const float r4_one = 1.0f;
401 static const double r8_one = 1.0;
403 id = lookup_intrins (vector_t_methods, sizeof (vector_t_methods), cmethod);
404 if (id == -1)
405 return NULL;
/* etype is the first generic type argument of the class, len the number of elements per register */
407 klass = cmethod->klass;
408 type = m_class_get_byval_arg (klass);
409 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
410 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
411 g_assert (size);
412 len = register_size / size;
/* Only primitive element types other than char and boolean are handled */
414 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
415 return NULL;
417 if (cfg->verbose_level > 1) {
418 char *name = mono_method_full_name (cmethod, TRUE);
419 printf (" SIMD intrinsic %s\n", name);
420 g_free (name);
423 switch (id) {
/* Count is a compile time constant */
424 case SN_get_Count:
425 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
426 break;
427 EMIT_NEW_ICONST (cfg, ins, len);
428 return ins;
429 case SN_get_Zero:
430 g_assert (fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type));
431 return emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
/* One: emit a scalar constant 1 of the element kind and expand it to a vector */
432 case SN_get_One: {
433 g_assert (fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type));
434 MonoInst *one = NULL;
435 int expand_opcode = type_to_expand_op (etype);
436 MONO_INST_NEW (cfg, one, -1);
437 switch (expand_opcode) {
438 case OP_EXPAND_R4:
439 one->opcode = OP_R4CONST;
440 one->type = STACK_R4;
441 one->inst_p0 = (void *) &r4_one;
442 break;
443 case OP_EXPAND_R8:
444 one->opcode = OP_R8CONST;
445 one->type = STACK_R8;
446 one->inst_p0 = (void *) &r8_one;
447 break;
/* All integer widths use an OP_ICONST of stack type I4 */
448 default:
449 one->opcode = OP_ICONST;
450 one->type = STACK_I4;
451 one->inst_c0 = 1;
452 break;
454 one->dreg = alloc_dreg (cfg, (MonoStackType)one->type);
455 MONO_ADD_INS (cfg->cbb, one);
456 return emit_simd_ins (cfg, klass, expand_opcode, one->dreg, -1);
458 case SN_get_AllBitsSet: {
459 /* Compare a zero vector with itself */
460 ins = emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
461 return emit_xcompare (cfg, klass, etype->type, ins, ins);
/* Indexer: only emitted when compiling with LLVM */
463 case SN_get_Item: {
464 if (!COMPILE_LLVM (cfg))
465 return NULL;
/* Unsigned compare of the index against len, IndexOutOfRangeException if index >= len */
466 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
467 MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "IndexOutOfRangeException");
468 int opcode = -1;
469 int dreg;
/* NOTE(review): is64 is assigned below but not read anywhere in this block */
470 gboolean is64 = FALSE;
/* Pick the extract opcode and destination register kind from the element type */
471 switch (etype->type) {
472 case MONO_TYPE_I8:
473 case MONO_TYPE_U8:
474 opcode = OP_XEXTRACT_I64;
475 is64 = TRUE;
476 dreg = alloc_lreg (cfg);
477 break;
478 case MONO_TYPE_R8:
479 opcode = OP_XEXTRACT_R8;
480 dreg = alloc_freg (cfg);
481 break;
482 case MONO_TYPE_R4:
483 g_assert (cfg->r4fp);
484 opcode = OP_XEXTRACT_R4;
485 dreg = alloc_freg (cfg);
486 break;
487 default:
488 opcode = OP_XEXTRACT_I32;
489 dreg = alloc_ireg (cfg);
490 break;
492 MONO_INST_NEW (cfg, ins, opcode);
493 ins->dreg = dreg;
494 ins->sreg1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
495 ins->sreg2 = args [1]->dreg;
496 ins->inst_c0 = etype->type;
497 mini_type_to_eval_stack_type (cfg, etype, ins);
498 MONO_ADD_INS (cfg->cbb, ins);
499 return ins;
501 case SN_ctor:
/* .ctor (T): expand the scalar argument into the vreg of 'this' */
502 if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) {
503 int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
505 int opcode = type_to_expand_op (etype);
506 ins = emit_simd_ins (cfg, klass, opcode, args [1]->dreg, -1);
507 ins->dreg = dreg;
508 return ins;
510 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
511 MonoInst *array_ins = args [1];
512 MonoInst *index_ins;
513 MonoInst *ldelema_ins;
514 MonoInst *var;
515 int end_index_reg;
/* Only handled when 'this' is the address of a variable */
517 if (args [0]->opcode != OP_LDADDR)
518 return NULL;
520 /* .ctor (T[]) or .ctor (T[], index) */
522 if (fsig->param_count == 2) {
523 index_ins = args [2];
524 } else {
525 EMIT_NEW_ICONST (cfg, index_ins, 0);
528 /* Emit index check for the end (index + len - 1 < array length) */
529 end_index_reg = alloc_ireg (cfg);
530 EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1);
531 MONO_EMIT_BOUNDS_CHECK (cfg, array_ins->dreg, MonoArray, max_length, end_index_reg);
533 /* Load the array slice into the simd reg */
534 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, TRUE, FALSE);
535 g_assert (args [0]->opcode == OP_LDADDR);
536 var = (MonoInst*)args [0]->inst_p0;
537 EMIT_NEW_LOAD_MEMBASE (cfg, ins, OP_LOADX_MEMBASE, var->dreg, ldelema_ins->dreg, 0);
538 ins->klass = cmethod->klass;
539 return args [0];
541 break;
542 case SN_CopyTo:
543 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
544 MonoInst *array_ins = args [1];
545 MonoInst *index_ins;
546 MonoInst *ldelema_ins;
547 int val_vreg, end_index_reg;
549 val_vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
551 /* CopyTo (T[]) or CopyTo (T[], index) */
553 if (fsig->param_count == 2) {
554 index_ins = args [2];
555 } else {
556 EMIT_NEW_ICONST (cfg, index_ins, 0);
559 /* CopyTo () does complicated argument checks */
/* First the start index against the array length, then (length - index) against len */
560 mini_emit_bounds_check_offset (cfg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), index_ins->dreg, "ArgumentOutOfRangeException");
561 end_index_reg = alloc_ireg (cfg);
562 int len_reg = alloc_ireg (cfg);
563 MONO_EMIT_NEW_LOAD_MEMBASE_OP_FLAGS (cfg, OP_LOADI4_MEMBASE, len_reg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), MONO_INST_INVARIANT_LOAD);
564 EMIT_NEW_BIALU (cfg, ins, OP_ISUB, end_index_reg, len_reg, index_ins->dreg);
565 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, end_index_reg, len);
566 MONO_EMIT_NEW_COND_EXC (cfg, LT, "ArgumentException");
/* Store the simd value into the array slice starting at the index */
568 /* Load the array slice into the simd reg */
569 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, FALSE, FALSE);
570 EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, ldelema_ins->dreg, 0, val_vreg);
571 ins->klass = cmethod->klass;
572 return ins;
574 break;
575 case SN_Equals:
/* Instance Equals (Vector<T>) returning a boolean: whole vector equality */
576 if (fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && mono_metadata_type_equal (fsig->params [0], type)) {
577 int sreg1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
579 return emit_simd_ins (cfg, klass, OP_XEQUAL, sreg1, args [1]->dreg);
580 } else if (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)) {
581 /* Per element equality */
582 return emit_xcompare (cfg, klass, etype->type, args [0], args [1]);
584 break;
585 case SN_op_Equality:
586 case SN_op_Inequality:
587 g_assert (fsig->param_count == 2 && fsig->ret->type == MONO_TYPE_BOOLEAN &&
588 mono_metadata_type_equal (fsig->params [0], type) &&
589 mono_metadata_type_equal (fsig->params [1], type));
590 ins = emit_simd_ins (cfg, klass, OP_XEQUAL, args [0]->dreg, args [1]->dreg);
/* Inequality: compare the equality result with 0 and return the result of that compare */
591 if (id == SN_op_Inequality) {
592 int sreg = ins->dreg;
593 int dreg = alloc_ireg (cfg);
594 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, sreg, 0);
595 EMIT_NEW_UNALU (cfg, ins, OP_CEQ, dreg, -1);
597 return ins;
598 case SN_GreaterThan:
599 case SN_GreaterThanOrEqual:
600 case SN_LessThan:
601 case SN_LessThanOrEqual:
602 g_assert (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type));
603 is_unsigned = etype->type == MONO_TYPE_U1 || etype->type == MONO_TYPE_U2 || etype->type == MONO_TYPE_U4 || etype->type == MONO_TYPE_U8;
/* emit_xcompare () emits a CMP_EQ compare; the comparison kind is replaced below */
604 ins = emit_xcompare (cfg, klass, etype->type, args [0], args [1]);
605 switch (id) {
606 case SN_GreaterThan:
607 ins->inst_c0 = is_unsigned ? CMP_GT_UN : CMP_GT;
608 break;
609 case SN_GreaterThanOrEqual:
610 ins->inst_c0 = is_unsigned ? CMP_GE_UN : CMP_GE;
611 break;
612 case SN_LessThan:
613 ins->inst_c0 = is_unsigned ? CMP_LT_UN : CMP_LT;
614 break;
615 case SN_LessThanOrEqual:
616 ins->inst_c0 = is_unsigned ? CMP_LE_UN : CMP_LE;
617 break;
618 default:
619 g_assert_not_reached ();
621 return ins;
622 case SN_op_Explicit:
623 return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1);
/* Binary operators: an OP_XBINOP with the scalar opcode in inst_c0 and the element type in inst_c1 */
624 case SN_op_Addition:
625 case SN_op_Subtraction:
626 case SN_op_Division:
627 case SN_op_Multiply:
628 case SN_op_BitwiseAnd:
629 case SN_op_BitwiseOr:
630 case SN_op_ExclusiveOr:
631 case SN_Max:
632 case SN_Min:
633 if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)))
634 return NULL;
635 ins = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, args [1]->dreg);
636 ins->inst_c1 = etype->type;
/* Floating point elements; the bitwise operators have no case here, so they are nullified and return NULL */
638 if (etype->type == MONO_TYPE_R4 || etype->type == MONO_TYPE_R8) {
639 switch (id) {
640 case SN_op_Addition:
641 ins->inst_c0 = OP_FADD;
642 break;
643 case SN_op_Subtraction:
644 ins->inst_c0 = OP_FSUB;
645 break;
646 case SN_op_Multiply:
647 ins->inst_c0 = OP_FMUL;
648 break;
649 case SN_op_Division:
650 ins->inst_c0 = OP_FDIV;
651 break;
652 case SN_Max:
653 ins->inst_c0 = OP_FMAX;
654 break;
655 case SN_Min:
656 ins->inst_c0 = OP_FMIN;
657 break;
658 default:
659 NULLIFY_INS (ins);
660 return NULL;
/* Integer elements */
662 } else {
663 switch (id) {
664 case SN_op_Addition:
665 ins->inst_c0 = OP_IADD;
666 break;
667 case SN_op_Subtraction:
668 ins->inst_c0 = OP_ISUB;
669 break;
671 case SN_op_Division:
672 ins->inst_c0 = OP_IDIV;
673 break;
674 case SN_op_Multiply:
675 ins->inst_c0 = OP_IMUL;
676 break;
678 case SN_op_BitwiseAnd:
679 ins->inst_c0 = OP_IAND;
680 break;
681 case SN_op_BitwiseOr:
682 ins->inst_c0 = OP_IOR;
683 break;
684 case SN_op_ExclusiveOr:
685 ins->inst_c0 = OP_IXOR;
686 break;
687 case SN_Max:
688 ins->inst_c0 = OP_IMAX;
689 break;
690 case SN_Min:
691 ins->inst_c0 = OP_IMIN;
692 break;
693 default:
694 NULLIFY_INS (ins);
695 return NULL;
698 return ins;
699 default:
700 break;
/* Reached when a case above did not match the signature */
703 return NULL;
705 #endif // TARGET_AMD64
707 static MonoInst*
708 emit_invalid_operation (MonoCompile *cfg, const char* message)
710 mono_cfg_set_exception (cfg, MONO_EXCEPTION_MONO_ERROR);
711 mono_error_set_generic_error (cfg->error, "System", "InvalidOperationException", "%s", message);
712 return NULL;
715 #ifdef TARGET_ARM64
/*
 * Methods of the ArmBase class handled by emit_arm64_intrinsics ().
 * Only the id of each entry is set; op and instc0 are left zero.
 * Searched with lookup_intrins_info (), a binary search comparing method
 * names with strcmp (), so entries must stay ordered by name.
 */
717 static SimdIntrinsic armbase_methods [] = {
718 {SN_LeadingSignCount},
719 {SN_LeadingZeroCount},
720 {SN_ReverseElementBits},
721 {SN_get_IsSupported}
/*
 * Methods of the Crc32 class handled by emit_arm64_intrinsics ().
 * Only the id of each entry is set; op and instc0 are left zero.
 * Searched with lookup_intrins_info (), a binary search comparing method
 * names with strcmp (), so entries must stay ordered by name.
 */
724 static SimdIntrinsic crc32_methods [] = {
725 {SN_ComputeCrc32},
726 {SN_ComputeCrc32C},
727 {SN_get_IsSupported}
730 static MonoInst*
731 emit_arm64_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
733 // Arm64 intrinsics are LLVM-only
734 if (!COMPILE_LLVM (cfg))
735 return NULL;
737 MonoInst *ins;
738 gboolean supported, is_64bit;
739 MonoClass *klass = cmethod->klass;
740 MonoTypeEnum arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID;
741 gboolean arg0_i32 = (arg0_type == MONO_TYPE_I4) || (arg0_type == MONO_TYPE_U4);
742 SimdIntrinsic *info;
744 if (is_hw_intrinsics_class (klass, "ArmBase", &is_64bit)) {
745 info = lookup_intrins_info (armbase_methods, sizeof (armbase_methods), cmethod);
746 if (!info)
747 return NULL;
749 supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_BASE) != 0;
751 switch (info->id) {
752 case SN_get_IsSupported:
753 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
754 ins->type = STACK_I4;
755 return ins;
756 case SN_LeadingZeroCount:
757 return emit_simd_ins_for_sig (cfg, klass, arg0_i32 ? OP_LZCNT32 : OP_LZCNT64, 0, arg0_type, fsig, args);
758 case SN_LeadingSignCount:
759 return emit_simd_ins_for_sig (cfg, klass, arg0_i32 ? OP_LSCNT32 : OP_LSCNT64, 0, arg0_type, fsig, args);
760 case SN_ReverseElementBits:
761 return emit_simd_ins_for_sig (cfg, klass,
762 (is_64bit ? OP_XOP_I8_I8 : OP_XOP_I4_I4),
763 (is_64bit ? SIMD_OP_ARM64_RBIT64 : SIMD_OP_ARM64_RBIT32),
764 arg0_type, fsig, args);
765 default:
766 g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false
770 if (is_hw_intrinsics_class (klass, "Crc32", &is_64bit)) {
771 info = lookup_intrins_info (crc32_methods, sizeof (crc32_methods), cmethod);
772 if (!info)
773 return NULL;
775 supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_CRC) != 0;
777 switch (info->id) {
778 case SN_get_IsSupported:
779 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
780 ins->type = STACK_I4;
781 return ins;
782 case SN_ComputeCrc32:
783 case SN_ComputeCrc32C: {
784 SimdOp op = (SimdOp)0;
785 gboolean is_c = info->id == SN_ComputeCrc32C;
786 switch (get_underlying_type (fsig->params [1])) {
787 case MONO_TYPE_U1: op = is_c ? SIMD_OP_ARM64_CRC32CB : SIMD_OP_ARM64_CRC32B; break;
788 case MONO_TYPE_U2: op = is_c ? SIMD_OP_ARM64_CRC32CH : SIMD_OP_ARM64_CRC32H; break;
789 case MONO_TYPE_U4: op = is_c ? SIMD_OP_ARM64_CRC32CW : SIMD_OP_ARM64_CRC32W; break;
790 case MONO_TYPE_U8: op = is_c ? SIMD_OP_ARM64_CRC32CX : SIMD_OP_ARM64_CRC32X; break;
791 default: g_assert_not_reached (); break;
793 return emit_simd_ins_for_sig (cfg, klass, is_64bit ? OP_XOP_I4_I4_I8 : OP_XOP_I4_I4_I4, op, arg0_type, fsig, args);
795 default:
796 g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false
799 return NULL;
801 #endif // TARGET_ARM64
803 #ifdef TARGET_AMD64
/*
 * Intrinsics table named for SSE. Each entry is {id, op, instc0} as described
 * by SimdIntrinsic; members left out of an initializer are zero.
 * The entries are in name order, as required by the binary search done in
 * lookup_intrins_info ().
 * NOTE(review): entries that only have an id are presumably handled
 * explicitly by the code consuming this table, which is not visible here.
 * NOTE(review): the CompareNot* / CompareScalarNot* entries use the inverse
 * ordered comparison (e.g. NotGreaterThan -> CMP_LE). That differs from a
 * true "not greater than" when an operand is NaN; confirm against how the
 * backend implements OP_XCOMPARE_FP / OP_SSE_CMPSS.
 */
805 static SimdIntrinsic sse_methods [] = {
806 {SN_Add, OP_XBINOP, OP_FADD},
807 {SN_AddScalar, OP_SSE_ADDSS},
808 {SN_And, OP_SSE_AND},
809 {SN_AndNot, OP_SSE_ANDN},
810 {SN_CompareEqual, OP_XCOMPARE_FP, CMP_EQ},
811 {SN_CompareGreaterThan, OP_XCOMPARE_FP,CMP_GT},
812 {SN_CompareGreaterThanOrEqual, OP_XCOMPARE_FP, CMP_GE},
813 {SN_CompareLessThan, OP_XCOMPARE_FP, CMP_LT},
814 {SN_CompareLessThanOrEqual, OP_XCOMPARE_FP, CMP_LE},
815 {SN_CompareNotEqual, OP_XCOMPARE_FP, CMP_NE},
816 {SN_CompareNotGreaterThan, OP_XCOMPARE_FP, CMP_LE},
817 {SN_CompareNotGreaterThanOrEqual, OP_XCOMPARE_FP, CMP_LT},
818 {SN_CompareNotLessThan, OP_XCOMPARE_FP, CMP_GE},
819 {SN_CompareNotLessThanOrEqual, OP_XCOMPARE_FP, CMP_GT},
820 {SN_CompareOrdered, OP_XCOMPARE_FP, CMP_ORD},
821 {SN_CompareScalarEqual, OP_SSE_CMPSS, CMP_EQ},
822 {SN_CompareScalarGreaterThan, OP_SSE_CMPSS, CMP_GT},
823 {SN_CompareScalarGreaterThanOrEqual, OP_SSE_CMPSS, CMP_GE},
824 {SN_CompareScalarLessThan, OP_SSE_CMPSS, CMP_LT},
825 {SN_CompareScalarLessThanOrEqual, OP_SSE_CMPSS, CMP_LE},
826 {SN_CompareScalarNotEqual, OP_SSE_CMPSS, CMP_NE},
827 {SN_CompareScalarNotGreaterThan, OP_SSE_CMPSS, CMP_LE},
828 {SN_CompareScalarNotGreaterThanOrEqual, OP_SSE_CMPSS, CMP_LT},
829 {SN_CompareScalarNotLessThan, OP_SSE_CMPSS, CMP_GE},
830 {SN_CompareScalarNotLessThanOrEqual, OP_SSE_CMPSS, CMP_GT},
831 {SN_CompareScalarOrdered, OP_SSE_CMPSS, CMP_ORD},
832 {SN_CompareScalarOrderedEqual, OP_SSE_COMISS, CMP_EQ},
833 {SN_CompareScalarOrderedGreaterThan, OP_SSE_COMISS, CMP_GT},
834 {SN_CompareScalarOrderedGreaterThanOrEqual, OP_SSE_COMISS, CMP_GE},
835 {SN_CompareScalarOrderedLessThan, OP_SSE_COMISS, CMP_LT},
836 {SN_CompareScalarOrderedLessThanOrEqual, OP_SSE_COMISS, CMP_LE},
837 {SN_CompareScalarOrderedNotEqual, OP_SSE_COMISS, CMP_NE},
838 {SN_CompareScalarUnordered, OP_SSE_CMPSS, CMP_UNORD},
839 {SN_CompareScalarUnorderedEqual, OP_SSE_UCOMISS, CMP_EQ},
840 {SN_CompareScalarUnorderedGreaterThan, OP_SSE_UCOMISS, CMP_GT},
841 {SN_CompareScalarUnorderedGreaterThanOrEqual, OP_SSE_UCOMISS, CMP_GE},
842 {SN_CompareScalarUnorderedLessThan, OP_SSE_UCOMISS, CMP_LT},
843 {SN_CompareScalarUnorderedLessThanOrEqual, OP_SSE_UCOMISS, CMP_LE},
844 {SN_CompareScalarUnorderedNotEqual, OP_SSE_UCOMISS, CMP_NE},
845 {SN_CompareUnordered, OP_XCOMPARE_FP, CMP_UNORD},
846 {SN_ConvertScalarToVector128Single},
847 {SN_ConvertToInt32, OP_XOP_I4_X, SIMD_OP_SSE_CVTSS2SI},
848 {SN_ConvertToInt32WithTruncation, OP_XOP_I4_X, SIMD_OP_SSE_CVTTSS2SI},
849 {SN_ConvertToInt64, OP_XOP_I8_X, SIMD_OP_SSE_CVTSS2SI64},
850 {SN_ConvertToInt64WithTruncation, OP_XOP_I8_X, SIMD_OP_SSE_CVTTSS2SI64},
851 {SN_Divide, OP_XBINOP, OP_FDIV},
852 {SN_DivideScalar, OP_SSE_DIVSS},
853 {SN_LoadAlignedVector128, OP_SSE_LOADU, 16 /* alignment */},
854 {SN_LoadHigh, OP_SSE_MOVHPS_LOAD},
855 {SN_LoadLow, OP_SSE_MOVLPS_LOAD},
856 {SN_LoadScalarVector128, OP_SSE_MOVSS},
857 {SN_LoadVector128, OP_SSE_LOADU, 1 /* alignment */},
858 {SN_Max, OP_XOP_X_X_X, SIMD_OP_SSE_MAXPS},
859 {SN_MaxScalar, OP_XOP_X_X_X, SIMD_OP_SSE_MAXSS},
860 {SN_Min, OP_XOP_X_X_X, SIMD_OP_SSE_MINPS},
861 {SN_MinScalar, OP_XOP_X_X_X, SIMD_OP_SSE_MINSS},
862 {SN_MoveHighToLow, OP_SSE_MOVEHL},
863 {SN_MoveLowToHigh, OP_SSE_MOVELH},
864 {SN_MoveMask, OP_SSE_MOVMSK},
865 {SN_MoveScalar, OP_SSE_MOVS2},
866 {SN_Multiply, OP_XBINOP, OP_FMUL},
867 {SN_MultiplyScalar, OP_SSE_MULSS},
868 {SN_Or, OP_SSE_OR},
869 {SN_Prefetch0, OP_SSE_PREFETCHT0},
870 {SN_Prefetch1, OP_SSE_PREFETCHT1},
871 {SN_Prefetch2, OP_SSE_PREFETCHT2},
872 {SN_PrefetchNonTemporal, OP_SSE_PREFETCHNTA},
873 {SN_Reciprocal, OP_XOP_X_X, SIMD_OP_SSE_RCPPS},
874 {SN_ReciprocalScalar},
875 {SN_ReciprocalSqrt, OP_XOP_X_X, SIMD_OP_SSE_RSQRTPS},
876 {SN_ReciprocalSqrtScalar},
877 {SN_Shuffle},
878 {SN_Sqrt, OP_XOP_X_X, SIMD_OP_SSE_SQRTPS},
879 {SN_SqrtScalar},
880 {SN_Store, OP_SSE_STORE, 1 /* alignment */},
881 {SN_StoreAligned, OP_SSE_STORE, 16 /* alignment */},
882 {SN_StoreAlignedNonTemporal, OP_SSE_MOVNTPS, 16 /* alignment */},
883 {SN_StoreFence, OP_XOP, SIMD_OP_SSE_SFENCE},
884 {SN_StoreHigh, OP_SSE_MOVHPS_STORE},
885 {SN_StoreLow, OP_SSE_MOVLPS_STORE},
886 {SN_StoreScalar, OP_SSE_MOVSS_STORE},
887 {SN_Subtract, OP_XBINOP, OP_FSUB},
888 {SN_SubtractScalar, OP_SSE_SUBSS},
889 {SN_UnpackHigh, OP_SSE_UNPACKHI},
890 {SN_UnpackLow, OP_SSE_UNPACKLO},
891 {SN_Xor, OP_SSE_XOR},
892 {SN_get_IsSupported}
/*
 * Intrinsics table named for SSE2. Each entry is {id, op, instc0} as described
 * by SimdIntrinsic; members left out of an initializer are zero.
 * The entries are in name order, as required by the binary search done in
 * lookup_intrins_info ().
 * NOTE(review): entries that only have an id are presumably handled
 * explicitly by the code consuming this table, which is not visible here.
 * NOTE(review): the CompareNot* / CompareScalarNot* entries use the inverse
 * ordered comparison (e.g. NotGreaterThan -> CMP_LE). That differs from a
 * true "not greater than" when an operand is NaN; confirm against how the
 * backend implements OP_XCOMPARE_FP / OP_SSE2_CMPSD.
 */
895 static SimdIntrinsic sse2_methods [] = {
896 {SN_Add},
897 {SN_AddSaturate, OP_SSE2_ADDS},
898 {SN_AddScalar, OP_SSE2_ADDSD},
899 {SN_And, OP_SSE_AND},
900 {SN_AndNot, OP_SSE_ANDN},
901 {SN_Average},
902 {SN_CompareEqual},
903 {SN_CompareGreaterThan},
904 {SN_CompareGreaterThanOrEqual, OP_XCOMPARE_FP, CMP_GE},
905 {SN_CompareLessThan},
906 {SN_CompareLessThanOrEqual, OP_XCOMPARE_FP, CMP_LE},
907 {SN_CompareNotEqual, OP_XCOMPARE_FP, CMP_NE},
908 {SN_CompareNotGreaterThan, OP_XCOMPARE_FP, CMP_LE},
909 {SN_CompareNotGreaterThanOrEqual, OP_XCOMPARE_FP, CMP_LT},
910 {SN_CompareNotLessThan, OP_XCOMPARE_FP, CMP_GE},
911 {SN_CompareNotLessThanOrEqual, OP_XCOMPARE_FP, CMP_GT},
912 {SN_CompareOrdered, OP_XCOMPARE_FP, CMP_ORD},
913 {SN_CompareScalarEqual, OP_SSE2_CMPSD, CMP_EQ},
914 {SN_CompareScalarGreaterThan, OP_SSE2_CMPSD, CMP_GT},
915 {SN_CompareScalarGreaterThanOrEqual, OP_SSE2_CMPSD, CMP_GE},
916 {SN_CompareScalarLessThan, OP_SSE2_CMPSD, CMP_LT},
917 {SN_CompareScalarLessThanOrEqual, OP_SSE2_CMPSD, CMP_LE},
918 {SN_CompareScalarNotEqual, OP_SSE2_CMPSD, CMP_NE},
919 {SN_CompareScalarNotGreaterThan, OP_SSE2_CMPSD, CMP_LE},
920 {SN_CompareScalarNotGreaterThanOrEqual, OP_SSE2_CMPSD, CMP_LT},
921 {SN_CompareScalarNotLessThan, OP_SSE2_CMPSD, CMP_GE},
922 {SN_CompareScalarNotLessThanOrEqual, OP_SSE2_CMPSD, CMP_GT},
923 {SN_CompareScalarOrdered, OP_SSE2_CMPSD, CMP_ORD},
924 {SN_CompareScalarOrderedEqual, OP_SSE2_COMISD, CMP_EQ},
925 {SN_CompareScalarOrderedGreaterThan, OP_SSE2_COMISD, CMP_GT},
926 {SN_CompareScalarOrderedGreaterThanOrEqual, OP_SSE2_COMISD, CMP_GE},
927 {SN_CompareScalarOrderedLessThan, OP_SSE2_COMISD, CMP_LT},
928 {SN_CompareScalarOrderedLessThanOrEqual, OP_SSE2_COMISD, CMP_LE},
929 {SN_CompareScalarOrderedNotEqual, OP_SSE2_COMISD, CMP_NE},
930 {SN_CompareScalarUnordered, OP_SSE2_CMPSD, CMP_UNORD},
931 {SN_CompareScalarUnorderedEqual, OP_SSE2_UCOMISD, CMP_EQ},
932 {SN_CompareScalarUnorderedGreaterThan, OP_SSE2_UCOMISD, CMP_GT},
933 {SN_CompareScalarUnorderedGreaterThanOrEqual, OP_SSE2_UCOMISD, CMP_GE},
934 {SN_CompareScalarUnorderedLessThan, OP_SSE2_UCOMISD, CMP_LT},
935 {SN_CompareScalarUnorderedLessThanOrEqual, OP_SSE2_UCOMISD, CMP_LE},
936 {SN_CompareScalarUnorderedNotEqual, OP_SSE2_UCOMISD, CMP_NE},
937 {SN_CompareUnordered, OP_XCOMPARE_FP, CMP_UNORD},
938 {SN_ConvertScalarToVector128Double},
939 {SN_ConvertScalarToVector128Int32},
940 {SN_ConvertScalarToVector128Int64},
941 {SN_ConvertScalarToVector128Single, OP_XOP_X_X_X, SIMD_OP_SSE_CVTSD2SS},
942 {SN_ConvertScalarToVector128UInt32},
943 {SN_ConvertScalarToVector128UInt64},
944 {SN_ConvertToInt32},
945 {SN_ConvertToInt32WithTruncation, OP_XOP_I4_X, SIMD_OP_SSE_CVTTSD2SI},
946 {SN_ConvertToInt64},
947 {SN_ConvertToInt64WithTruncation, OP_XOP_I8_X, SIMD_OP_SSE_CVTTSD2SI64},
948 {SN_ConvertToUInt32},
949 {SN_ConvertToUInt64},
950 {SN_ConvertToVector128Double},
951 {SN_ConvertToVector128Int32},
952 {SN_ConvertToVector128Int32WithTruncation},
953 {SN_ConvertToVector128Single},
954 {SN_Divide, OP_XBINOP, OP_FDIV},
955 {SN_DivideScalar, OP_SSE2_DIVSD},
956 {SN_Extract},
957 {SN_Insert},
958 {SN_LoadAlignedVector128},
959 {SN_LoadFence, OP_XOP, SIMD_OP_SSE_LFENCE},
960 {SN_LoadHigh, OP_SSE2_MOVHPD_LOAD},
961 {SN_LoadLow, OP_SSE2_MOVLPD_LOAD},
962 {SN_LoadScalarVector128},
963 {SN_LoadVector128},
964 {SN_MaskMove, OP_SSE2_MASKMOVDQU},
965 {SN_Max},
966 {SN_MaxScalar, OP_XOP_X_X_X, SIMD_OP_SSE_MAXSD},
967 {SN_MemoryFence, OP_XOP, SIMD_OP_SSE_MFENCE},
968 {SN_Min}, // FIXME:
969 {SN_MinScalar, OP_XOP_X_X_X, SIMD_OP_SSE_MINSD},
970 {SN_MoveMask, OP_SSE_MOVMSK},
971 {SN_MoveScalar},
972 {SN_Multiply},
973 {SN_MultiplyAddAdjacent, OP_XOP_X_X_X, SIMD_OP_SSE_PMADDWD},
974 {SN_MultiplyHigh},
975 {SN_MultiplyLow, OP_PMULW},
976 {SN_MultiplyScalar, OP_SSE2_MULSD},
977 {SN_Or, OP_SSE_OR},
978 {SN_PackSignedSaturate},
979 {SN_PackUnsignedSaturate},
980 {SN_ShiftLeftLogical},
981 {SN_ShiftLeftLogical128BitLane},
982 {SN_ShiftRightArithmetic},
983 {SN_ShiftRightLogical},
984 {SN_ShiftRightLogical128BitLane},
985 {SN_Shuffle},
986 {SN_ShuffleHigh},
987 {SN_ShuffleLow},
988 {SN_Sqrt, OP_XOP_X_X, SIMD_OP_SSE_SQRTPD},
989 {SN_SqrtScalar},
990 {SN_Store, OP_SSE_STORE, 1 /* alignment */},
991 {SN_StoreAligned, OP_SSE_STORE, 16 /* alignment */},
992 {SN_StoreAlignedNonTemporal, OP_SSE_MOVNTPS, 16 /* alignment */},
993 {SN_StoreHigh, OP_SSE2_MOVHPD_STORE},
994 {SN_StoreLow, OP_SSE2_MOVLPD_STORE},
995 {SN_StoreNonTemporal, OP_SSE_MOVNTPS, 1 /* alignment */},
996 {SN_StoreScalar, OP_SSE_STORES},
997 {SN_Subtract},
998 {SN_SubtractSaturate, OP_SSE2_SUBS},
999 {SN_SubtractScalar, OP_SSE2_SUBSD},
1000 {SN_SumAbsoluteDifferences, OP_XOP_X_X_X, SIMD_OP_SSE_PSADBW},
1001 {SN_UnpackHigh, OP_SSE_UNPACKHI},
1002 {SN_UnpackLow, OP_SSE_UNPACKLO},
1003 {SN_Xor, OP_SSE_XOR},
1004 {SN_get_IsSupported}
1007 static SimdIntrinsic sse3_methods [] = {
1008 {SN_AddSubtract},
1009 {SN_HorizontalAdd},
1010 {SN_HorizontalSubtract},
1011 {SN_LoadAndDuplicateToVector128, OP_SSE3_MOVDDUP_MEM},
1012 {SN_LoadDquVector128, OP_XOP_X_I, SIMD_OP_SSE_LDDQU},
1013 {SN_MoveAndDuplicate, OP_SSE3_MOVDDUP},
1014 {SN_MoveHighAndDuplicate, OP_SSE3_MOVSHDUP},
1015 {SN_MoveLowAndDuplicate, OP_SSE3_MOVSLDUP},
1016 {SN_get_IsSupported}
1019 static SimdIntrinsic ssse3_methods [] = {
1020 {SN_Abs, OP_SSSE3_ABS},
1021 {SN_AlignRight},
1022 {SN_HorizontalAdd},
1023 {SN_HorizontalAddSaturate, OP_XOP_X_X_X, SIMD_OP_SSE_PHADDSW},
1024 {SN_HorizontalSubtract},
1025 {SN_HorizontalSubtractSaturate, OP_XOP_X_X_X, SIMD_OP_SSE_PHSUBSW},
1026 {SN_MultiplyAddAdjacent, OP_XOP_X_X_X, SIMD_OP_SSE_PMADDUBSW},
1027 {SN_MultiplyHighRoundScale, OP_XOP_X_X_X, SIMD_OP_SSE_PMULHRSW},
1028 {SN_Shuffle, OP_SSSE3_SHUFFLE},
1029 {SN_Sign},
1030 {SN_get_IsSupported}
1033 static SimdIntrinsic sse41_methods [] = {
1034 {SN_Blend},
1035 {SN_BlendVariable},
1036 {SN_Ceiling, OP_SSE41_ROUNDP, 10 /*round mode*/},
1037 {SN_CeilingScalar, OP_SSE41_ROUNDS, 10 /*round mode*/},
1038 {SN_CompareEqual, OP_XCOMPARE, CMP_EQ},
1039 {SN_ConvertToVector128Int16, OP_SSE_CVTII, MONO_TYPE_I2},
1040 {SN_ConvertToVector128Int32, OP_SSE_CVTII, MONO_TYPE_I4},
1041 {SN_ConvertToVector128Int64, OP_SSE_CVTII, MONO_TYPE_I8},
1042 {SN_DotProduct},
1043 {SN_Extract},
1044 {SN_Floor, OP_SSE41_ROUNDP, 9 /*round mode*/},
1045 {SN_FloorScalar, OP_SSE41_ROUNDS, 9 /*round mode*/},
1046 {SN_Insert},
1047 {SN_LoadAlignedVector128NonTemporal, OP_SSE41_LOADANT},
1048 {SN_Max, OP_XBINOP, OP_IMAX},
1049 {SN_Min, OP_XBINOP, OP_IMIN},
1050 {SN_MinHorizontal, OP_XOP_X_X, SIMD_OP_SSE_PHMINPOSUW},
1051 {SN_MultipleSumAbsoluteDifferences},
1052 {SN_Multiply, OP_SSE41_MUL},
1053 {SN_MultiplyLow, OP_SSE41_MULLO},
1054 {SN_PackUnsignedSaturate, OP_XOP_X_X_X, SIMD_OP_SSE_PACKUSDW},
1055 {SN_RoundCurrentDirection, OP_SSE41_ROUNDP, 4 /*round mode*/},
1056 {SN_RoundCurrentDirectionScalar, OP_SSE41_ROUNDS, 4 /*round mode*/},
1057 {SN_RoundToNearestInteger, OP_SSE41_ROUNDP, 8 /*round mode*/},
1058 {SN_RoundToNearestIntegerScalar, OP_SSE41_ROUNDS, 8 /*round mode*/},
1059 {SN_RoundToNegativeInfinity, OP_SSE41_ROUNDP, 9 /*round mode*/},
1060 {SN_RoundToNegativeInfinityScalar, OP_SSE41_ROUNDS, 9 /*round mode*/},
1061 {SN_RoundToPositiveInfinity, OP_SSE41_ROUNDP, 10 /*round mode*/},
1062 {SN_RoundToPositiveInfinityScalar, OP_SSE41_ROUNDS, 10 /*round mode*/},
1063 {SN_RoundToZero, OP_SSE41_ROUNDP, 11 /*round mode*/},
1064 {SN_RoundToZeroScalar, OP_SSE41_ROUNDS, 11 /*round mode*/},
1065 {SN_TestC, OP_XOP_I4_X_X, SIMD_OP_SSE_TESTC},
1066 {SN_TestNotZAndNotC, OP_XOP_I4_X_X, SIMD_OP_SSE_TESTNZ},
1067 {SN_TestZ, OP_XOP_I4_X_X, SIMD_OP_SSE_TESTZ},
1068 {SN_get_IsSupported}
1071 static SimdIntrinsic sse42_methods [] = {
1072 {SN_CompareGreaterThan, OP_XCOMPARE, CMP_GT},
1073 {SN_Crc32},
1074 {SN_get_IsSupported}
1077 static SimdIntrinsic pclmulqdq_methods [] = {
1078 {SN_CarrylessMultiply},
1079 {SN_get_IsSupported}
1082 static SimdIntrinsic aes_methods [] = {
1083 {SN_Decrypt, OP_XOP_X_X_X, SIMD_OP_AES_DEC},
1084 {SN_DecryptLast, OP_XOP_X_X_X, SIMD_OP_AES_DECLAST},
1085 {SN_Encrypt, OP_XOP_X_X_X, SIMD_OP_AES_ENC},
1086 {SN_EncryptLast, OP_XOP_X_X_X, SIMD_OP_AES_ENCLAST},
1087 {SN_InverseMixColumns, OP_XOP_X_X, SIMD_OP_AES_IMC},
1088 {SN_KeygenAssist},
1089 {SN_get_IsSupported}
1092 static SimdIntrinsic popcnt_methods [] = {
1093 {SN_PopCount},
1094 {SN_get_IsSupported}
1097 static SimdIntrinsic lzcnt_methods [] = {
1098 {SN_LeadingZeroCount},
1099 {SN_get_IsSupported}
1102 static SimdIntrinsic bmi1_methods [] = {
1103 {SN_AndNot},
1104 {SN_BitFieldExtract},
1105 {SN_ExtractLowestSetBit},
1106 {SN_GetMaskUpToLowestSetBit},
1107 {SN_ResetLowestSetBit},
1108 {SN_TrailingZeroCount},
1109 {SN_get_IsSupported}
1112 static SimdIntrinsic bmi2_methods [] = {
1113 {SN_MultiplyNoFlags},
1114 {SN_ParallelBitDeposit},
1115 {SN_ParallelBitExtract},
1116 {SN_ZeroHighBits},
1117 {SN_get_IsSupported}
1120 static SimdIntrinsic x86base_methods [] = {
1121 {SN_BitScanForward},
1122 {SN_BitScanReverse},
1123 {SN_get_IsSupported}
1126 static MonoInst*
1127 emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
1129 MonoInst *ins;
1130 gboolean supported, is_64bit;
1131 MonoClass *klass = cmethod->klass;
1132 MonoTypeEnum arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID;
1133 SimdIntrinsic *info;
1135 if (is_hw_intrinsics_class (klass, "Sse", &is_64bit)) {
1136 if (!COMPILE_LLVM (cfg))
1137 return NULL;
1138 info = lookup_intrins_info (sse_methods, sizeof (sse_methods), cmethod);
1139 if (!info)
1140 return NULL;
1141 int id = info->id;
1143 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE) != 0;
1145 /* Common case */
1146 if (info->op != 0)
1147 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1149 switch (id) {
1150 case SN_get_IsSupported:
1151 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1152 ins->type = STACK_I4;
1153 return ins;
1154 case SN_Shuffle:
1155 if (args [2]->opcode == OP_ICONST)
1156 return emit_simd_ins_for_sig (cfg, klass, OP_SSE_SHUFFLE, args [2]->inst_c0, arg0_type, fsig, args);
1157 // FIXME: handle non-constant mask (generate a switch)
1158 return emit_invalid_operation (cfg, "mask in Sse.Shuffle must be constant");
1159 case SN_ConvertScalarToVector128Single: {
1160 int op = 0;
1161 switch (fsig->params [1]->type) {
1162 case MONO_TYPE_I4: op = OP_SSE_CVTSI2SS; break;
1163 case MONO_TYPE_I8: op = OP_SSE_CVTSI2SS64; break;
1164 default: g_assert_not_reached (); break;
1166 return emit_simd_ins_for_sig (cfg, klass, op, 0, 0, fsig, args);
1168 case SN_ReciprocalScalar:
1169 case SN_ReciprocalSqrtScalar:
1170 case SN_SqrtScalar: {
1171 int op = 0;
1172 switch (id) {
1173 case SN_ReciprocalScalar: op = OP_SSE_RCPSS; break;
1174 case SN_ReciprocalSqrtScalar: op = OP_SSE_RSQRTSS; break;
1175 case SN_SqrtScalar: op = OP_SSE_SQRTSS; break;
1177 if (fsig->param_count == 1)
1178 return emit_simd_ins (cfg, klass, op, args [0]->dreg, args[0]->dreg);
1179 else if (fsig->param_count == 2)
1180 return emit_simd_ins (cfg, klass, op, args [0]->dreg, args[1]->dreg);
1181 else
1182 g_assert_not_reached ();
1183 break;
1185 case SN_LoadScalarVector128:
1186 return NULL;
1187 default:
1188 return NULL;
1192 if (is_hw_intrinsics_class (klass, "Sse2", &is_64bit)) {
1193 if (!COMPILE_LLVM (cfg))
1194 return NULL;
1195 info = lookup_intrins_info (sse2_methods, sizeof (sse2_methods), cmethod);
1196 if (!info)
1197 return NULL;
1198 int id = info->id;
1200 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE2) != 0;
1202 /* Common case */
1203 if (info->op != 0)
1204 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1206 switch (id) {
1207 case SN_get_IsSupported: {
1208 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1209 ins->type = STACK_I4;
1210 return ins;
1212 case SN_Subtract:
1213 return emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, arg0_type == MONO_TYPE_R8 ? OP_FSUB : OP_ISUB, arg0_type, fsig, args);
1214 case SN_Add:
1215 return emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, arg0_type == MONO_TYPE_R8 ? OP_FADD : OP_IADD, arg0_type, fsig, args);
1216 case SN_Average:
1217 if (arg0_type == MONO_TYPE_U1)
1218 return emit_simd_ins_for_sig (cfg, klass, OP_PAVGB_UN, -1, arg0_type, fsig, args);
1219 else if (arg0_type == MONO_TYPE_U2)
1220 return emit_simd_ins_for_sig (cfg, klass, OP_PAVGW_UN, -1, arg0_type, fsig, args);
1221 else
1222 return NULL;
1223 case SN_CompareNotEqual:
1224 return emit_simd_ins_for_sig (cfg, klass, arg0_type == MONO_TYPE_R8 ? OP_XCOMPARE_FP : OP_XCOMPARE, CMP_NE, arg0_type, fsig, args);
1225 case SN_CompareEqual:
1226 return emit_simd_ins_for_sig (cfg, klass, arg0_type == MONO_TYPE_R8 ? OP_XCOMPARE_FP : OP_XCOMPARE, CMP_EQ, arg0_type, fsig, args);
1227 case SN_CompareGreaterThan:
1228 return emit_simd_ins_for_sig (cfg, klass, arg0_type == MONO_TYPE_R8 ? OP_XCOMPARE_FP : OP_XCOMPARE, CMP_GT, arg0_type, fsig, args);
1229 case SN_CompareLessThan:
1230 return emit_simd_ins_for_sig (cfg, klass, arg0_type == MONO_TYPE_R8 ? OP_XCOMPARE_FP : OP_XCOMPARE, CMP_LT, arg0_type, fsig, args);
1231 case SN_ConvertToInt32:
1232 if (arg0_type == MONO_TYPE_R8)
1233 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_I4_X, SIMD_OP_SSE_CVTSD2SI, arg0_type, fsig, args);
1234 else if (arg0_type == MONO_TYPE_I4)
1235 return emit_simd_ins_for_sig (cfg, klass, OP_EXTRACT_I4, 0, arg0_type, fsig, args);
1236 else
1237 return NULL;
1238 case SN_ConvertToInt64:
1239 if (arg0_type == MONO_TYPE_R8)
1240 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_I8_X, SIMD_OP_SSE_CVTSD2SI64, arg0_type, fsig, args);
1241 else if (arg0_type == MONO_TYPE_I8)
1242 return emit_simd_ins_for_sig (cfg, klass, OP_EXTRACT_I8, 0 /*element index*/, arg0_type, fsig, args);
1243 else
1244 g_assert_not_reached ();
1245 break;
1246 case SN_ConvertScalarToVector128Double: {
1247 int op = OP_SSE2_CVTSS2SD;
1248 switch (fsig->params [1]->type) {
1249 case MONO_TYPE_I4: op = OP_SSE2_CVTSI2SD; break;
1250 case MONO_TYPE_I8: op = OP_SSE2_CVTSI2SD64; break;
1252 return emit_simd_ins_for_sig (cfg, klass, op, 0, 0, fsig, args);
1254 case SN_ConvertScalarToVector128Int32:
1255 case SN_ConvertScalarToVector128Int64:
1256 case SN_ConvertScalarToVector128UInt32:
1257 case SN_ConvertScalarToVector128UInt64:
1258 return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR, -1, arg0_type, fsig, args);
1259 case SN_ConvertToUInt32:
1260 return emit_simd_ins_for_sig (cfg, klass, OP_EXTRACT_I4, 0 /*element index*/, arg0_type, fsig, args);
1261 case SN_ConvertToUInt64:
1262 return emit_simd_ins_for_sig (cfg, klass, OP_EXTRACT_I8, 0 /*element index*/, arg0_type, fsig, args);
1263 case SN_ConvertToVector128Double:
1264 if (arg0_type == MONO_TYPE_R4)
1265 return emit_simd_ins_for_sig (cfg, klass, OP_CVTPS2PD, 0, arg0_type, fsig, args);
1266 else if (arg0_type == MONO_TYPE_I4)
1267 return emit_simd_ins_for_sig (cfg, klass, OP_CVTDQ2PD, 0, arg0_type, fsig, args);
1268 else
1269 return NULL;
1270 case SN_ConvertToVector128Int32:
1271 if (arg0_type == MONO_TYPE_R4)
1272 return emit_simd_ins_for_sig (cfg, klass, OP_CVTPS2DQ, 0, arg0_type, fsig, args);
1273 else if (arg0_type == MONO_TYPE_R8)
1274 return emit_simd_ins_for_sig (cfg, klass, OP_CVTPD2DQ, 0, arg0_type, fsig, args);
1275 else
1276 return NULL;
1277 case SN_ConvertToVector128Int32WithTruncation:
1278 if (arg0_type == MONO_TYPE_R4)
1279 return emit_simd_ins_for_sig (cfg, klass, OP_CVTTPS2DQ, 0, arg0_type, fsig, args);
1280 else if (arg0_type == MONO_TYPE_R8)
1281 return emit_simd_ins_for_sig (cfg, klass, OP_CVTTPD2DQ, 0, arg0_type, fsig, args);
1282 else
1283 return NULL;
1284 case SN_ConvertToVector128Single:
1285 if (arg0_type == MONO_TYPE_I4)
1286 return emit_simd_ins_for_sig (cfg, klass, OP_CVTDQ2PS, 0, arg0_type, fsig, args);
1287 else if (arg0_type == MONO_TYPE_R8)
1288 return emit_simd_ins_for_sig (cfg, klass, OP_CVTPD2PS, 0, arg0_type, fsig, args);
1289 else
1290 return NULL;
1291 case SN_LoadAlignedVector128:
1292 return emit_simd_ins_for_sig (cfg, klass, OP_SSE_LOADU, 16 /*alignment*/, arg0_type, fsig, args);
1293 case SN_LoadVector128:
1294 return emit_simd_ins_for_sig (cfg, klass, OP_SSE_LOADU, 1 /*alignment*/, arg0_type, fsig, args);
1295 case SN_MoveScalar:
1296 return emit_simd_ins_for_sig (cfg, klass, fsig->param_count == 2 ? OP_SSE_MOVS2 : OP_SSE_MOVS, -1, arg0_type, fsig, args);
1297 case SN_Max:
1298 switch (arg0_type) {
1299 case MONO_TYPE_U1:
1300 return emit_simd_ins_for_sig (cfg, klass, OP_PMAXB_UN, 0, arg0_type, fsig, args);
1301 case MONO_TYPE_I2:
1302 return emit_simd_ins_for_sig (cfg, klass, OP_PMAXW, 0, arg0_type, fsig, args);
1303 case MONO_TYPE_R8: return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_MAXPD, arg0_type, fsig, args);
1304 default:
1305 g_assert_not_reached ();
1306 break;
1308 break;
1309 case SN_Min:
1310 switch (arg0_type) {
1311 case MONO_TYPE_U1:
1312 return emit_simd_ins_for_sig (cfg, klass, OP_PMINB_UN, 0, arg0_type, fsig, args);
1313 case MONO_TYPE_I2:
1314 return emit_simd_ins_for_sig (cfg, klass, OP_PMINW, 0, arg0_type, fsig, args);
1315 case MONO_TYPE_R8: return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_MINPD, arg0_type, fsig, args);
1316 default:
1317 g_assert_not_reached ();
1318 break;
1320 break;
1321 case SN_Multiply:
1322 if (arg0_type == MONO_TYPE_U4)
1323 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PMULUDQ, 0, arg0_type, fsig, args);
1324 else if (arg0_type == MONO_TYPE_R8)
1325 return emit_simd_ins_for_sig (cfg, klass, OP_MULPD, 0, arg0_type, fsig, args);
1326 else
1327 g_assert_not_reached ();
1328 case SN_MultiplyHigh:
1329 if (arg0_type == MONO_TYPE_I2)
1330 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PMULHW, arg0_type, fsig, args);
1331 else if (arg0_type == MONO_TYPE_U2)
1332 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PMULHUW, arg0_type, fsig, args);
1333 else
1334 g_assert_not_reached ();
1335 case SN_PackSignedSaturate:
1336 if (arg0_type == MONO_TYPE_I2)
1337 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PACKSSWB, arg0_type, fsig, args);
1338 else if (arg0_type == MONO_TYPE_I4)
1339 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PACKSSDW, arg0_type, fsig, args);
1340 else
1341 g_assert_not_reached ();
1342 case SN_PackUnsignedSaturate:
1343 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PACKUS, -1, arg0_type, fsig, args);
1344 case SN_Extract:
1345 g_assert (arg0_type == MONO_TYPE_U2);
1346 return emit_simd_ins_for_sig (cfg, klass, OP_XEXTRACT_I32, arg0_type, 0, fsig, args);
1347 case SN_Insert:
1348 g_assert (arg0_type == MONO_TYPE_I2 || arg0_type == MONO_TYPE_U2);
1349 return emit_simd_ins_for_sig (cfg, klass, OP_XINSERT_I2, 0, arg0_type, fsig, args);
1350 case SN_ShiftRightLogical: {
1351 gboolean is_imm = fsig->params [1]->type == MONO_TYPE_U1;
1352 SimdOp op = (SimdOp)0;
1353 switch (arg0_type) {
1354 case MONO_TYPE_I2:
1355 case MONO_TYPE_U2:
1356 op = is_imm ? SIMD_OP_SSE_PSRLW_IMM : SIMD_OP_SSE_PSRLW;
1357 break;
1358 case MONO_TYPE_I4:
1359 case MONO_TYPE_U4:
1360 op = is_imm ? SIMD_OP_SSE_PSRLD_IMM : SIMD_OP_SSE_PSRLD;
1361 break;
1362 case MONO_TYPE_I8:
1363 case MONO_TYPE_U8:
1364 op = is_imm ? SIMD_OP_SSE_PSRLQ_IMM : SIMD_OP_SSE_PSRLQ;
1365 break;
1366 default: g_assert_not_reached (); break;
1368 return emit_simd_ins_for_sig (cfg, klass, is_imm ? OP_XOP_X_X_I4 : OP_XOP_X_X_X, op, arg0_type, fsig, args);
1370 case SN_ShiftRightArithmetic: {
1371 gboolean is_imm = fsig->params [1]->type == MONO_TYPE_U1;
1372 SimdOp op = (SimdOp)0;
1373 switch (arg0_type) {
1374 case MONO_TYPE_I2:
1375 case MONO_TYPE_U2:
1376 op = is_imm ? SIMD_OP_SSE_PSRAW_IMM : SIMD_OP_SSE_PSRAW;
1377 break;
1378 case MONO_TYPE_I4:
1379 case MONO_TYPE_U4:
1380 op = is_imm ? SIMD_OP_SSE_PSRAD_IMM : SIMD_OP_SSE_PSRAD;
1381 break;
1382 default: g_assert_not_reached (); break;
1384 return emit_simd_ins_for_sig (cfg, klass, is_imm ? OP_XOP_X_X_I4 : OP_XOP_X_X_X, op, arg0_type, fsig, args);
1386 case SN_ShiftLeftLogical: {
1387 gboolean is_imm = fsig->params [1]->type == MONO_TYPE_U1;
1388 SimdOp op = (SimdOp)0;
1389 switch (arg0_type) {
1390 case MONO_TYPE_I2:
1391 case MONO_TYPE_U2:
1392 op = is_imm ? SIMD_OP_SSE_PSLLW_IMM : SIMD_OP_SSE_PSLLW;
1393 break;
1394 case MONO_TYPE_I4:
1395 case MONO_TYPE_U4:
1396 op = is_imm ? SIMD_OP_SSE_PSLLD_IMM : SIMD_OP_SSE_PSLLD;
1397 break;
1398 case MONO_TYPE_I8:
1399 case MONO_TYPE_U8:
1400 op = is_imm ? SIMD_OP_SSE_PSLLQ_IMM : SIMD_OP_SSE_PSLLQ;
1401 break;
1402 default: g_assert_not_reached (); break;
1404 return emit_simd_ins_for_sig (cfg, klass, is_imm ? OP_XOP_X_X_I4 : OP_XOP_X_X_X, op, arg0_type, fsig, args);
1406 case SN_ShiftLeftLogical128BitLane:
1407 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSLLDQ, 0, arg0_type, fsig, args);
1408 case SN_ShiftRightLogical128BitLane:
1409 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSRLDQ, 0, arg0_type, fsig, args);
1410 case SN_Shuffle: {
1411 if (fsig->param_count == 2) {
1412 g_assert (arg0_type == MONO_TYPE_I4 || arg0_type == MONO_TYPE_U4);
1413 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSHUFD, 0, arg0_type, fsig, args);
1414 } else if (fsig->param_count == 3) {
1415 g_assert (arg0_type == MONO_TYPE_R8);
1416 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_SHUFPD, 0, arg0_type, fsig, args);
1417 } else {
1418 g_assert_not_reached ();
1419 break;
1422 case SN_ShuffleHigh:
1423 g_assert (fsig->param_count == 2);
1424 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSHUFHW, 0, arg0_type, fsig, args);
1425 case SN_ShuffleLow:
1426 g_assert (fsig->param_count == 2);
1427 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSHUFLW, 0, arg0_type, fsig, args);
1428 case SN_SqrtScalar: {
1429 if (fsig->param_count == 1)
1430 return emit_simd_ins (cfg, klass, OP_SSE2_SQRTSD, args [0]->dreg, args[0]->dreg);
1431 else if (fsig->param_count == 2)
1432 return emit_simd_ins (cfg, klass, OP_SSE2_SQRTSD, args [0]->dreg, args[1]->dreg);
1433 else {
1434 g_assert_not_reached ();
1435 break;
1438 case SN_LoadScalarVector128: {
1439 int op = 0;
1440 switch (arg0_type) {
1441 case MONO_TYPE_I4:
1442 case MONO_TYPE_U4: op = OP_SSE2_MOVD; break;
1443 case MONO_TYPE_I8:
1444 case MONO_TYPE_U8: op = OP_SSE2_MOVQ; break;
1445 case MONO_TYPE_R8: op = OP_SSE2_MOVUPD; break;
1446 default: g_assert_not_reached(); break;
1448 return emit_simd_ins_for_sig (cfg, klass, op, 0, 0, fsig, args);
1450 default:
1451 return NULL;
1455 if (is_hw_intrinsics_class (klass, "Sse3", &is_64bit)) {
1456 if (!COMPILE_LLVM (cfg))
1457 return NULL;
1458 info = lookup_intrins_info (sse3_methods, sizeof (sse3_methods), cmethod);
1459 if (!info)
1460 return NULL;
1461 int id = info->id;
1463 /* Common case */
1464 if (info->op != 0)
1465 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1467 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE3);
1469 switch (id) {
1470 case SN_get_IsSupported:
1471 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1472 ins->type = STACK_I4;
1473 return ins;
1474 case SN_AddSubtract:
1475 if (arg0_type == MONO_TYPE_R4)
1476 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_ADDSUBPS, arg0_type, fsig, args);
1477 else if (arg0_type == MONO_TYPE_R8)
1478 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_ADDSUBPD, arg0_type, fsig, args);
1479 else
1480 g_assert_not_reached ();
1481 break;
1482 case SN_HorizontalAdd:
1483 if (arg0_type == MONO_TYPE_R4)
1484 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_HADDPS, arg0_type, fsig, args);
1485 else if (arg0_type == MONO_TYPE_R8)
1486 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_HADDPD, arg0_type, fsig, args);
1487 else
1488 g_assert_not_reached ();
1489 break;
1490 case SN_HorizontalSubtract:
1491 if (arg0_type == MONO_TYPE_R4)
1492 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_HSUBPS, arg0_type, fsig, args);
1493 else if (arg0_type == MONO_TYPE_R8)
1494 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_HSUBPD, arg0_type, fsig, args);
1495 else
1496 g_assert_not_reached ();
1497 break;
1498 default:
1499 g_assert_not_reached ();
1500 break;
1504 if (is_hw_intrinsics_class (klass, "Ssse3", &is_64bit)) {
1505 if (!COMPILE_LLVM (cfg))
1506 return NULL;
1507 info = lookup_intrins_info (ssse3_methods, sizeof (ssse3_methods), cmethod);
1508 if (!info)
1509 return NULL;
1510 int id = info->id;
1512 /* Common case */
1513 if (info->op != 0)
1514 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1516 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSSE3) != 0;
1518 switch (id) {
1519 case SN_get_IsSupported:
1520 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1521 ins->type = STACK_I4;
1522 return ins;
1523 case SN_AlignRight:
1524 if (args [2]->opcode == OP_ICONST)
1525 return emit_simd_ins_for_sig (cfg, klass, OP_SSSE3_ALIGNR, args [2]->inst_c0, arg0_type, fsig, args);
1526 return emit_invalid_operation (cfg, "mask in Ssse3.AlignRight must be constant");
1527 case SN_HorizontalAdd:
1528 if (arg0_type == MONO_TYPE_I2)
1529 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHADDW, arg0_type, fsig, args);
1530 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHADDD, arg0_type, fsig, args);
1531 case SN_HorizontalSubtract:
1532 if (arg0_type == MONO_TYPE_I2)
1533 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHSUBW, arg0_type, fsig, args);
1534 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHSUBD, arg0_type, fsig, args);
1535 case SN_Sign:
1536 if (arg0_type == MONO_TYPE_I1)
1537 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PSIGNB, arg0_type, fsig, args);
1538 if (arg0_type == MONO_TYPE_I2)
1539 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PSIGNW, arg0_type, fsig, args);
1540 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PSIGND, arg0_type, fsig, args);
1541 default:
1542 g_assert_not_reached ();
1543 break;
1547 if (is_hw_intrinsics_class (klass, "Sse41", &is_64bit)) {
1548 if (!COMPILE_LLVM (cfg))
1549 return NULL;
1550 info = lookup_intrins_info (sse41_methods, sizeof (sse41_methods), cmethod);
1551 if (!info)
1552 return NULL;
1553 int id = info->id;
1555 /* Common case */
1556 if (info->op != 0)
1557 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1559 supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE41) != 0;
1561 switch (id) {
1562 case SN_get_IsSupported:
1563 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1564 ins->type = STACK_I4;
1565 return ins;
1566 case SN_DotProduct:
1567 if (args [2]->opcode == OP_ICONST && arg0_type == MONO_TYPE_R4)
1568 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_DPPS_IMM, args [2]->inst_c0, arg0_type, fsig, args);
1569 else if (args [2]->opcode == OP_ICONST && arg0_type == MONO_TYPE_R8)
1570 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_DPPD_IMM, args [2]->inst_c0, arg0_type, fsig, args);
1571 // FIXME: handle non-constant control byte (generate a switch)
1572 return emit_invalid_operation (cfg, "control byte in Sse41.DotProduct must be constant");
1573 case SN_MultipleSumAbsoluteDifferences:
1574 if (args [2]->opcode == OP_ICONST)
1575 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_MPSADBW_IMM, args [2]->inst_c0, arg0_type, fsig, args);
1576 // FIXME: handle non-constant control byte (generate a switch)
1577 return emit_invalid_operation (cfg, "control byte in Sse41.MultipleSumAbsoluteDifferences must be constant");
1578 case SN_Blend:
1579 if (args [2]->opcode == OP_ICONST)
1580 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_BLEND_IMM, args [2]->inst_c0, arg0_type, fsig, args);
1581 // FIXME: handle non-constant control byte (generate a switch)
1582 return emit_invalid_operation (cfg, "control byte in Sse41.Blend must be constant");
1583 case SN_BlendVariable:
1584 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_BLENDV, -1, arg0_type, fsig, args);
1585 case SN_Extract: {
1586 int op = 0;
1587 switch (arg0_type) {
1588 case MONO_TYPE_U1:
1589 case MONO_TYPE_U4:
1590 case MONO_TYPE_I4: op = OP_XEXTRACT_I32; break;
1591 case MONO_TYPE_I8:
1592 case MONO_TYPE_U8: op = OP_XEXTRACT_I64; break;
1593 case MONO_TYPE_R4: op = OP_XEXTRACT_R4; break;
1594 default: g_assert_not_reached(); break;
1596 return emit_simd_ins_for_sig (cfg, klass, op, arg0_type, 0, fsig, args);
1598 case SN_Insert:
1599 if (args [2]->opcode == OP_ICONST)
1600 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_INSERT, -1, arg0_type, fsig, args);
1601 // FIXME: handle non-constant index (generate a switch)
1602 return emit_invalid_operation (cfg, "index in Sse41.Insert must be constant");
1603 default:
1604 g_assert_not_reached ();
1605 break;
1609 if (is_hw_intrinsics_class (klass, "Sse42", &is_64bit)) {
1610 if (!COMPILE_LLVM (cfg))
1611 return NULL;
1612 info = lookup_intrins_info (sse42_methods, sizeof (sse42_methods), cmethod);
1613 if (!info)
1614 return NULL;
1615 int id = info->id;
1617 /* Common case */
1618 if (info->op != 0)
1619 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1621 supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE42) != 0;
1623 switch (id) {
1624 case SN_get_IsSupported:
1625 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1626 ins->type = STACK_I4;
1627 return ins;
1628 case SN_Crc32: {
1629 MonoTypeEnum arg1_type = get_underlying_type (fsig->params [1]);
1630 return emit_simd_ins_for_sig (cfg, klass,
1631 arg1_type == MONO_TYPE_U8 ? OP_SSE42_CRC64 : OP_SSE42_CRC32,
1632 arg1_type, arg0_type, fsig, args);
1634 default:
1635 g_assert_not_reached ();
1636 break;
1640 if (is_hw_intrinsics_class (klass, "Pclmulqdq", &is_64bit)) {
1641 if (!COMPILE_LLVM (cfg))
1642 return NULL;
1643 info = lookup_intrins_info (pclmulqdq_methods, sizeof (pclmulqdq_methods), cmethod);
1644 if (!info)
1645 return NULL;
1646 int id = info->id;
1648 /* Common case */
1649 if (info->op != 0)
1650 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1652 supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_PCLMUL) != 0;
1654 switch (id) {
1655 case SN_CarrylessMultiply: {
1656 if (args [2]->opcode == OP_ICONST)
1657 return emit_simd_ins_for_sig (cfg, klass, OP_PCLMULQDQ_IMM, args [2]->inst_c0, arg0_type, fsig, args);
1658 // FIXME: handle non-constant control byte (generate a switch)
1659 return emit_invalid_operation (cfg, "index in Pclmulqdq.CarrylessMultiply must be constant");
1661 case SN_get_IsSupported:
1662 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1663 ins->type = STACK_I4;
1664 return ins;
1665 default:
1666 g_assert_not_reached ();
1667 break;
1671 if (is_hw_intrinsics_class (klass, "Aes", &is_64bit)) {
1672 if (!COMPILE_LLVM (cfg))
1673 return NULL;
1674 info = lookup_intrins_info (aes_methods, sizeof (aes_methods), cmethod);
1675 if (!info)
1676 return NULL;
1677 int id = info->id;
1679 /* Common case */
1680 if (info->op != 0)
1681 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1683 supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_AES) != 0;
1685 switch (id) {
1686 case SN_get_IsSupported:
1687 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1688 ins->type = STACK_I4;
1689 return ins;
1690 case SN_KeygenAssist: {
1691 if (args [1]->opcode == OP_ICONST)
1692 return emit_simd_ins_for_sig (cfg, klass, OP_AES_KEYGEN_IMM, args [1]->inst_c0, arg0_type, fsig, args);
1693 // FIXME: handle non-constant control byte (generate a switch)
1694 return emit_invalid_operation (cfg, "control byte in Aes.KeygenAssist must be constant");
1696 default:
1697 g_assert_not_reached ();
1698 break;
1702 if (is_hw_intrinsics_class (klass, "Popcnt", &is_64bit)) {
1703 info = lookup_intrins_info (popcnt_methods, sizeof (popcnt_methods), cmethod);
1704 if (!info)
1705 return NULL;
1706 int id = info->id;
1708 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_POPCNT) != 0;
1710 switch (id) {
1711 case SN_get_IsSupported:
1712 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1713 ins->type = STACK_I4;
1714 return ins;
1715 case SN_PopCount:
1716 if (!supported)
1717 return NULL;
1718 MONO_INST_NEW (cfg, ins, is_64bit ? OP_POPCNT64 : OP_POPCNT32);
1719 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1720 ins->sreg1 = args [0]->dreg;
1721 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1722 MONO_ADD_INS (cfg->cbb, ins);
1723 return ins;
1724 default:
1725 return NULL;
1728 if (is_hw_intrinsics_class (klass, "Lzcnt", &is_64bit)) {
1729 info = lookup_intrins_info (lzcnt_methods, sizeof (lzcnt_methods), cmethod);
1730 if (!info)
1731 return NULL;
1732 int id = info->id;
1734 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_LZCNT) != 0;
1736 switch (id) {
1737 case SN_get_IsSupported:
1738 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1739 ins->type = STACK_I4;
1740 return ins;
1741 case SN_LeadingZeroCount:
1742 if (!supported)
1743 return NULL;
1744 MONO_INST_NEW (cfg, ins, is_64bit ? OP_LZCNT64 : OP_LZCNT32);
1745 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1746 ins->sreg1 = args [0]->dreg;
1747 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1748 MONO_ADD_INS (cfg->cbb, ins);
1749 return ins;
1750 default:
1751 return NULL;
1754 if (is_hw_intrinsics_class (klass, "Bmi1", &is_64bit)) {
1755 if (!COMPILE_LLVM (cfg))
1756 return NULL;
1757 info = lookup_intrins_info (bmi1_methods, sizeof (bmi1_methods), cmethod);
1758 if (!info)
1759 return NULL;
1760 int id = info->id;
1762 g_assert (id != -1);
1763 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_BMI1) != 0;
1765 switch (id) {
1766 case SN_get_IsSupported:
1767 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1768 ins->type = STACK_I4;
1769 return ins;
1770 case SN_AndNot: {
1771 // (a ^ -1) & b
1772 // LLVM replaces it with `andn`
1773 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1774 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1775 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LXOR_IMM : OP_IXOR_IMM, tmp_reg, args [0]->dreg, -1);
1776 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, tmp_reg, args [1]->dreg);
1777 return ins;
1779 case SN_BitFieldExtract: {
1780 if (fsig->param_count == 2) {
1781 MONO_INST_NEW (cfg, ins, is_64bit ? OP_BEXTR64 : OP_BEXTR32);
1782 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1783 ins->sreg1 = args [0]->dreg;
1784 ins->sreg2 = args [1]->dreg;
1785 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1786 MONO_ADD_INS (cfg->cbb, ins);
1787 return ins;
1790 case SN_GetMaskUpToLowestSetBit: {
1791 // x ^ (x - 1)
1792 // LLVM replaces it with `blsmsk`
1793 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1794 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1795 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LSUB_IMM : OP_ISUB_IMM, tmp_reg, args [0]->dreg, 1);
1796 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LXOR : OP_IXOR, result_reg, args [0]->dreg, tmp_reg);
1797 return ins;
1799 case SN_ResetLowestSetBit: {
1800 // x & (x - 1)
1801 // LLVM replaces it with `blsr`
1802 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1803 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1804 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LSUB_IMM : OP_ISUB_IMM, tmp_reg, args [0]->dreg, 1);
1805 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, args [0]->dreg, tmp_reg);
1806 return ins;
1808 case SN_ExtractLowestSetBit: {
1809 // x & (0 - x)
1810 // LLVM replaces it with `blsi`
1811 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1812 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1813 int zero_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1814 MONO_EMIT_NEW_ICONST (cfg, zero_reg, 0);
1815 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LSUB : OP_ISUB, tmp_reg, zero_reg, args [0]->dreg);
1816 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, args [0]->dreg, tmp_reg);
1817 return ins;
1819 case SN_TrailingZeroCount:
1820 MONO_INST_NEW (cfg, ins, is_64bit ? OP_CTTZ64 : OP_CTTZ32);
1821 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1822 ins->sreg1 = args [0]->dreg;
1823 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1824 MONO_ADD_INS (cfg->cbb, ins);
1825 return ins;
1826 default:
1827 g_assert_not_reached ();
1830 if (is_hw_intrinsics_class (klass, "Bmi2", &is_64bit)) {
1831 if (!COMPILE_LLVM (cfg))
1832 return NULL;
1833 info = lookup_intrins_info (bmi2_methods, sizeof (bmi2_methods), cmethod);
1834 if (!info)
1835 return NULL;
1836 int id = info->id;
1838 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_BMI2) != 0;
1840 switch (id) {
1841 case SN_get_IsSupported:
1842 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1843 ins->type = STACK_I4;
1844 return ins;
1845 case SN_MultiplyNoFlags:
1846 if (fsig->param_count == 2) {
1847 MONO_INST_NEW (cfg, ins, is_64bit ? OP_MULX_H64 : OP_MULX_H32);
1848 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1849 ins->sreg1 = args [0]->dreg;
1850 ins->sreg2 = args [1]->dreg;
1851 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1852 MONO_ADD_INS (cfg->cbb, ins);
1853 } else if (fsig->param_count == 3) {
1854 MONO_INST_NEW (cfg, ins, is_64bit ? OP_MULX_HL64 : OP_MULX_HL32);
1855 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1856 ins->sreg1 = args [0]->dreg;
1857 ins->sreg2 = args [1]->dreg;
1858 ins->sreg3 = args [2]->dreg;
1859 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1860 MONO_ADD_INS (cfg->cbb, ins);
1861 } else {
1862 g_assert_not_reached ();
1864 return ins;
1865 case SN_ZeroHighBits:
1866 MONO_INST_NEW (cfg, ins, is_64bit ? OP_BZHI64 : OP_BZHI32);
1867 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1868 ins->sreg1 = args [0]->dreg;
1869 ins->sreg2 = args [1]->dreg;
1870 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1871 MONO_ADD_INS (cfg->cbb, ins);
1872 return ins;
1873 case SN_ParallelBitExtract:
1874 MONO_INST_NEW (cfg, ins, is_64bit ? OP_PEXT64 : OP_PEXT32);
1875 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1876 ins->sreg1 = args [0]->dreg;
1877 ins->sreg2 = args [1]->dreg;
1878 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1879 MONO_ADD_INS (cfg->cbb, ins);
1880 return ins;
1881 case SN_ParallelBitDeposit:
1882 MONO_INST_NEW (cfg, ins, is_64bit ? OP_PDEP64 : OP_PDEP32);
1883 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1884 ins->sreg1 = args [0]->dreg;
1885 ins->sreg2 = args [1]->dreg;
1886 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1887 MONO_ADD_INS (cfg->cbb, ins);
1888 return ins;
1889 default:
1890 g_assert_not_reached ();
1894 if (is_hw_intrinsics_class (klass, "X86Base", &is_64bit)) {
1895 if (!COMPILE_LLVM (cfg))
1896 return NULL;
1898 info = lookup_intrins_info (x86base_methods, sizeof (x86base_methods), cmethod);
1899 if (!info)
1900 return NULL;
1901 int id = info->id;
1903 switch (id) {
1904 case SN_get_IsSupported:
1905 EMIT_NEW_ICONST (cfg, ins, 1);
1906 ins->type = STACK_I4;
1907 return ins;
1908 case SN_BitScanForward:
1909 MONO_INST_NEW (cfg, ins, is_64bit ? OP_X86_BSF64 : OP_X86_BSF32);
1910 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1911 ins->sreg1 = args [0]->dreg;
1912 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1913 MONO_ADD_INS (cfg->cbb, ins);
1914 return ins;
1915 case SN_BitScanReverse:
1916 MONO_INST_NEW (cfg, ins, is_64bit ? OP_X86_BSR64 : OP_X86_BSR32);
1917 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1918 ins->sreg1 = args [0]->dreg;
1919 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1920 MONO_ADD_INS (cfg->cbb, ins);
1921 return ins;
1922 default:
1923 g_assert_not_reached ();
1927 return NULL;
1930 static guint16 vector_128_methods [] = {
1931 SN_AsByte,
1932 SN_AsDouble,
1933 SN_AsInt16,
1934 SN_AsInt32,
1935 SN_AsInt64,
1936 SN_AsSByte,
1937 SN_AsSingle,
1938 SN_AsUInt16,
1939 SN_AsUInt32,
1940 SN_AsUInt64,
1941 SN_Create,
1942 SN_CreateScalarUnsafe,
1945 static guint16 vector_128_t_methods [] = {
1946 SN_get_Count,
1947 SN_get_Zero,
1950 static MonoInst*
1951 emit_vector128 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
1953 MonoClass *klass;
1954 int id;
1956 if (!COMPILE_LLVM (cfg))
1957 return NULL;
1959 klass = cmethod->klass;
1960 id = lookup_intrins (vector_128_methods, sizeof (vector_128_methods), cmethod);
1961 if (id == -1)
1962 return NULL;
1964 if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256"))
1965 return NULL; // TODO: Fix Vector256.WithUpper/WithLower
1967 MonoTypeEnum arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID;
1969 switch (id) {
1970 case SN_AsByte:
1971 case SN_AsDouble:
1972 case SN_AsInt16:
1973 case SN_AsInt32:
1974 case SN_AsInt64:
1975 case SN_AsSByte:
1976 case SN_AsSingle:
1977 case SN_AsUInt16:
1978 case SN_AsUInt32:
1979 case SN_AsUInt64:
1980 return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1);
1981 case SN_Create: {
1982 MonoType *etype = get_vector_t_elem_type (fsig->ret);
1983 if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) {
1984 return emit_simd_ins (cfg, klass, type_to_expand_op (etype), args [0]->dreg, -1);
1985 } else {
1986 MonoInst *ins, *load;
1988 // FIXME: Optimize this
1989 MONO_INST_NEW (cfg, ins, OP_LOCALLOC_IMM);
1990 ins->dreg = alloc_preg (cfg);
1991 ins->inst_imm = 16;
1992 MONO_ADD_INS (cfg->cbb, ins);
1994 int esize = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
1995 int store_opcode = mono_type_to_store_membase (cfg, etype);
1996 for (int i = 0; i < fsig->param_count; ++i)
1997 MONO_EMIT_NEW_STORE_MEMBASE (cfg, store_opcode, ins->dreg, i * esize, args [i]->dreg);
1999 load = emit_simd_ins (cfg, klass, OP_SSE_LOADU, ins->dreg, -1);
2000 load->inst_c0 = 16;
2001 load->inst_c1 = get_underlying_type (etype);
2002 return load;
2005 case SN_CreateScalarUnsafe:
2006 return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_UNSAFE, -1, arg0_type, fsig, args);
2007 default:
2008 break;
2011 return NULL;
2014 static MonoInst*
2015 emit_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2017 MonoInst *ins;
2018 MonoType *type, *etype;
2019 MonoClass *klass;
2020 int size, len, id;
2022 id = lookup_intrins (vector_128_t_methods, sizeof (vector_128_t_methods), cmethod);
2023 if (id == -1)
2024 return NULL;
2026 klass = cmethod->klass;
2027 type = m_class_get_byval_arg (klass);
2028 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
2029 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
2030 g_assert (size);
2031 len = 16 / size;
2033 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
2034 return NULL;
2036 if (cfg->verbose_level > 1) {
2037 char *name = mono_method_full_name (cmethod, TRUE);
2038 printf (" SIMD intrinsic %s\n", name);
2039 g_free (name);
2042 switch (id) {
2043 case SN_get_Count:
2044 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
2045 break;
2046 EMIT_NEW_ICONST (cfg, ins, len);
2047 return ins;
2048 case SN_get_Zero: {
2049 return emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
2051 default:
2052 break;
2055 return NULL;
2058 static guint16 vector_256_t_methods [] = {
2059 SN_get_Count,
2062 static MonoInst*
2063 emit_vector256_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2065 MonoInst *ins;
2066 MonoType *type, *etype;
2067 MonoClass *klass;
2068 int size, len, id;
2070 id = lookup_intrins (vector_256_t_methods, sizeof (vector_256_t_methods), cmethod);
2071 if (id == -1)
2072 return NULL;
2074 klass = cmethod->klass;
2075 type = m_class_get_byval_arg (klass);
2076 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
2077 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
2078 g_assert (size);
2079 len = 32 / size;
2081 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
2082 return NULL;
2084 if (cfg->verbose_level > 1) {
2085 char *name = mono_method_full_name (cmethod, TRUE);
2086 printf (" SIMD intrinsic %s\n", name);
2087 g_free (name);
2090 switch (id) {
2091 case SN_get_Count:
2092 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
2093 break;
2094 EMIT_NEW_ICONST (cfg, ins, len);
2095 return ins;
2096 default:
2097 break;
2100 return NULL;
2103 #endif // !TARGET_ARM64
2105 MonoInst*
2106 mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2108 const char *class_name;
2109 const char *class_ns;
2110 MonoImage *image = m_class_get_image (cmethod->klass);
2112 if (image != mono_get_corlib ())
2113 return NULL;
2115 class_ns = m_class_get_name_space (cmethod->klass);
2116 class_name = m_class_get_name (cmethod->klass);
2118 // If cmethod->klass is nested, the namespace is on the enclosing class.
2119 if (m_class_get_nested_in (cmethod->klass))
2120 class_ns = m_class_get_name_space (m_class_get_nested_in (cmethod->klass));
2122 #ifdef TARGET_ARM64
2123 if (!strcmp (class_ns, "System.Runtime.Intrinsics.Arm")) {
2124 MonoInst *ins = emit_arm64_intrinsics (cfg, cmethod, fsig, args);
2125 return ins;
2127 #endif // TARGET_ARM64
2129 #ifdef TARGET_AMD64 // TODO: test and enable for x86 too
2130 if (!strcmp (class_ns, "System.Runtime.Intrinsics.X86")) {
2131 MonoInst *ins = emit_x86_intrinsics (cfg, cmethod, fsig, args);
2132 return ins;
2135 // FIXME: implement Vector64<T>, Vector128<T> and Vector<T> for Arm64
2137 if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
2138 if (!strcmp (class_name, "Vector128`1"))
2139 return emit_vector128_t (cfg, cmethod, fsig, args);
2140 if (!strcmp (class_name, "Vector128"))
2141 return emit_vector128 (cfg, cmethod, fsig, args);
2142 if (!strcmp (class_name, "Vector256`1"))
2143 return emit_vector256_t (cfg, cmethod, fsig, args);
2146 if (!strcmp (class_ns, "System.Numerics")) {
2147 if (!strcmp (class_name, "Vector"))
2148 return emit_sys_numerics_vector (cfg, cmethod, fsig, args);
2149 if (!strcmp (class_name, "Vector`1"))
2150 return emit_sys_numerics_vector_t (cfg, cmethod, fsig, args);
2152 #endif // TARGET_AMD64
2154 return NULL;
2157 void
2158 mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
2162 void
2163 mono_simd_simplify_indirection (MonoCompile *cfg)
2167 #endif /* DISABLE_JIT */
2168 #endif /* MONO_ARCH_SIMD_INTRINSICS */
2170 #if defined(ENABLE_NETCORE) && defined(TARGET_AMD64)
/*
 * Forwards FUNCTION_ID and SUBFUNCTION_ID to mono_hwcap_x86_call_cpuidex (),
 * handing it the addresses of the four slots of ABCD in order.
 *
 * When MONO_CROSS_COMPILE is defined the body is empty and ABCD is left
 * untouched.
 */
void
ves_icall_System_Runtime_Intrinsics_X86_X86Base___cpuidex (int abcd[4], int function_id, int subfunction_id)
{
#ifndef MONO_CROSS_COMPILE
	mono_hwcap_x86_call_cpuidex (function_id, subfunction_id,
		abcd, abcd + 1, abcd + 2, abcd + 3);
#endif
}
2179 #endif
2181 MONO_EMPTY_SOURCE_FILE (simd_intrinsics_netcore);