[runtime] Transition the trampoline code to use memory managers for memory allocation...
[mono-project.git] / mono / mini / simd-intrinsics-netcore.c
blobc8594ef4e7008f1a16acaad9f933997b1038fd68
1 /**
2 * SIMD Intrinsics support for netcore
3 */
5 #include <config.h>
6 #include <mono/utils/mono-compiler.h>
7 #include <mono/metadata/icall-decl.h>
8 #include "mini.h"
10 #if defined(DISABLE_JIT)
/*
 * Variant of mono_simd_intrinsics_init () compiled when DISABLE_JIT is defined.
 * NOTE(review): the function body is not visible in this view; presumably it is
 * empty because there is nothing to initialize without the JIT -- confirm.
 */
12 void
13 mono_simd_intrinsics_init (void)
17 #else
20 * Only LLVM is supported as a backend.
23 #include "mini-runtime.h"
24 #include "ir-emit.h"
25 #ifdef ENABLE_LLVM
26 #include "mini-llvm.h"
27 #endif
28 #include "mono/utils/bsearch.h"
29 #include <mono/metadata/abi-details.h>
30 #include <mono/metadata/reflection-internals.h>
31 #include <mono/utils/mono-hwcap.h>
33 #if defined (MONO_ARCH_SIMD_INTRINSICS) && defined(ENABLE_NETCORE)
/*
 * Method name table generated from "simd-methods-netcore.h".
 *
 * The header is included three times with different definitions of
 * METHOD/METHOD2:
 *  - first to declare one char array member per entry in struct msgstr_t
 *    (the member name is "str" followed by the __LINE__ of the entry),
 *  - then to initialize method_names with the corresponding strings,
 *  - finally to define the SN_<name> enum constants as the byte offset of
 *    each string inside struct msgstr_t.
 * method_name (idx) converts such an offset back into a C string.
 */
35 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
36 #define MSGSTRFIELD1(line) str##line
37 static const struct msgstr_t {
/* One array per method, sized to hold the name including its terminator */
38 #define METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
39 #define METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
40 #include "simd-methods-netcore.h"
41 #undef METHOD
42 #undef METHOD2
43 } method_names = {
/* METHOD uses the identifier itself as the string, METHOD2 supplies an explicit string */
44 #define METHOD(name) #name,
45 #define METHOD2(str,name) str,
46 #include "simd-methods-netcore.h"
47 #undef METHOD
48 #undef METHOD2
51 enum {
/* SN_<name> == offset of the name's string inside method_names */
52 #define METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
53 #define METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
54 #include "simd-methods-netcore.h"
/* Map an SN_ constant back to its method name */
56 #define method_name(idx) ((const char*)&method_names + (idx))
/*
 * Size of a SIMD register as used by this file; assigned in
 * mono_simd_intrinsics_init () and divided by the element size to obtain the
 * element count of a vector.
 */
58 static int register_size;
/*
 * Describes how one intrinsic method maps to an IR instruction.
 * Entries that only give an id leave op and instc0 zero-initialized.
 */
60 typedef struct {
61 // One of the SN_ constants (offset of the method name in method_names)
62 guint16 id;
63 // Value for ins->opcode
64 int op;
65 // Value for ins->inst_c0
66 int instc0;
67 } SimdIntrinsic;
69 void
70 mono_simd_intrinsics_init (void)
72 register_size = 16;
73 #if FALSE
74 if ((mini_get_cpu_features () & MONO_CPU_X86_AVX) != 0)
75 register_size = 32;
76 #endif
77 /* Tell the class init code the size of the System.Numerics.Register type */
78 mono_simd_register_size = register_size;
81 MonoInst*
82 mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr)
84 return NULL;
87 static int
88 simd_intrinsic_compare_by_name (const void *key, const void *value)
90 return strcmp ((const char*)key, method_name (*(guint16*)value));
93 static int
94 simd_intrinsic_info_compare_by_name (const void *key, const void *value)
96 SimdIntrinsic *info = (SimdIntrinsic*)value;
97 return strcmp ((const char*)key, method_name (info->id));
100 static int
101 lookup_intrins (guint16 *intrinsics, int size, MonoMethod *cmethod)
103 const guint16 *result = (const guint16 *)mono_binary_search (cmethod->name, intrinsics, size / sizeof (guint16), sizeof (guint16), &simd_intrinsic_compare_by_name);
105 if (result == NULL)
106 return -1;
107 else
108 return (int)*result;
111 static SimdIntrinsic*
112 lookup_intrins_info (SimdIntrinsic *intrinsics, int size, MonoMethod *cmethod)
114 #if 0
115 for (int i = 0; i < (size / sizeof (SimdIntrinsic)) - 1; ++i) {
116 const char *n1 = method_name (intrinsics [i].id);
117 const char *n2 = method_name (intrinsics [i + 1].id);
118 int len1 = strlen (n1);
119 int len2 = strlen (n2);
120 for (int j = 0; j < len1 && j < len2; ++j) {
121 if (n1 [j] > n2 [j]) {
122 printf ("%s %s\n", n1, n2);
123 g_assert_not_reached ();
124 } else if (n1 [j] < n2 [j]) {
125 break;
129 #endif
131 return (SimdIntrinsic *)mono_binary_search (cmethod->name, intrinsics, size / sizeof (SimdIntrinsic), sizeof (SimdIntrinsic), &simd_intrinsic_info_compare_by_name);
135 * Return a simd vreg for the simd value represented by SRC.
136 * SRC is the 'this' argument to methods.
137 * Set INDIRECT to TRUE if the value was loaded from memory.
139 static int
140 load_simd_vreg_class (MonoCompile *cfg, MonoClass *klass, MonoInst *src, gboolean *indirect)
142 const char *spec = INS_INFO (src->opcode);
144 if (indirect)
145 *indirect = FALSE;
146 if (src->opcode == OP_XMOVE) {
147 return src->sreg1;
148 } else if (src->opcode == OP_LDADDR) {
149 int res = ((MonoInst*)src->inst_p0)->dreg;
150 return res;
151 } else if (spec [MONO_INST_DEST] == 'x') {
152 return src->dreg;
153 } else if (src->type == STACK_PTR || src->type == STACK_MP) {
154 MonoInst *ins;
155 if (indirect)
156 *indirect = TRUE;
158 MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE);
159 ins->klass = klass;
160 ins->sreg1 = src->dreg;
161 ins->type = STACK_VTYPE;
162 ins->dreg = alloc_ireg (cfg);
163 MONO_ADD_INS (cfg->cbb, ins);
164 return ins->dreg;
166 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src->type);
167 mono_print_ins (src);
168 g_assert_not_reached ();
171 static int
172 load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean *indirect)
174 return load_simd_vreg_class (cfg, cmethod->klass, src, indirect);
177 /* Create and emit a SIMD instruction, dreg is auto-allocated */
178 static MonoInst*
179 emit_simd_ins (MonoCompile *cfg, MonoClass *klass, int opcode, int sreg1, int sreg2)
181 const char *spec = INS_INFO (opcode);
182 MonoInst *ins;
184 MONO_INST_NEW (cfg, ins, opcode);
185 if (spec [MONO_INST_DEST] == 'x') {
186 ins->dreg = alloc_xreg (cfg);
187 ins->type = STACK_VTYPE;
188 } else if (spec [MONO_INST_DEST] == 'i') {
189 ins->dreg = alloc_ireg (cfg);
190 ins->type = STACK_I4;
191 } else if (spec [MONO_INST_DEST] == 'l') {
192 ins->dreg = alloc_lreg (cfg);
193 ins->type = STACK_I8;
194 } else if (spec [MONO_INST_DEST] == 'f') {
195 ins->dreg = alloc_freg (cfg);
196 ins->type = STACK_R8;
198 ins->sreg1 = sreg1;
199 ins->sreg2 = sreg2;
200 ins->klass = klass;
201 MONO_ADD_INS (cfg->cbb, ins);
202 return ins;
205 static MonoInst*
206 emit_simd_ins_for_sig (MonoCompile *cfg, MonoClass *klass, int opcode, int instc0, int instc1, MonoMethodSignature *fsig, MonoInst **args)
208 g_assert (fsig->param_count <= 3);
209 MonoInst* ins = emit_simd_ins (cfg, klass, opcode,
210 fsig->param_count > 0 ? args [0]->dreg : -1,
211 fsig->param_count > 1 ? args [1]->dreg : -1);
212 if (instc0 != -1)
213 ins->inst_c0 = instc0;
214 if (instc1 != -1)
215 ins->inst_c1 = instc1;
216 if (fsig->param_count == 3)
217 ins->sreg3 = args [2]->dreg;
218 return ins;
221 static gboolean
222 is_hw_intrinsics_class (MonoClass *klass, const char *name, gboolean *is_64bit)
224 const char *class_name = m_class_get_name (klass);
225 if ((!strcmp (class_name, "X64") || !strcmp (class_name, "Arm64")) && m_class_get_nested_in (klass)) {
226 *is_64bit = TRUE;
227 return !strcmp (m_class_get_name (m_class_get_nested_in (klass)), name);
228 } else {
229 *is_64bit = FALSE;
230 return !strcmp (class_name, name);
234 static MonoTypeEnum
235 get_underlying_type (MonoType* type)
237 MonoClass* klass = mono_class_from_mono_type_internal (type);
238 if (type->type == MONO_TYPE_PTR) // e.g. int* => MONO_TYPE_I4
239 return m_class_get_byval_arg (m_class_get_element_class (klass))->type;
240 else if (type->type == MONO_TYPE_GENERICINST) // e.g. Vector128<int> => MONO_TYPE_I4
241 return mono_class_get_context (klass)->class_inst->type_argv [0]->type;
242 else
243 return type->type;
246 static MonoInst*
247 emit_xcompare (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum etype, MonoInst *arg1, MonoInst *arg2)
249 MonoInst *ins;
250 gboolean is_fp = etype == MONO_TYPE_R4 || etype == MONO_TYPE_R8;
252 ins = emit_simd_ins (cfg, klass, is_fp ? OP_XCOMPARE_FP : OP_XCOMPARE, arg1->dreg, arg2->dreg);
253 ins->inst_c0 = CMP_EQ;
254 ins->inst_c1 = etype;
255 return ins;
258 static MonoType*
259 get_vector_t_elem_type (MonoType *vector_type)
261 MonoClass *klass;
262 MonoType *etype;
264 g_assert (vector_type->type == MONO_TYPE_GENERICINST);
265 klass = mono_class_from_mono_type_internal (vector_type);
266 g_assert (
267 !strcmp (m_class_get_name (klass), "Vector`1") ||
268 !strcmp (m_class_get_name (klass), "Vector128`1") ||
269 !strcmp (m_class_get_name (klass), "Vector256`1"));
270 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
271 return etype;
274 #ifdef TARGET_AMD64
276 static int
277 type_to_expand_op (MonoType *type)
279 switch (type->type) {
280 case MONO_TYPE_I1:
281 case MONO_TYPE_U1:
282 return OP_EXPAND_I1;
283 case MONO_TYPE_I2:
284 case MONO_TYPE_U2:
285 return OP_EXPAND_I2;
286 case MONO_TYPE_I4:
287 case MONO_TYPE_U4:
288 return OP_EXPAND_I4;
289 case MONO_TYPE_I8:
290 case MONO_TYPE_U8:
291 return OP_EXPAND_I8;
292 case MONO_TYPE_R4:
293 return OP_EXPAND_R4;
294 case MONO_TYPE_R8:
295 return OP_EXPAND_R8;
296 default:
297 g_assert_not_reached ();
/*
 * Ids of the methods handled by emit_sys_numerics_vector ().
 * The array is binary searched by method name in lookup_intrins (), so the
 * entries have to stay in ascending strcmp () order of their names.
 */
301 static guint16 vector_methods [] = {
302 SN_ConvertToDouble,
303 SN_ConvertToInt32,
304 SN_ConvertToInt64,
305 SN_ConvertToSingle,
306 SN_ConvertToUInt32,
307 SN_ConvertToUInt64,
308 SN_Narrow,
309 SN_Widen,
310 SN_get_IsHardwareAccelerated,
313 static MonoInst*
314 emit_sys_numerics_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
316 MonoInst *ins;
317 gboolean supported = FALSE;
318 int id;
319 MonoType *etype;
321 id = lookup_intrins (vector_methods, sizeof (vector_methods), cmethod);
322 if (id == -1)
323 return NULL;
325 //printf ("%s\n", mono_method_full_name (cmethod, 1));
327 #ifdef MONO_ARCH_SIMD_INTRINSICS
328 supported = TRUE;
329 #endif
331 if (cfg->verbose_level > 1) {
332 char *name = mono_method_full_name (cmethod, TRUE);
333 printf (" SIMD intrinsic %s\n", name);
334 g_free (name);
337 switch (id) {
338 case SN_get_IsHardwareAccelerated:
339 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
340 ins->type = STACK_I4;
341 return ins;
342 case SN_ConvertToInt32:
343 etype = get_vector_t_elem_type (fsig->params [0]);
344 g_assert (etype->type == MONO_TYPE_R4);
345 return emit_simd_ins (cfg, mono_class_from_mono_type_internal (fsig->ret), OP_CVTPS2DQ, args [0]->dreg, -1);
346 case SN_ConvertToSingle:
347 etype = get_vector_t_elem_type (fsig->params [0]);
348 g_assert (etype->type == MONO_TYPE_I4 || etype->type == MONO_TYPE_U4);
349 // FIXME:
350 if (etype->type == MONO_TYPE_U4)
351 return NULL;
352 return emit_simd_ins (cfg, mono_class_from_mono_type_internal (fsig->ret), OP_CVTDQ2PS, args [0]->dreg, -1);
353 case SN_ConvertToDouble:
354 case SN_ConvertToInt64:
355 case SN_ConvertToUInt32:
356 case SN_ConvertToUInt64:
357 case SN_Narrow:
358 case SN_Widen:
359 // FIXME:
360 break;
361 default:
362 break;
365 return NULL;
/*
 * Ids of the methods handled by emit_sys_numerics_vector_t ().
 * The array is binary searched by method name in lookup_intrins (), so the
 * entries have to stay in ascending strcmp () order of their names.
 */
368 static guint16 vector_t_methods [] = {
369 SN_ctor,
370 SN_CopyTo,
371 SN_Equals,
372 SN_GreaterThan,
373 SN_GreaterThanOrEqual,
374 SN_LessThan,
375 SN_LessThanOrEqual,
376 SN_Max,
377 SN_Min,
378 SN_get_AllBitsSet,
379 SN_get_Count,
380 SN_get_Item,
381 SN_get_One,
382 SN_get_Zero,
383 SN_op_Addition,
384 SN_op_BitwiseAnd,
385 SN_op_BitwiseOr,
386 SN_op_Division,
387 SN_op_Equality,
388 SN_op_ExclusiveOr,
389 SN_op_Explicit,
390 SN_op_Inequality,
391 SN_op_Multiply,
392 SN_op_Subtraction
/*
 * emit_sys_numerics_vector_t:
 *
 *   Emit IR for a call to CMETHOD if its name is listed in vector_t_methods
 * (presumably the methods of System.Numerics.Vector<T>, judging by the name).
 * The element type is the first generic argument of the method's class and
 * the element count is register_size divided by the element size.
 * Returns the emitted instruction, or NULL if the method, its signature or
 * the element type is not handled.
 */
395 static MonoInst*
396 emit_sys_numerics_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
398 MonoInst *ins;
399 MonoType *type, *etype;
400 MonoClass *klass;
401 int size, len, id;
402 gboolean is_unsigned;
/* Static storage: the get_One case stores the addresses of these in the emitted constants */
404 static const float r4_one = 1.0f;
405 static const double r8_one = 1.0;
407 id = lookup_intrins (vector_t_methods, sizeof (vector_t_methods), cmethod);
408 if (id == -1)
409 return NULL;
411 klass = cmethod->klass;
412 type = m_class_get_byval_arg (klass);
413 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
414 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
415 g_assert (size);
/* Number of elements in one vector */
416 len = register_size / size;
/* Only primitive element types other than char and bool are handled */
418 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
419 return NULL;
421 if (cfg->verbose_level > 1) {
422 char *name = mono_method_full_name (cmethod, TRUE);
423 printf (" SIMD intrinsic %s\n", name);
424 g_free (name);
427 switch (id) {
428 case SN_get_Count:
429 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
430 break;
/* The element count is known at JIT time, so it becomes a constant */
431 EMIT_NEW_ICONST (cfg, ins, len);
432 return ins;
433 case SN_get_Zero:
434 g_assert (fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type));
435 return emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
436 case SN_get_One: {
437 g_assert (fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type));
/* Emit the scalar constant 1 in the element's kind (R4, R8 or integer) and pass it to the expand opcode */
438 MonoInst *one = NULL;
439 int expand_opcode = type_to_expand_op (etype);
440 MONO_INST_NEW (cfg, one, -1);
441 switch (expand_opcode) {
442 case OP_EXPAND_R4:
443 one->opcode = OP_R4CONST;
444 one->type = STACK_R4;
445 one->inst_p0 = (void *) &r4_one;
446 break;
447 case OP_EXPAND_R8:
448 one->opcode = OP_R8CONST;
449 one->type = STACK_R8;
450 one->inst_p0 = (void *) &r8_one;
451 break;
452 default:
453 one->opcode = OP_ICONST;
454 one->type = STACK_I4;
455 one->inst_c0 = 1;
456 break;
458 one->dreg = alloc_dreg (cfg, one->type);
459 MONO_ADD_INS (cfg->cbb, one);
460 return emit_simd_ins (cfg, klass, expand_opcode, one->dreg, -1);
462 case SN_get_AllBitsSet: {
463 /* Compare a zero vector with itself */
464 ins = emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
465 return emit_xcompare (cfg, klass, etype->type, ins, ins);
467 case SN_get_Item: {
468 if (!COMPILE_LLVM (cfg))
469 return NULL;
/* Throw IndexOutOfRangeException unless the index, compared as unsigned, is below len */
470 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
471 MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "IndexOutOfRangeException");
472 int opcode = -1;
473 int dreg;
/* NOTE(review): is64 is assigned below but never read in this function */
474 gboolean is64 = FALSE;
475 switch (etype->type) {
476 case MONO_TYPE_I8:
477 case MONO_TYPE_U8:
478 opcode = OP_XEXTRACT_I64;
479 is64 = TRUE;
480 dreg = alloc_lreg (cfg);
481 break;
482 case MONO_TYPE_R8:
483 opcode = OP_XEXTRACT_R8;
484 dreg = alloc_freg (cfg);
485 break;
486 case MONO_TYPE_R4:
487 g_assert (cfg->r4fp);
488 opcode = OP_XEXTRACT_R4;
489 dreg = alloc_freg (cfg);
490 break;
491 default:
492 opcode = OP_XEXTRACT_I32;
493 dreg = alloc_ireg (cfg);
494 break;
496 MONO_INST_NEW (cfg, ins, opcode);
497 ins->dreg = dreg;
498 ins->sreg1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
499 ins->sreg2 = args [1]->dreg;
500 ins->inst_c0 = etype->type;
501 mini_type_to_eval_stack_type (cfg, etype, ins);
502 MONO_ADD_INS (cfg->cbb, ins);
503 return ins;
505 case SN_ctor:
/* .ctor (T): expand the scalar argument into the register of 'this' */
506 if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) {
507 int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
509 int opcode = type_to_expand_op (etype);
510 ins = emit_simd_ins (cfg, klass, opcode, args [1]->dreg, -1);
511 ins->dreg = dreg;
512 return ins;
514 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
515 MonoInst *array_ins = args [1];
516 MonoInst *index_ins;
517 MonoInst *ldelema_ins;
518 MonoInst *var;
519 int end_index_reg;
/* Only handled when 'this' is the address of a variable */
521 if (args [0]->opcode != OP_LDADDR)
522 return NULL;
524 /* .ctor (T[]) or .ctor (T[], index) */
526 if (fsig->param_count == 2) {
527 index_ins = args [2];
528 } else {
529 EMIT_NEW_ICONST (cfg, index_ins, 0);
532 /* Emit index check for the end (index + len - 1 < array length) */
533 end_index_reg = alloc_ireg (cfg);
534 EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1);
535 MONO_EMIT_BOUNDS_CHECK (cfg, array_ins->dreg, MonoArray, max_length, end_index_reg);
537 /* Load the array slice into the simd reg */
538 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, TRUE, FALSE);
539 g_assert (args [0]->opcode == OP_LDADDR);
540 var = (MonoInst*)args [0]->inst_p0;
541 EMIT_NEW_LOAD_MEMBASE (cfg, ins, OP_LOADX_MEMBASE, var->dreg, ldelema_ins->dreg, 0);
542 ins->klass = cmethod->klass;
543 return args [0];
545 break;
546 case SN_CopyTo:
547 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
548 MonoInst *array_ins = args [1];
549 MonoInst *index_ins;
550 MonoInst *ldelema_ins;
551 int val_vreg, end_index_reg;
553 val_vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
555 /* CopyTo (T[]) or CopyTo (T[], index) */
557 if (fsig->param_count == 2) {
558 index_ins = args [2];
559 } else {
560 EMIT_NEW_ICONST (cfg, index_ins, 0);
563 /* CopyTo () does complicated argument checks */
564 mini_emit_bounds_check_offset (cfg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), index_ins->dreg, "ArgumentOutOfRangeException");
565 end_index_reg = alloc_ireg (cfg);
566 int len_reg = alloc_ireg (cfg);
/* Throw ArgumentException if fewer than len elements remain after index (array length - index < len) */
567 MONO_EMIT_NEW_LOAD_MEMBASE_OP_FLAGS (cfg, OP_LOADI4_MEMBASE, len_reg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), MONO_INST_INVARIANT_LOAD);
568 EMIT_NEW_BIALU (cfg, ins, OP_ISUB, end_index_reg, len_reg, index_ins->dreg);
569 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, end_index_reg, len);
570 MONO_EMIT_NEW_COND_EXC (cfg, LT, "ArgumentException");
572 /* Store the simd reg into the array slice */
573 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, FALSE, FALSE);
574 EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, ldelema_ins->dreg, 0, val_vreg);
575 ins->klass = cmethod->klass;
576 return ins;
578 break;
579 case SN_Equals:
580 if (fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && mono_metadata_type_equal (fsig->params [0], type)) {
/* Instance Equals (other): whole-vector equality returning a bool */
581 int sreg1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
583 return emit_simd_ins (cfg, klass, OP_XEQUAL, sreg1, args [1]->dreg);
584 } else if (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)) {
585 /* Per element equality */
586 return emit_xcompare (cfg, klass, etype->type, args [0], args [1]);
588 break;
589 case SN_op_Equality:
590 case SN_op_Inequality:
591 g_assert (fsig->param_count == 2 && fsig->ret->type == MONO_TYPE_BOOLEAN &&
592 mono_metadata_type_equal (fsig->params [0], type) &&
593 mono_metadata_type_equal (fsig->params [1], type));
594 ins = emit_simd_ins (cfg, klass, OP_XEQUAL, args [0]->dreg, args [1]->dreg);
595 if (id == SN_op_Inequality) {
/* Inequality is computed as (OP_XEQUAL result == 0) */
596 int sreg = ins->dreg;
597 int dreg = alloc_ireg (cfg);
598 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, sreg, 0);
599 EMIT_NEW_UNALU (cfg, ins, OP_CEQ, dreg, -1);
601 return ins;
602 case SN_GreaterThan:
603 case SN_GreaterThanOrEqual:
604 case SN_LessThan:
605 case SN_LessThanOrEqual:
606 g_assert (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type));
607 is_unsigned = etype->type == MONO_TYPE_U1 || etype->type == MONO_TYPE_U2 || etype->type == MONO_TYPE_U4 || etype->type == MONO_TYPE_U8;
/* emit_xcompare () emits a CMP_EQ compare; the comparison kind is replaced below */
608 ins = emit_xcompare (cfg, klass, etype->type, args [0], args [1]);
609 switch (id) {
610 case SN_GreaterThan:
611 ins->inst_c0 = is_unsigned ? CMP_GT_UN : CMP_GT;
612 break;
613 case SN_GreaterThanOrEqual:
614 ins->inst_c0 = is_unsigned ? CMP_GE_UN : CMP_GE;
615 break;
616 case SN_LessThan:
617 ins->inst_c0 = is_unsigned ? CMP_LT_UN : CMP_LT;
618 break;
619 case SN_LessThanOrEqual:
620 ins->inst_c0 = is_unsigned ? CMP_LE_UN : CMP_LE;
621 break;
622 default:
623 g_assert_not_reached ();
625 return ins;
626 case SN_op_Explicit:
627 return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1);
628 case SN_op_Addition:
629 case SN_op_Subtraction:
630 case SN_op_Division:
631 case SN_op_Multiply:
632 case SN_op_BitwiseAnd:
633 case SN_op_BitwiseOr:
634 case SN_op_ExclusiveOr:
635 case SN_Max:
636 case SN_Min:
637 if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)))
638 return NULL;
/* All binary operations use OP_XBINOP: inst_c0 holds the scalar opcode, inst_c1 the element type */
639 ins = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, args [1]->dreg);
640 ins->inst_c1 = etype->type;
642 if (etype->type == MONO_TYPE_R4 || etype->type == MONO_TYPE_R8) {
643 switch (id) {
644 case SN_op_Addition:
645 ins->inst_c0 = OP_FADD;
646 break;
647 case SN_op_Subtraction:
648 ins->inst_c0 = OP_FSUB;
649 break;
650 case SN_op_Multiply:
651 ins->inst_c0 = OP_FMUL;
652 break;
653 case SN_op_Division:
654 ins->inst_c0 = OP_FDIV;
655 break;
656 case SN_Max:
657 ins->inst_c0 = OP_FMAX;
658 break;
659 case SN_Min:
660 ins->inst_c0 = OP_FMIN;
661 break;
662 default:
/* Bitwise operations on floating point elements are not handled: drop the instruction emitted above */
663 NULLIFY_INS (ins);
664 return NULL;
666 } else {
667 switch (id) {
668 case SN_op_Addition:
669 ins->inst_c0 = OP_IADD;
670 break;
671 case SN_op_Subtraction:
672 ins->inst_c0 = OP_ISUB;
673 break;
675 case SN_op_Division:
676 ins->inst_c0 = OP_IDIV;
677 break;
678 case SN_op_Multiply:
679 ins->inst_c0 = OP_IMUL;
680 break;
682 case SN_op_BitwiseAnd:
683 ins->inst_c0 = OP_IAND;
684 break;
685 case SN_op_BitwiseOr:
686 ins->inst_c0 = OP_IOR;
687 break;
688 case SN_op_ExclusiveOr:
689 ins->inst_c0 = OP_IXOR;
690 break;
691 case SN_Max:
692 ins->inst_c0 = OP_IMAX;
693 break;
694 case SN_Min:
695 ins->inst_c0 = OP_IMIN;
696 break;
697 default:
698 NULLIFY_INS (ins);
699 return NULL;
702 return ins;
703 default:
704 break;
707 return NULL;
709 #endif // TARGET_AMD64
711 static MonoInst*
712 emit_invalid_operation (MonoCompile *cfg, const char* message)
714 mono_cfg_set_exception (cfg, MONO_EXCEPTION_MONO_ERROR);
715 mono_error_set_generic_error (cfg->error, "System", "InvalidOperationException", "%s", message);
716 return NULL;
719 #ifdef TARGET_ARM64
/*
 * Methods of the ArmBase class handled by emit_arm64_intrinsics ().
 * Only the id is given, so op and instc0 are zero; the opcodes are chosen in
 * emit_arm64_intrinsics (). The table is binary searched by name in
 * lookup_intrins_info (), so the entries have to stay in ascending
 * strcmp () order of their names.
 */
721 static SimdIntrinsic armbase_methods [] = {
722 {SN_LeadingSignCount},
723 {SN_LeadingZeroCount},
724 {SN_ReverseElementBits},
725 {SN_get_IsSupported}
/*
 * Methods of the Crc32 class handled by emit_arm64_intrinsics ().
 * Only the id is given, so op and instc0 are zero; the opcodes are chosen in
 * emit_arm64_intrinsics (). The table is binary searched by name in
 * lookup_intrins_info (), so the entries have to stay in ascending
 * strcmp () order of their names.
 */
728 static SimdIntrinsic crc32_methods [] = {
729 {SN_ComputeCrc32},
730 {SN_ComputeCrc32C},
731 {SN_get_IsSupported}
734 static MonoInst*
735 emit_arm64_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
737 // Arm64 intrinsics are LLVM-only
738 if (!COMPILE_LLVM (cfg))
739 return NULL;
741 MonoInst *ins;
742 gboolean supported, is_64bit;
743 MonoClass *klass = cmethod->klass;
744 MonoTypeEnum arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID;
745 gboolean arg0_i32 = (arg0_type == MONO_TYPE_I4) || (arg0_type == MONO_TYPE_U4);
746 SimdIntrinsic *info;
748 if (is_hw_intrinsics_class (klass, "ArmBase", &is_64bit)) {
749 info = lookup_intrins_info (armbase_methods, sizeof (armbase_methods), cmethod);
750 if (!info)
751 return NULL;
753 supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_BASE) != 0;
755 switch (info->id) {
756 case SN_get_IsSupported:
757 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
758 ins->type = STACK_I4;
759 return ins;
760 case SN_LeadingZeroCount:
761 return emit_simd_ins_for_sig (cfg, klass, arg0_i32 ? OP_LZCNT32 : OP_LZCNT64, 0, arg0_type, fsig, args);
762 case SN_LeadingSignCount:
763 return emit_simd_ins_for_sig (cfg, klass, arg0_i32 ? OP_LSCNT32 : OP_LSCNT64, 0, arg0_type, fsig, args);
764 case SN_ReverseElementBits:
765 return emit_simd_ins_for_sig (cfg, klass,
766 (is_64bit ? OP_XOP_I8_I8 : OP_XOP_I4_I4),
767 (is_64bit ? SIMD_OP_ARM64_RBIT64 : SIMD_OP_ARM64_RBIT32),
768 arg0_type, fsig, args);
769 default:
770 g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false
774 if (is_hw_intrinsics_class (klass, "Crc32", &is_64bit)) {
775 info = lookup_intrins_info (crc32_methods, sizeof (crc32_methods), cmethod);
776 if (!info)
777 return NULL;
779 supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_CRC) != 0;
781 switch (info->id) {
782 case SN_get_IsSupported:
783 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
784 ins->type = STACK_I4;
785 return ins;
786 case SN_ComputeCrc32:
787 case SN_ComputeCrc32C: {
788 SimdOp op = (SimdOp)0;
789 gboolean is_c = info->id == SN_ComputeCrc32C;
790 switch (get_underlying_type (fsig->params [1])) {
791 case MONO_TYPE_U1: op = is_c ? SIMD_OP_ARM64_CRC32CB : SIMD_OP_ARM64_CRC32B; break;
792 case MONO_TYPE_U2: op = is_c ? SIMD_OP_ARM64_CRC32CH : SIMD_OP_ARM64_CRC32H; break;
793 case MONO_TYPE_U4: op = is_c ? SIMD_OP_ARM64_CRC32CW : SIMD_OP_ARM64_CRC32W; break;
794 case MONO_TYPE_U8: op = is_c ? SIMD_OP_ARM64_CRC32CX : SIMD_OP_ARM64_CRC32X; break;
795 default: g_assert_not_reached (); break;
797 return emit_simd_ins_for_sig (cfg, klass, is_64bit ? OP_XOP_I4_I4_I8 : OP_XOP_I4_I4_I4, op, arg0_type, fsig, args);
799 default:
800 g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false
803 return NULL;
805 #endif // TARGET_ARM64
807 #ifdef TARGET_AMD64
/*
 * Intrinsic table named for SSE. Each entry is {id, op, instc0} as described
 * by SimdIntrinsic; entries that only give an id leave op and instc0 zero.
 * NOTE(review): the code consuming this table is not visible here; presumably
 * it is searched with lookup_intrins_info () like the arm64 tables, in which
 * case the entries must stay in ascending strcmp () order of their names, and
 * id-only entries are presumably handled explicitly by the emitter -- confirm.
 */
809 static SimdIntrinsic sse_methods [] = {
810 {SN_Add, OP_XBINOP, OP_FADD},
811 {SN_AddScalar, OP_SSE_ADDSS},
812 {SN_And, OP_SSE_AND},
813 {SN_AndNot, OP_SSE_ANDN},
814 {SN_CompareEqual, OP_XCOMPARE_FP, CMP_EQ},
815 {SN_CompareGreaterThan, OP_XCOMPARE_FP,CMP_GT},
816 {SN_CompareGreaterThanOrEqual, OP_XCOMPARE_FP, CMP_GE},
817 {SN_CompareLessThan, OP_XCOMPARE_FP, CMP_LT},
818 {SN_CompareLessThanOrEqual, OP_XCOMPARE_FP, CMP_LE},
819 {SN_CompareNotEqual, OP_XCOMPARE_FP, CMP_NE},
820 {SN_CompareNotGreaterThan, OP_XCOMPARE_FP, CMP_LE},
821 {SN_CompareNotGreaterThanOrEqual, OP_XCOMPARE_FP, CMP_LT},
822 {SN_CompareNotLessThan, OP_XCOMPARE_FP, CMP_GE},
823 {SN_CompareNotLessThanOrEqual, OP_XCOMPARE_FP, CMP_GT},
824 {SN_CompareOrdered, OP_XCOMPARE_FP, CMP_ORD},
825 {SN_CompareScalarEqual, OP_SSE_CMPSS, CMP_EQ},
826 {SN_CompareScalarGreaterThan, OP_SSE_CMPSS, CMP_GT},
827 {SN_CompareScalarGreaterThanOrEqual, OP_SSE_CMPSS, CMP_GE},
828 {SN_CompareScalarLessThan, OP_SSE_CMPSS, CMP_LT},
829 {SN_CompareScalarLessThanOrEqual, OP_SSE_CMPSS, CMP_LE},
830 {SN_CompareScalarNotEqual, OP_SSE_CMPSS, CMP_NE},
831 {SN_CompareScalarNotGreaterThan, OP_SSE_CMPSS, CMP_LE},
832 {SN_CompareScalarNotGreaterThanOrEqual, OP_SSE_CMPSS, CMP_LT},
833 {SN_CompareScalarNotLessThan, OP_SSE_CMPSS, CMP_GE},
834 {SN_CompareScalarNotLessThanOrEqual, OP_SSE_CMPSS, CMP_GT},
835 {SN_CompareScalarOrdered, OP_SSE_CMPSS, CMP_ORD},
836 {SN_CompareScalarOrderedEqual, OP_SSE_COMISS, CMP_EQ},
837 {SN_CompareScalarOrderedGreaterThan, OP_SSE_COMISS, CMP_GT},
838 {SN_CompareScalarOrderedGreaterThanOrEqual, OP_SSE_COMISS, CMP_GE},
839 {SN_CompareScalarOrderedLessThan, OP_SSE_COMISS, CMP_LT},
840 {SN_CompareScalarOrderedLessThanOrEqual, OP_SSE_COMISS, CMP_LE},
841 {SN_CompareScalarOrderedNotEqual, OP_SSE_COMISS, CMP_NE},
842 {SN_CompareScalarUnordered, OP_SSE_CMPSS, CMP_UNORD},
843 {SN_CompareScalarUnorderedEqual, OP_SSE_UCOMISS, CMP_EQ},
844 {SN_CompareScalarUnorderedGreaterThan, OP_SSE_UCOMISS, CMP_GT},
845 {SN_CompareScalarUnorderedGreaterThanOrEqual, OP_SSE_UCOMISS, CMP_GE},
846 {SN_CompareScalarUnorderedLessThan, OP_SSE_UCOMISS, CMP_LT},
847 {SN_CompareScalarUnorderedLessThanOrEqual, OP_SSE_UCOMISS, CMP_LE},
848 {SN_CompareScalarUnorderedNotEqual, OP_SSE_UCOMISS, CMP_NE},
849 {SN_CompareUnordered, OP_XCOMPARE_FP, CMP_UNORD},
850 {SN_ConvertScalarToVector128Single},
851 {SN_ConvertToInt32, OP_XOP_I4_X, SIMD_OP_SSE_CVTSS2SI},
852 {SN_ConvertToInt32WithTruncation, OP_XOP_I4_X, SIMD_OP_SSE_CVTTSS2SI},
853 {SN_ConvertToInt64, OP_XOP_I8_X, SIMD_OP_SSE_CVTSS2SI64},
854 {SN_ConvertToInt64WithTruncation, OP_XOP_I8_X, SIMD_OP_SSE_CVTTSS2SI64},
855 {SN_Divide, OP_XBINOP, OP_FDIV},
856 {SN_DivideScalar, OP_SSE_DIVSS},
857 {SN_LoadAlignedVector128, OP_SSE_LOADU, 16 /* alignment */},
858 {SN_LoadHigh, OP_SSE_MOVHPS_LOAD},
859 {SN_LoadLow, OP_SSE_MOVLPS_LOAD},
860 {SN_LoadScalarVector128, OP_SSE_MOVSS},
861 {SN_LoadVector128, OP_SSE_LOADU, 1 /* alignment */},
862 {SN_Max, OP_XOP_X_X_X, SIMD_OP_SSE_MAXPS},
863 {SN_MaxScalar, OP_XOP_X_X_X, SIMD_OP_SSE_MAXSS},
864 {SN_Min, OP_XOP_X_X_X, SIMD_OP_SSE_MINPS},
865 {SN_MinScalar, OP_XOP_X_X_X, SIMD_OP_SSE_MINSS},
866 {SN_MoveHighToLow, OP_SSE_MOVEHL},
867 {SN_MoveLowToHigh, OP_SSE_MOVELH},
868 {SN_MoveMask, OP_SSE_MOVMSK},
869 {SN_MoveScalar, OP_SSE_MOVS2},
870 {SN_Multiply, OP_XBINOP, OP_FMUL},
871 {SN_MultiplyScalar, OP_SSE_MULSS},
872 {SN_Or, OP_SSE_OR},
873 {SN_Prefetch0, OP_SSE_PREFETCHT0},
874 {SN_Prefetch1, OP_SSE_PREFETCHT1},
875 {SN_Prefetch2, OP_SSE_PREFETCHT2},
876 {SN_PrefetchNonTemporal, OP_SSE_PREFETCHNTA},
877 {SN_Reciprocal, OP_XOP_X_X, SIMD_OP_SSE_RCPPS},
878 {SN_ReciprocalScalar},
879 {SN_ReciprocalSqrt, OP_XOP_X_X, SIMD_OP_SSE_RSQRTPS},
880 {SN_ReciprocalSqrtScalar},
881 {SN_Shuffle},
882 {SN_Sqrt, OP_XOP_X_X, SIMD_OP_SSE_SQRTPS},
883 {SN_SqrtScalar},
884 {SN_Store, OP_SSE_STORE, 1 /* alignment */},
885 {SN_StoreAligned, OP_SSE_STORE, 16 /* alignment */},
886 {SN_StoreAlignedNonTemporal, OP_SSE_MOVNTPS, 16 /* alignment */},
887 {SN_StoreFence, OP_XOP, SIMD_OP_SSE_SFENCE},
888 {SN_StoreHigh, OP_SSE_MOVHPS_STORE},
889 {SN_StoreLow, OP_SSE_MOVLPS_STORE},
890 {SN_StoreScalar, OP_SSE_MOVSS_STORE},
891 {SN_Subtract, OP_XBINOP, OP_FSUB},
892 {SN_SubtractScalar, OP_SSE_SUBSS},
893 {SN_UnpackHigh, OP_SSE_UNPACKHI},
894 {SN_UnpackLow, OP_SSE_UNPACKLO},
895 {SN_Xor, OP_SSE_XOR},
896 {SN_get_IsSupported}
/*
 * Intrinsic table named for SSE2. Each entry is {id, op, instc0} as described
 * by SimdIntrinsic; entries that only give an id leave op and instc0 zero.
 * NOTE(review): the code consuming this table is not visible here; presumably
 * it is searched with lookup_intrins_info () like the arm64 tables, in which
 * case the entries must stay in ascending strcmp () order of their names, and
 * id-only entries are presumably handled explicitly by the emitter -- confirm.
 */
899 static SimdIntrinsic sse2_methods [] = {
900 {SN_Add},
901 {SN_AddSaturate, OP_SSE2_ADDS},
902 {SN_AddScalar, OP_SSE2_ADDSD},
903 {SN_And, OP_SSE_AND},
904 {SN_AndNot, OP_SSE_ANDN},
905 {SN_Average},
906 {SN_CompareEqual},
907 {SN_CompareGreaterThan},
908 {SN_CompareGreaterThanOrEqual, OP_XCOMPARE_FP, CMP_GE},
909 {SN_CompareLessThan},
910 {SN_CompareLessThanOrEqual, OP_XCOMPARE_FP, CMP_LE},
911 {SN_CompareNotEqual, OP_XCOMPARE_FP, CMP_NE},
912 {SN_CompareNotGreaterThan, OP_XCOMPARE_FP, CMP_LE},
913 {SN_CompareNotGreaterThanOrEqual, OP_XCOMPARE_FP, CMP_LT},
914 {SN_CompareNotLessThan, OP_XCOMPARE_FP, CMP_GE},
915 {SN_CompareNotLessThanOrEqual, OP_XCOMPARE_FP, CMP_GT},
916 {SN_CompareOrdered, OP_XCOMPARE_FP, CMP_ORD},
917 {SN_CompareScalarEqual, OP_SSE2_CMPSD, CMP_EQ},
918 {SN_CompareScalarGreaterThan, OP_SSE2_CMPSD, CMP_GT},
919 {SN_CompareScalarGreaterThanOrEqual, OP_SSE2_CMPSD, CMP_GE},
920 {SN_CompareScalarLessThan, OP_SSE2_CMPSD, CMP_LT},
921 {SN_CompareScalarLessThanOrEqual, OP_SSE2_CMPSD, CMP_LE},
922 {SN_CompareScalarNotEqual, OP_SSE2_CMPSD, CMP_NE},
923 {SN_CompareScalarNotGreaterThan, OP_SSE2_CMPSD, CMP_LE},
924 {SN_CompareScalarNotGreaterThanOrEqual, OP_SSE2_CMPSD, CMP_LT},
925 {SN_CompareScalarNotLessThan, OP_SSE2_CMPSD, CMP_GE},
926 {SN_CompareScalarNotLessThanOrEqual, OP_SSE2_CMPSD, CMP_GT},
927 {SN_CompareScalarOrdered, OP_SSE2_CMPSD, CMP_ORD},
928 {SN_CompareScalarOrderedEqual, OP_SSE2_COMISD, CMP_EQ},
929 {SN_CompareScalarOrderedGreaterThan, OP_SSE2_COMISD, CMP_GT},
930 {SN_CompareScalarOrderedGreaterThanOrEqual, OP_SSE2_COMISD, CMP_GE},
931 {SN_CompareScalarOrderedLessThan, OP_SSE2_COMISD, CMP_LT},
932 {SN_CompareScalarOrderedLessThanOrEqual, OP_SSE2_COMISD, CMP_LE},
933 {SN_CompareScalarOrderedNotEqual, OP_SSE2_COMISD, CMP_NE},
934 {SN_CompareScalarUnordered, OP_SSE2_CMPSD, CMP_UNORD},
935 {SN_CompareScalarUnorderedEqual, OP_SSE2_UCOMISD, CMP_EQ},
936 {SN_CompareScalarUnorderedGreaterThan, OP_SSE2_UCOMISD, CMP_GT},
937 {SN_CompareScalarUnorderedGreaterThanOrEqual, OP_SSE2_UCOMISD, CMP_GE},
938 {SN_CompareScalarUnorderedLessThan, OP_SSE2_UCOMISD, CMP_LT},
939 {SN_CompareScalarUnorderedLessThanOrEqual, OP_SSE2_UCOMISD, CMP_LE},
940 {SN_CompareScalarUnorderedNotEqual, OP_SSE2_UCOMISD, CMP_NE},
941 {SN_CompareUnordered, OP_XCOMPARE_FP, CMP_UNORD},
942 {SN_ConvertScalarToVector128Double},
943 {SN_ConvertScalarToVector128Int32},
944 {SN_ConvertScalarToVector128Int64},
945 {SN_ConvertScalarToVector128Single, OP_XOP_X_X_X, SIMD_OP_SSE_CVTSD2SS},
946 {SN_ConvertScalarToVector128UInt32},
947 {SN_ConvertScalarToVector128UInt64},
948 {SN_ConvertToInt32},
949 {SN_ConvertToInt32WithTruncation, OP_XOP_I4_X, SIMD_OP_SSE_CVTTSD2SI},
950 {SN_ConvertToInt64},
951 {SN_ConvertToInt64WithTruncation, OP_XOP_I8_X, SIMD_OP_SSE_CVTTSD2SI64},
952 {SN_ConvertToUInt32},
953 {SN_ConvertToUInt64},
954 {SN_ConvertToVector128Double},
955 {SN_ConvertToVector128Int32},
956 {SN_ConvertToVector128Int32WithTruncation},
957 {SN_ConvertToVector128Single},
958 {SN_Divide, OP_XBINOP, OP_FDIV},
959 {SN_DivideScalar, OP_SSE2_DIVSD},
960 {SN_Extract},
961 {SN_Insert},
962 {SN_LoadAlignedVector128},
963 {SN_LoadFence, OP_XOP, SIMD_OP_SSE_LFENCE},
964 {SN_LoadHigh, OP_SSE2_MOVHPD_LOAD},
965 {SN_LoadLow, OP_SSE2_MOVLPD_LOAD},
966 {SN_LoadScalarVector128},
967 {SN_LoadVector128},
968 {SN_MaskMove, OP_SSE2_MASKMOVDQU},
969 {SN_Max},
970 {SN_MaxScalar, OP_XOP_X_X_X, SIMD_OP_SSE_MAXSD},
971 {SN_MemoryFence, OP_XOP, SIMD_OP_SSE_MFENCE},
972 {SN_Min}, // FIXME:
973 {SN_MinScalar, OP_XOP_X_X_X, SIMD_OP_SSE_MINSD},
974 {SN_MoveMask, OP_SSE_MOVMSK},
975 {SN_MoveScalar},
976 {SN_Multiply},
977 {SN_MultiplyAddAdjacent, OP_XOP_X_X_X, SIMD_OP_SSE_PMADDWD},
978 {SN_MultiplyHigh},
979 {SN_MultiplyLow, OP_PMULW},
980 {SN_MultiplyScalar, OP_SSE2_MULSD},
981 {SN_Or, OP_SSE_OR},
982 {SN_PackSignedSaturate},
983 {SN_PackUnsignedSaturate},
984 {SN_ShiftLeftLogical},
985 {SN_ShiftLeftLogical128BitLane},
986 {SN_ShiftRightArithmetic},
987 {SN_ShiftRightLogical},
988 {SN_ShiftRightLogical128BitLane},
989 {SN_Shuffle},
990 {SN_ShuffleHigh},
991 {SN_ShuffleLow},
992 {SN_Sqrt, OP_XOP_X_X, SIMD_OP_SSE_SQRTPD},
993 {SN_SqrtScalar},
994 {SN_Store, OP_SSE_STORE, 1 /* alignment */},
995 {SN_StoreAligned, OP_SSE_STORE, 16 /* alignment */},
996 {SN_StoreAlignedNonTemporal, OP_SSE_MOVNTPS, 16 /* alignment */},
997 {SN_StoreHigh, OP_SSE2_MOVHPD_STORE},
998 {SN_StoreLow, OP_SSE2_MOVLPD_STORE},
999 {SN_StoreNonTemporal, OP_SSE_MOVNTPS, 1 /* alignment */},
1000 {SN_StoreScalar, OP_SSE_STORES},
1001 {SN_Subtract},
1002 {SN_SubtractSaturate, OP_SSE2_SUBS},
1003 {SN_SubtractScalar, OP_SSE2_SUBSD},
1004 {SN_SumAbsoluteDifferences, OP_XOP_X_X_X, SIMD_OP_SSE_PSADBW},
1005 {SN_UnpackHigh, OP_SSE_UNPACKHI},
1006 {SN_UnpackLow, OP_SSE_UNPACKLO},
1007 {SN_Xor, OP_SSE_XOR},
1008 {SN_get_IsSupported}
1011 static SimdIntrinsic sse3_methods [] = {
1012 {SN_AddSubtract},
1013 {SN_HorizontalAdd},
1014 {SN_HorizontalSubtract},
1015 {SN_LoadAndDuplicateToVector128, OP_SSE3_MOVDDUP_MEM},
1016 {SN_LoadDquVector128, OP_XOP_X_I, SIMD_OP_SSE_LDDQU},
1017 {SN_MoveAndDuplicate, OP_SSE3_MOVDDUP},
1018 {SN_MoveHighAndDuplicate, OP_SSE3_MOVSHDUP},
1019 {SN_MoveLowAndDuplicate, OP_SSE3_MOVSLDUP},
1020 {SN_get_IsSupported}
1023 static SimdIntrinsic ssse3_methods [] = {
1024 {SN_Abs, OP_SSSE3_ABS},
1025 {SN_AlignRight},
1026 {SN_HorizontalAdd},
1027 {SN_HorizontalAddSaturate, OP_XOP_X_X_X, SIMD_OP_SSE_PHADDSW},
1028 {SN_HorizontalSubtract},
1029 {SN_HorizontalSubtractSaturate, OP_XOP_X_X_X, SIMD_OP_SSE_PHSUBSW},
1030 {SN_MultiplyAddAdjacent, OP_XOP_X_X_X, SIMD_OP_SSE_PMADDUBSW},
1031 {SN_MultiplyHighRoundScale, OP_XOP_X_X_X, SIMD_OP_SSE_PMULHRSW},
1032 {SN_Shuffle, OP_SSSE3_SHUFFLE},
1033 {SN_Sign},
1034 {SN_get_IsSupported}
1037 static SimdIntrinsic sse41_methods [] = {
1038 {SN_Blend},
1039 {SN_BlendVariable},
1040 {SN_Ceiling, OP_SSE41_ROUNDP, 10 /*round mode*/},
1041 {SN_CeilingScalar, OP_SSE41_ROUNDS, 10 /*round mode*/},
1042 {SN_CompareEqual, OP_XCOMPARE, CMP_EQ},
1043 {SN_ConvertToVector128Int16, OP_SSE_CVTII, MONO_TYPE_I2},
1044 {SN_ConvertToVector128Int32, OP_SSE_CVTII, MONO_TYPE_I4},
1045 {SN_ConvertToVector128Int64, OP_SSE_CVTII, MONO_TYPE_I8},
1046 {SN_DotProduct},
1047 {SN_Extract},
1048 {SN_Floor, OP_SSE41_ROUNDP, 9 /*round mode*/},
1049 {SN_FloorScalar, OP_SSE41_ROUNDS, 9 /*round mode*/},
1050 {SN_Insert},
1051 {SN_LoadAlignedVector128NonTemporal, OP_SSE41_LOADANT},
1052 {SN_Max, OP_XBINOP, OP_IMAX},
1053 {SN_Min, OP_XBINOP, OP_IMIN},
1054 {SN_MinHorizontal, OP_XOP_X_X, SIMD_OP_SSE_PHMINPOSUW},
1055 {SN_MultipleSumAbsoluteDifferences},
1056 {SN_Multiply, OP_SSE41_MUL},
1057 {SN_MultiplyLow, OP_SSE41_MULLO},
1058 {SN_PackUnsignedSaturate, OP_XOP_X_X_X, SIMD_OP_SSE_PACKUSDW},
1059 {SN_RoundCurrentDirection, OP_SSE41_ROUNDP, 4 /*round mode*/},
1060 {SN_RoundCurrentDirectionScalar, OP_SSE41_ROUNDS, 4 /*round mode*/},
1061 {SN_RoundToNearestInteger, OP_SSE41_ROUNDP, 8 /*round mode*/},
1062 {SN_RoundToNearestIntegerScalar, OP_SSE41_ROUNDS, 8 /*round mode*/},
1063 {SN_RoundToNegativeInfinity, OP_SSE41_ROUNDP, 9 /*round mode*/},
1064 {SN_RoundToNegativeInfinityScalar, OP_SSE41_ROUNDS, 9 /*round mode*/},
1065 {SN_RoundToPositiveInfinity, OP_SSE41_ROUNDP, 10 /*round mode*/},
1066 {SN_RoundToPositiveInfinityScalar, OP_SSE41_ROUNDS, 10 /*round mode*/},
1067 {SN_RoundToZero, OP_SSE41_ROUNDP, 11 /*round mode*/},
1068 {SN_RoundToZeroScalar, OP_SSE41_ROUNDS, 11 /*round mode*/},
1069 {SN_TestC, OP_XOP_I4_X_X, SIMD_OP_SSE_TESTC},
1070 {SN_TestNotZAndNotC, OP_XOP_I4_X_X, SIMD_OP_SSE_TESTNZ},
1071 {SN_TestZ, OP_XOP_I4_X_X, SIMD_OP_SSE_TESTZ},
1072 {SN_get_IsSupported}
1075 static SimdIntrinsic sse42_methods [] = {
1076 {SN_CompareGreaterThan, OP_XCOMPARE, CMP_GT},
1077 {SN_Crc32},
1078 {SN_get_IsSupported}
1081 static SimdIntrinsic pclmulqdq_methods [] = {
1082 {SN_CarrylessMultiply},
1083 {SN_get_IsSupported}
1086 static SimdIntrinsic aes_methods [] = {
1087 {SN_Decrypt, OP_XOP_X_X_X, SIMD_OP_AES_DEC},
1088 {SN_DecryptLast, OP_XOP_X_X_X, SIMD_OP_AES_DECLAST},
1089 {SN_Encrypt, OP_XOP_X_X_X, SIMD_OP_AES_ENC},
1090 {SN_EncryptLast, OP_XOP_X_X_X, SIMD_OP_AES_ENCLAST},
1091 {SN_InverseMixColumns, OP_XOP_X_X, SIMD_OP_AES_IMC},
1092 {SN_KeygenAssist},
1093 {SN_get_IsSupported}
1096 static SimdIntrinsic popcnt_methods [] = {
1097 {SN_PopCount},
1098 {SN_get_IsSupported}
1101 static SimdIntrinsic lzcnt_methods [] = {
1102 {SN_LeadingZeroCount},
1103 {SN_get_IsSupported}
1106 static SimdIntrinsic bmi1_methods [] = {
1107 {SN_AndNot},
1108 {SN_BitFieldExtract},
1109 {SN_ExtractLowestSetBit},
1110 {SN_GetMaskUpToLowestSetBit},
1111 {SN_ResetLowestSetBit},
1112 {SN_TrailingZeroCount},
1113 {SN_get_IsSupported}
1116 static SimdIntrinsic bmi2_methods [] = {
1117 {SN_MultiplyNoFlags},
1118 {SN_ParallelBitDeposit},
1119 {SN_ParallelBitExtract},
1120 {SN_ZeroHighBits},
1121 {SN_get_IsSupported}
1124 static SimdIntrinsic x86base_methods [] = {
1125 {SN_BitScanForward},
1126 {SN_BitScanReverse},
1127 {SN_get_IsSupported}
1130 static MonoInst*
1131 emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
1133 MonoInst *ins;
1134 gboolean supported, is_64bit;
1135 MonoClass *klass = cmethod->klass;
1136 MonoTypeEnum arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID;
1137 SimdIntrinsic *info;
1139 if (is_hw_intrinsics_class (klass, "Sse", &is_64bit)) {
1140 if (!COMPILE_LLVM (cfg))
1141 return NULL;
1142 info = lookup_intrins_info (sse_methods, sizeof (sse_methods), cmethod);
1143 if (!info)
1144 return NULL;
1145 int id = info->id;
1147 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE) != 0;
1149 /* Common case */
1150 if (info->op != 0)
1151 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1153 switch (id) {
1154 case SN_get_IsSupported:
1155 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1156 ins->type = STACK_I4;
1157 return ins;
1158 case SN_Shuffle:
1159 if (args [2]->opcode == OP_ICONST)
1160 return emit_simd_ins_for_sig (cfg, klass, OP_SSE_SHUFFLE, args [2]->inst_c0, arg0_type, fsig, args);
1161 // FIXME: handle non-constant mask (generate a switch)
1162 return emit_invalid_operation (cfg, "mask in Sse.Shuffle must be constant");
1163 case SN_ConvertScalarToVector128Single: {
1164 int op = 0;
1165 switch (fsig->params [1]->type) {
1166 case MONO_TYPE_I4: op = OP_SSE_CVTSI2SS; break;
1167 case MONO_TYPE_I8: op = OP_SSE_CVTSI2SS64; break;
1168 default: g_assert_not_reached (); break;
1170 return emit_simd_ins_for_sig (cfg, klass, op, 0, 0, fsig, args);
1172 case SN_ReciprocalScalar:
1173 case SN_ReciprocalSqrtScalar:
1174 case SN_SqrtScalar: {
1175 int op = 0;
1176 switch (id) {
1177 case SN_ReciprocalScalar: op = OP_SSE_RCPSS; break;
1178 case SN_ReciprocalSqrtScalar: op = OP_SSE_RSQRTSS; break;
1179 case SN_SqrtScalar: op = OP_SSE_SQRTSS; break;
1181 if (fsig->param_count == 1)
1182 return emit_simd_ins (cfg, klass, op, args [0]->dreg, args[0]->dreg);
1183 else if (fsig->param_count == 2)
1184 return emit_simd_ins (cfg, klass, op, args [0]->dreg, args[1]->dreg);
1185 else
1186 g_assert_not_reached ();
1187 break;
1189 case SN_LoadScalarVector128:
1190 return NULL;
1191 default:
1192 return NULL;
1196 if (is_hw_intrinsics_class (klass, "Sse2", &is_64bit)) {
1197 if (!COMPILE_LLVM (cfg))
1198 return NULL;
1199 info = lookup_intrins_info (sse2_methods, sizeof (sse2_methods), cmethod);
1200 if (!info)
1201 return NULL;
1202 int id = info->id;
1204 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE2) != 0;
1206 /* Common case */
1207 if (info->op != 0)
1208 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1210 switch (id) {
1211 case SN_get_IsSupported: {
1212 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1213 ins->type = STACK_I4;
1214 return ins;
1216 case SN_Subtract:
1217 return emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, arg0_type == MONO_TYPE_R8 ? OP_FSUB : OP_ISUB, arg0_type, fsig, args);
1218 case SN_Add:
1219 return emit_simd_ins_for_sig (cfg, klass, OP_XBINOP, arg0_type == MONO_TYPE_R8 ? OP_FADD : OP_IADD, arg0_type, fsig, args);
1220 case SN_Average:
1221 if (arg0_type == MONO_TYPE_U1)
1222 return emit_simd_ins_for_sig (cfg, klass, OP_PAVGB_UN, -1, arg0_type, fsig, args);
1223 else if (arg0_type == MONO_TYPE_U2)
1224 return emit_simd_ins_for_sig (cfg, klass, OP_PAVGW_UN, -1, arg0_type, fsig, args);
1225 else
1226 return NULL;
1227 case SN_CompareNotEqual:
1228 return emit_simd_ins_for_sig (cfg, klass, arg0_type == MONO_TYPE_R8 ? OP_XCOMPARE_FP : OP_XCOMPARE, CMP_NE, arg0_type, fsig, args);
1229 case SN_CompareEqual:
1230 return emit_simd_ins_for_sig (cfg, klass, arg0_type == MONO_TYPE_R8 ? OP_XCOMPARE_FP : OP_XCOMPARE, CMP_EQ, arg0_type, fsig, args);
1231 case SN_CompareGreaterThan:
1232 return emit_simd_ins_for_sig (cfg, klass, arg0_type == MONO_TYPE_R8 ? OP_XCOMPARE_FP : OP_XCOMPARE, CMP_GT, arg0_type, fsig, args);
1233 case SN_CompareLessThan:
1234 return emit_simd_ins_for_sig (cfg, klass, arg0_type == MONO_TYPE_R8 ? OP_XCOMPARE_FP : OP_XCOMPARE, CMP_LT, arg0_type, fsig, args);
1235 case SN_ConvertToInt32:
1236 if (arg0_type == MONO_TYPE_R8)
1237 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_I4_X, SIMD_OP_SSE_CVTSD2SI, arg0_type, fsig, args);
1238 else if (arg0_type == MONO_TYPE_I4)
1239 return emit_simd_ins_for_sig (cfg, klass, OP_EXTRACT_I4, 0, arg0_type, fsig, args);
1240 else
1241 return NULL;
1242 case SN_ConvertToInt64:
1243 if (arg0_type == MONO_TYPE_R8)
1244 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_I8_X, SIMD_OP_SSE_CVTSD2SI64, arg0_type, fsig, args);
1245 else if (arg0_type == MONO_TYPE_I8)
1246 return emit_simd_ins_for_sig (cfg, klass, OP_EXTRACT_I8, 0 /*element index*/, arg0_type, fsig, args);
1247 else
1248 g_assert_not_reached ();
1249 break;
1250 case SN_ConvertScalarToVector128Double: {
1251 int op = OP_SSE2_CVTSS2SD;
1252 switch (fsig->params [1]->type) {
1253 case MONO_TYPE_I4: op = OP_SSE2_CVTSI2SD; break;
1254 case MONO_TYPE_I8: op = OP_SSE2_CVTSI2SD64; break;
1256 return emit_simd_ins_for_sig (cfg, klass, op, 0, 0, fsig, args);
1258 case SN_ConvertScalarToVector128Int32:
1259 case SN_ConvertScalarToVector128Int64:
1260 case SN_ConvertScalarToVector128UInt32:
1261 case SN_ConvertScalarToVector128UInt64:
1262 return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR, -1, arg0_type, fsig, args);
1263 case SN_ConvertToUInt32:
1264 return emit_simd_ins_for_sig (cfg, klass, OP_EXTRACT_I4, 0 /*element index*/, arg0_type, fsig, args);
1265 case SN_ConvertToUInt64:
1266 return emit_simd_ins_for_sig (cfg, klass, OP_EXTRACT_I8, 0 /*element index*/, arg0_type, fsig, args);
1267 case SN_ConvertToVector128Double:
1268 if (arg0_type == MONO_TYPE_R4)
1269 return emit_simd_ins_for_sig (cfg, klass, OP_CVTPS2PD, 0, arg0_type, fsig, args);
1270 else if (arg0_type == MONO_TYPE_I4)
1271 return emit_simd_ins_for_sig (cfg, klass, OP_CVTDQ2PD, 0, arg0_type, fsig, args);
1272 else
1273 return NULL;
1274 case SN_ConvertToVector128Int32:
1275 if (arg0_type == MONO_TYPE_R4)
1276 return emit_simd_ins_for_sig (cfg, klass, OP_CVTPS2DQ, 0, arg0_type, fsig, args);
1277 else if (arg0_type == MONO_TYPE_R8)
1278 return emit_simd_ins_for_sig (cfg, klass, OP_CVTPD2DQ, 0, arg0_type, fsig, args);
1279 else
1280 return NULL;
1281 case SN_ConvertToVector128Int32WithTruncation:
1282 if (arg0_type == MONO_TYPE_R4)
1283 return emit_simd_ins_for_sig (cfg, klass, OP_CVTTPS2DQ, 0, arg0_type, fsig, args);
1284 else if (arg0_type == MONO_TYPE_R8)
1285 return emit_simd_ins_for_sig (cfg, klass, OP_CVTTPD2DQ, 0, arg0_type, fsig, args);
1286 else
1287 return NULL;
1288 case SN_ConvertToVector128Single:
1289 if (arg0_type == MONO_TYPE_I4)
1290 return emit_simd_ins_for_sig (cfg, klass, OP_CVTDQ2PS, 0, arg0_type, fsig, args);
1291 else if (arg0_type == MONO_TYPE_R8)
1292 return emit_simd_ins_for_sig (cfg, klass, OP_CVTPD2PS, 0, arg0_type, fsig, args);
1293 else
1294 return NULL;
1295 case SN_LoadAlignedVector128:
1296 return emit_simd_ins_for_sig (cfg, klass, OP_SSE_LOADU, 16 /*alignment*/, arg0_type, fsig, args);
1297 case SN_LoadVector128:
1298 return emit_simd_ins_for_sig (cfg, klass, OP_SSE_LOADU, 1 /*alignment*/, arg0_type, fsig, args);
1299 case SN_MoveScalar:
1300 return emit_simd_ins_for_sig (cfg, klass, fsig->param_count == 2 ? OP_SSE_MOVS2 : OP_SSE_MOVS, -1, arg0_type, fsig, args);
1301 case SN_Max:
1302 switch (arg0_type) {
1303 case MONO_TYPE_U1:
1304 return emit_simd_ins_for_sig (cfg, klass, OP_PMAXB_UN, 0, arg0_type, fsig, args);
1305 case MONO_TYPE_I2:
1306 return emit_simd_ins_for_sig (cfg, klass, OP_PMAXW, 0, arg0_type, fsig, args);
1307 case MONO_TYPE_R8: return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_MAXPD, arg0_type, fsig, args);
1308 default:
1309 g_assert_not_reached ();
1310 break;
1312 break;
1313 case SN_Min:
1314 switch (arg0_type) {
1315 case MONO_TYPE_U1:
1316 return emit_simd_ins_for_sig (cfg, klass, OP_PMINB_UN, 0, arg0_type, fsig, args);
1317 case MONO_TYPE_I2:
1318 return emit_simd_ins_for_sig (cfg, klass, OP_PMINW, 0, arg0_type, fsig, args);
1319 case MONO_TYPE_R8: return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_MINPD, arg0_type, fsig, args);
1320 default:
1321 g_assert_not_reached ();
1322 break;
1324 break;
1325 case SN_Multiply:
1326 if (arg0_type == MONO_TYPE_U4)
1327 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PMULUDQ, 0, arg0_type, fsig, args);
1328 else if (arg0_type == MONO_TYPE_R8)
1329 return emit_simd_ins_for_sig (cfg, klass, OP_MULPD, 0, arg0_type, fsig, args);
1330 else
1331 g_assert_not_reached ();
1332 case SN_MultiplyHigh:
1333 if (arg0_type == MONO_TYPE_I2)
1334 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PMULHW, arg0_type, fsig, args);
1335 else if (arg0_type == MONO_TYPE_U2)
1336 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PMULHUW, arg0_type, fsig, args);
1337 else
1338 g_assert_not_reached ();
1339 case SN_PackSignedSaturate:
1340 if (arg0_type == MONO_TYPE_I2)
1341 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PACKSSWB, arg0_type, fsig, args);
1342 else if (arg0_type == MONO_TYPE_I4)
1343 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PACKSSDW, arg0_type, fsig, args);
1344 else
1345 g_assert_not_reached ();
1346 case SN_PackUnsignedSaturate:
1347 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PACKUS, -1, arg0_type, fsig, args);
1348 case SN_Extract:
1349 g_assert (arg0_type == MONO_TYPE_U2);
1350 return emit_simd_ins_for_sig (cfg, klass, OP_XEXTRACT_I32, arg0_type, 0, fsig, args);
1351 case SN_Insert:
1352 g_assert (arg0_type == MONO_TYPE_I2 || arg0_type == MONO_TYPE_U2);
1353 return emit_simd_ins_for_sig (cfg, klass, OP_XINSERT_I2, 0, arg0_type, fsig, args);
1354 case SN_ShiftRightLogical: {
1355 gboolean is_imm = fsig->params [1]->type == MONO_TYPE_U1;
1356 SimdOp op = (SimdOp)0;
1357 switch (arg0_type) {
1358 case MONO_TYPE_I2:
1359 case MONO_TYPE_U2:
1360 op = is_imm ? SIMD_OP_SSE_PSRLW_IMM : SIMD_OP_SSE_PSRLW;
1361 break;
1362 case MONO_TYPE_I4:
1363 case MONO_TYPE_U4:
1364 op = is_imm ? SIMD_OP_SSE_PSRLD_IMM : SIMD_OP_SSE_PSRLD;
1365 break;
1366 case MONO_TYPE_I8:
1367 case MONO_TYPE_U8:
1368 op = is_imm ? SIMD_OP_SSE_PSRLQ_IMM : SIMD_OP_SSE_PSRLQ;
1369 break;
1370 default: g_assert_not_reached (); break;
1372 return emit_simd_ins_for_sig (cfg, klass, is_imm ? OP_XOP_X_X_I4 : OP_XOP_X_X_X, op, arg0_type, fsig, args);
1374 case SN_ShiftRightArithmetic: {
1375 gboolean is_imm = fsig->params [1]->type == MONO_TYPE_U1;
1376 SimdOp op = (SimdOp)0;
1377 switch (arg0_type) {
1378 case MONO_TYPE_I2:
1379 case MONO_TYPE_U2:
1380 op = is_imm ? SIMD_OP_SSE_PSRAW_IMM : SIMD_OP_SSE_PSRAW;
1381 break;
1382 case MONO_TYPE_I4:
1383 case MONO_TYPE_U4:
1384 op = is_imm ? SIMD_OP_SSE_PSRAD_IMM : SIMD_OP_SSE_PSRAD;
1385 break;
1386 default: g_assert_not_reached (); break;
1388 return emit_simd_ins_for_sig (cfg, klass, is_imm ? OP_XOP_X_X_I4 : OP_XOP_X_X_X, op, arg0_type, fsig, args);
1390 case SN_ShiftLeftLogical: {
1391 gboolean is_imm = fsig->params [1]->type == MONO_TYPE_U1;
1392 SimdOp op = (SimdOp)0;
1393 switch (arg0_type) {
1394 case MONO_TYPE_I2:
1395 case MONO_TYPE_U2:
1396 op = is_imm ? SIMD_OP_SSE_PSLLW_IMM : SIMD_OP_SSE_PSLLW;
1397 break;
1398 case MONO_TYPE_I4:
1399 case MONO_TYPE_U4:
1400 op = is_imm ? SIMD_OP_SSE_PSLLD_IMM : SIMD_OP_SSE_PSLLD;
1401 break;
1402 case MONO_TYPE_I8:
1403 case MONO_TYPE_U8:
1404 op = is_imm ? SIMD_OP_SSE_PSLLQ_IMM : SIMD_OP_SSE_PSLLQ;
1405 break;
1406 default: g_assert_not_reached (); break;
1408 return emit_simd_ins_for_sig (cfg, klass, is_imm ? OP_XOP_X_X_I4 : OP_XOP_X_X_X, op, arg0_type, fsig, args);
1410 case SN_ShiftLeftLogical128BitLane:
1411 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSLLDQ, 0, arg0_type, fsig, args);
1412 case SN_ShiftRightLogical128BitLane:
1413 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSRLDQ, 0, arg0_type, fsig, args);
1414 case SN_Shuffle: {
1415 if (fsig->param_count == 2) {
1416 g_assert (arg0_type == MONO_TYPE_I4 || arg0_type == MONO_TYPE_U4);
1417 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSHUFD, 0, arg0_type, fsig, args);
1418 } else if (fsig->param_count == 3) {
1419 g_assert (arg0_type == MONO_TYPE_R8);
1420 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_SHUFPD, 0, arg0_type, fsig, args);
1421 } else {
1422 g_assert_not_reached ();
1423 break;
1426 case SN_ShuffleHigh:
1427 g_assert (fsig->param_count == 2);
1428 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSHUFHW, 0, arg0_type, fsig, args);
1429 case SN_ShuffleLow:
1430 g_assert (fsig->param_count == 2);
1431 return emit_simd_ins_for_sig (cfg, klass, OP_SSE2_PSHUFLW, 0, arg0_type, fsig, args);
1432 case SN_SqrtScalar: {
1433 if (fsig->param_count == 1)
1434 return emit_simd_ins (cfg, klass, OP_SSE2_SQRTSD, args [0]->dreg, args[0]->dreg);
1435 else if (fsig->param_count == 2)
1436 return emit_simd_ins (cfg, klass, OP_SSE2_SQRTSD, args [0]->dreg, args[1]->dreg);
1437 else {
1438 g_assert_not_reached ();
1439 break;
1442 case SN_LoadScalarVector128: {
1443 int op = 0;
1444 switch (arg0_type) {
1445 case MONO_TYPE_I4:
1446 case MONO_TYPE_U4: op = OP_SSE2_MOVD; break;
1447 case MONO_TYPE_I8:
1448 case MONO_TYPE_U8: op = OP_SSE2_MOVQ; break;
1449 case MONO_TYPE_R8: op = OP_SSE2_MOVUPD; break;
1450 default: g_assert_not_reached(); break;
1452 return emit_simd_ins_for_sig (cfg, klass, op, 0, 0, fsig, args);
1454 default:
1455 return NULL;
1459 if (is_hw_intrinsics_class (klass, "Sse3", &is_64bit)) {
1460 if (!COMPILE_LLVM (cfg))
1461 return NULL;
1462 info = lookup_intrins_info (sse3_methods, sizeof (sse3_methods), cmethod);
1463 if (!info)
1464 return NULL;
1465 int id = info->id;
1467 /* Common case */
1468 if (info->op != 0)
1469 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1471 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE3);
1473 switch (id) {
1474 case SN_get_IsSupported:
1475 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1476 ins->type = STACK_I4;
1477 return ins;
1478 case SN_AddSubtract:
1479 if (arg0_type == MONO_TYPE_R4)
1480 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_ADDSUBPS, arg0_type, fsig, args);
1481 else if (arg0_type == MONO_TYPE_R8)
1482 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_ADDSUBPD, arg0_type, fsig, args);
1483 else
1484 g_assert_not_reached ();
1485 break;
1486 case SN_HorizontalAdd:
1487 if (arg0_type == MONO_TYPE_R4)
1488 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_HADDPS, arg0_type, fsig, args);
1489 else if (arg0_type == MONO_TYPE_R8)
1490 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_HADDPD, arg0_type, fsig, args);
1491 else
1492 g_assert_not_reached ();
1493 break;
1494 case SN_HorizontalSubtract:
1495 if (arg0_type == MONO_TYPE_R4)
1496 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_HSUBPS, arg0_type, fsig, args);
1497 else if (arg0_type == MONO_TYPE_R8)
1498 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_HSUBPD, arg0_type, fsig, args);
1499 else
1500 g_assert_not_reached ();
1501 break;
1502 default:
1503 g_assert_not_reached ();
1504 break;
1508 if (is_hw_intrinsics_class (klass, "Ssse3", &is_64bit)) {
1509 if (!COMPILE_LLVM (cfg))
1510 return NULL;
1511 info = lookup_intrins_info (ssse3_methods, sizeof (ssse3_methods), cmethod);
1512 if (!info)
1513 return NULL;
1514 int id = info->id;
1516 /* Common case */
1517 if (info->op != 0)
1518 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1520 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSSE3) != 0;
1522 switch (id) {
1523 case SN_get_IsSupported:
1524 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1525 ins->type = STACK_I4;
1526 return ins;
1527 case SN_AlignRight:
1528 if (args [2]->opcode == OP_ICONST)
1529 return emit_simd_ins_for_sig (cfg, klass, OP_SSSE3_ALIGNR, args [2]->inst_c0, arg0_type, fsig, args);
1530 return emit_invalid_operation (cfg, "mask in Ssse3.AlignRight must be constant");
1531 case SN_HorizontalAdd:
1532 if (arg0_type == MONO_TYPE_I2)
1533 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHADDW, arg0_type, fsig, args);
1534 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHADDD, arg0_type, fsig, args);
1535 case SN_HorizontalSubtract:
1536 if (arg0_type == MONO_TYPE_I2)
1537 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHSUBW, arg0_type, fsig, args);
1538 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PHSUBD, arg0_type, fsig, args);
1539 case SN_Sign:
1540 if (arg0_type == MONO_TYPE_I1)
1541 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PSIGNB, arg0_type, fsig, args);
1542 if (arg0_type == MONO_TYPE_I2)
1543 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PSIGNW, arg0_type, fsig, args);
1544 return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, SIMD_OP_SSE_PSIGND, arg0_type, fsig, args);
1545 default:
1546 g_assert_not_reached ();
1547 break;
1551 if (is_hw_intrinsics_class (klass, "Sse41", &is_64bit)) {
1552 if (!COMPILE_LLVM (cfg))
1553 return NULL;
1554 info = lookup_intrins_info (sse41_methods, sizeof (sse41_methods), cmethod);
1555 if (!info)
1556 return NULL;
1557 int id = info->id;
1559 /* Common case */
1560 if (info->op != 0)
1561 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1563 supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE41) != 0;
1565 switch (id) {
1566 case SN_get_IsSupported:
1567 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1568 ins->type = STACK_I4;
1569 return ins;
1570 case SN_DotProduct:
1571 if (args [2]->opcode == OP_ICONST && arg0_type == MONO_TYPE_R4)
1572 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_DPPS_IMM, args [2]->inst_c0, arg0_type, fsig, args);
1573 else if (args [2]->opcode == OP_ICONST && arg0_type == MONO_TYPE_R8)
1574 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_DPPD_IMM, args [2]->inst_c0, arg0_type, fsig, args);
1575 // FIXME: handle non-constant control byte (generate a switch)
1576 return emit_invalid_operation (cfg, "control byte in Sse41.DotProduct must be constant");
1577 case SN_MultipleSumAbsoluteDifferences:
1578 if (args [2]->opcode == OP_ICONST)
1579 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_MPSADBW_IMM, args [2]->inst_c0, arg0_type, fsig, args);
1580 // FIXME: handle non-constant control byte (generate a switch)
1581 return emit_invalid_operation (cfg, "control byte in Sse41.MultipleSumAbsoluteDifferences must be constant");
1582 case SN_Blend:
1583 if (args [2]->opcode == OP_ICONST)
1584 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_BLEND_IMM, args [2]->inst_c0, arg0_type, fsig, args);
1585 // FIXME: handle non-constant control byte (generate a switch)
1586 return emit_invalid_operation (cfg, "control byte in Sse41.Blend must be constant");
1587 case SN_BlendVariable:
1588 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_BLENDV, -1, arg0_type, fsig, args);
1589 case SN_Extract: {
1590 int op = 0;
1591 switch (arg0_type) {
1592 case MONO_TYPE_U1:
1593 case MONO_TYPE_U4:
1594 case MONO_TYPE_I4: op = OP_XEXTRACT_I32; break;
1595 case MONO_TYPE_I8:
1596 case MONO_TYPE_U8: op = OP_XEXTRACT_I64; break;
1597 case MONO_TYPE_R4: op = OP_XEXTRACT_R4; break;
1598 default: g_assert_not_reached(); break;
1600 return emit_simd_ins_for_sig (cfg, klass, op, arg0_type, 0, fsig, args);
1602 case SN_Insert:
1603 if (args [2]->opcode == OP_ICONST)
1604 return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_INSERT, -1, arg0_type, fsig, args);
1605 // FIXME: handle non-constant index (generate a switch)
1606 return emit_invalid_operation (cfg, "index in Sse41.Insert must be constant");
1607 default:
1608 g_assert_not_reached ();
1609 break;
1613 if (is_hw_intrinsics_class (klass, "Sse42", &is_64bit)) {
1614 if (!COMPILE_LLVM (cfg))
1615 return NULL;
1616 info = lookup_intrins_info (sse42_methods, sizeof (sse42_methods), cmethod);
1617 if (!info)
1618 return NULL;
1619 int id = info->id;
1621 /* Common case */
1622 if (info->op != 0)
1623 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1625 supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE42) != 0;
1627 switch (id) {
1628 case SN_get_IsSupported:
1629 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1630 ins->type = STACK_I4;
1631 return ins;
1632 case SN_Crc32: {
1633 MonoTypeEnum arg1_type = get_underlying_type (fsig->params [1]);
1634 return emit_simd_ins_for_sig (cfg, klass,
1635 arg1_type == MONO_TYPE_U8 ? OP_SSE42_CRC64 : OP_SSE42_CRC32,
1636 arg1_type, arg0_type, fsig, args);
1638 default:
1639 g_assert_not_reached ();
1640 break;
1644 if (is_hw_intrinsics_class (klass, "Pclmulqdq", &is_64bit)) {
1645 if (!COMPILE_LLVM (cfg))
1646 return NULL;
1647 info = lookup_intrins_info (pclmulqdq_methods, sizeof (pclmulqdq_methods), cmethod);
1648 if (!info)
1649 return NULL;
1650 int id = info->id;
1652 /* Common case */
1653 if (info->op != 0)
1654 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1656 supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_PCLMUL) != 0;
1658 switch (id) {
1659 case SN_CarrylessMultiply: {
1660 if (args [2]->opcode == OP_ICONST)
1661 return emit_simd_ins_for_sig (cfg, klass, OP_PCLMULQDQ_IMM, args [2]->inst_c0, arg0_type, fsig, args);
1662 // FIXME: handle non-constant control byte (generate a switch)
1663 return emit_invalid_operation (cfg, "index in Pclmulqdq.CarrylessMultiply must be constant");
1665 case SN_get_IsSupported:
1666 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1667 ins->type = STACK_I4;
1668 return ins;
1669 default:
1670 g_assert_not_reached ();
1671 break;
1675 if (is_hw_intrinsics_class (klass, "Aes", &is_64bit)) {
1676 if (!COMPILE_LLVM (cfg))
1677 return NULL;
1678 info = lookup_intrins_info (aes_methods, sizeof (aes_methods), cmethod);
1679 if (!info)
1680 return NULL;
1681 int id = info->id;
1683 /* Common case */
1684 if (info->op != 0)
1685 return emit_simd_ins_for_sig (cfg, klass, info->op, info->instc0, arg0_type, fsig, args);
1687 supported = COMPILE_LLVM (cfg) && (mini_get_cpu_features (cfg) & MONO_CPU_X86_AES) != 0;
1689 switch (id) {
1690 case SN_get_IsSupported:
1691 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1692 ins->type = STACK_I4;
1693 return ins;
1694 case SN_KeygenAssist: {
1695 if (args [1]->opcode == OP_ICONST)
1696 return emit_simd_ins_for_sig (cfg, klass, OP_AES_KEYGEN_IMM, args [1]->inst_c0, arg0_type, fsig, args);
1697 // FIXME: handle non-constant control byte (generate a switch)
1698 return emit_invalid_operation (cfg, "control byte in Aes.KeygenAssist must be constant");
1700 default:
1701 g_assert_not_reached ();
1702 break;
1706 if (is_hw_intrinsics_class (klass, "Popcnt", &is_64bit)) {
1707 info = lookup_intrins_info (popcnt_methods, sizeof (popcnt_methods), cmethod);
1708 if (!info)
1709 return NULL;
1710 int id = info->id;
1712 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_POPCNT) != 0;
1714 switch (id) {
1715 case SN_get_IsSupported:
1716 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1717 ins->type = STACK_I4;
1718 return ins;
1719 case SN_PopCount:
1720 if (!supported)
1721 return NULL;
1722 MONO_INST_NEW (cfg, ins, is_64bit ? OP_POPCNT64 : OP_POPCNT32);
1723 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1724 ins->sreg1 = args [0]->dreg;
1725 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1726 MONO_ADD_INS (cfg->cbb, ins);
1727 return ins;
1728 default:
1729 return NULL;
1732 if (is_hw_intrinsics_class (klass, "Lzcnt", &is_64bit)) {
1733 info = lookup_intrins_info (lzcnt_methods, sizeof (lzcnt_methods), cmethod);
1734 if (!info)
1735 return NULL;
1736 int id = info->id;
1738 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_LZCNT) != 0;
1740 switch (id) {
1741 case SN_get_IsSupported:
1742 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1743 ins->type = STACK_I4;
1744 return ins;
1745 case SN_LeadingZeroCount:
1746 if (!supported)
1747 return NULL;
1748 MONO_INST_NEW (cfg, ins, is_64bit ? OP_LZCNT64 : OP_LZCNT32);
1749 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1750 ins->sreg1 = args [0]->dreg;
1751 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1752 MONO_ADD_INS (cfg->cbb, ins);
1753 return ins;
1754 default:
1755 return NULL;
1758 if (is_hw_intrinsics_class (klass, "Bmi1", &is_64bit)) {
1759 if (!COMPILE_LLVM (cfg))
1760 return NULL;
1761 info = lookup_intrins_info (bmi1_methods, sizeof (bmi1_methods), cmethod);
1762 if (!info)
1763 return NULL;
1764 int id = info->id;
1766 g_assert (id != -1);
1767 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_BMI1) != 0;
1769 switch (id) {
1770 case SN_get_IsSupported:
1771 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1772 ins->type = STACK_I4;
1773 return ins;
1774 case SN_AndNot: {
1775 // (a ^ -1) & b
1776 // LLVM replaces it with `andn`
1777 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1778 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1779 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LXOR_IMM : OP_IXOR_IMM, tmp_reg, args [0]->dreg, -1);
1780 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, tmp_reg, args [1]->dreg);
1781 return ins;
1783 case SN_BitFieldExtract: {
1784 if (fsig->param_count == 2) {
1785 MONO_INST_NEW (cfg, ins, is_64bit ? OP_BEXTR64 : OP_BEXTR32);
1786 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1787 ins->sreg1 = args [0]->dreg;
1788 ins->sreg2 = args [1]->dreg;
1789 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1790 MONO_ADD_INS (cfg->cbb, ins);
1791 return ins;
1794 case SN_GetMaskUpToLowestSetBit: {
1795 // x ^ (x - 1)
1796 // LLVM replaces it with `blsmsk`
1797 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1798 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1799 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LSUB_IMM : OP_ISUB_IMM, tmp_reg, args [0]->dreg, 1);
1800 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LXOR : OP_IXOR, result_reg, args [0]->dreg, tmp_reg);
1801 return ins;
1803 case SN_ResetLowestSetBit: {
1804 // x & (x - 1)
1805 // LLVM replaces it with `blsr`
1806 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1807 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1808 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LSUB_IMM : OP_ISUB_IMM, tmp_reg, args [0]->dreg, 1);
1809 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, args [0]->dreg, tmp_reg);
1810 return ins;
1812 case SN_ExtractLowestSetBit: {
1813 // x & (0 - x)
1814 // LLVM replaces it with `blsi`
1815 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1816 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1817 int zero_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1818 MONO_EMIT_NEW_ICONST (cfg, zero_reg, 0);
1819 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LSUB : OP_ISUB, tmp_reg, zero_reg, args [0]->dreg);
1820 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, args [0]->dreg, tmp_reg);
1821 return ins;
1823 case SN_TrailingZeroCount:
1824 MONO_INST_NEW (cfg, ins, is_64bit ? OP_CTTZ64 : OP_CTTZ32);
1825 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1826 ins->sreg1 = args [0]->dreg;
1827 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1828 MONO_ADD_INS (cfg->cbb, ins);
1829 return ins;
1830 default:
1831 g_assert_not_reached ();
1834 if (is_hw_intrinsics_class (klass, "Bmi2", &is_64bit)) {
1835 if (!COMPILE_LLVM (cfg))
1836 return NULL;
1837 info = lookup_intrins_info (bmi2_methods, sizeof (bmi2_methods), cmethod);
1838 if (!info)
1839 return NULL;
1840 int id = info->id;
1842 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_BMI2) != 0;
1844 switch (id) {
1845 case SN_get_IsSupported:
1846 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
1847 ins->type = STACK_I4;
1848 return ins;
1849 case SN_MultiplyNoFlags:
1850 if (fsig->param_count == 2) {
1851 MONO_INST_NEW (cfg, ins, is_64bit ? OP_MULX_H64 : OP_MULX_H32);
1852 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1853 ins->sreg1 = args [0]->dreg;
1854 ins->sreg2 = args [1]->dreg;
1855 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1856 MONO_ADD_INS (cfg->cbb, ins);
1857 } else if (fsig->param_count == 3) {
1858 MONO_INST_NEW (cfg, ins, is_64bit ? OP_MULX_HL64 : OP_MULX_HL32);
1859 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1860 ins->sreg1 = args [0]->dreg;
1861 ins->sreg2 = args [1]->dreg;
1862 ins->sreg3 = args [2]->dreg;
1863 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1864 MONO_ADD_INS (cfg->cbb, ins);
1865 } else {
1866 g_assert_not_reached ();
1868 return ins;
1869 case SN_ZeroHighBits:
1870 MONO_INST_NEW (cfg, ins, is_64bit ? OP_BZHI64 : OP_BZHI32);
1871 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1872 ins->sreg1 = args [0]->dreg;
1873 ins->sreg2 = args [1]->dreg;
1874 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1875 MONO_ADD_INS (cfg->cbb, ins);
1876 return ins;
1877 case SN_ParallelBitExtract:
1878 MONO_INST_NEW (cfg, ins, is_64bit ? OP_PEXT64 : OP_PEXT32);
1879 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1880 ins->sreg1 = args [0]->dreg;
1881 ins->sreg2 = args [1]->dreg;
1882 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1883 MONO_ADD_INS (cfg->cbb, ins);
1884 return ins;
1885 case SN_ParallelBitDeposit:
1886 MONO_INST_NEW (cfg, ins, is_64bit ? OP_PDEP64 : OP_PDEP32);
1887 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1888 ins->sreg1 = args [0]->dreg;
1889 ins->sreg2 = args [1]->dreg;
1890 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1891 MONO_ADD_INS (cfg->cbb, ins);
1892 return ins;
1893 default:
1894 g_assert_not_reached ();
1898 if (is_hw_intrinsics_class (klass, "X86Base", &is_64bit)) {
1899 if (!COMPILE_LLVM (cfg))
1900 return NULL;
1902 info = lookup_intrins_info (x86base_methods, sizeof (x86base_methods), cmethod);
1903 if (!info)
1904 return NULL;
1905 int id = info->id;
1907 switch (id) {
1908 case SN_get_IsSupported:
1909 EMIT_NEW_ICONST (cfg, ins, 1);
1910 ins->type = STACK_I4;
1911 return ins;
1912 case SN_BitScanForward:
1913 MONO_INST_NEW (cfg, ins, is_64bit ? OP_X86_BSF64 : OP_X86_BSF32);
1914 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1915 ins->sreg1 = args [0]->dreg;
1916 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1917 MONO_ADD_INS (cfg->cbb, ins);
1918 return ins;
1919 case SN_BitScanReverse:
1920 MONO_INST_NEW (cfg, ins, is_64bit ? OP_X86_BSR64 : OP_X86_BSR32);
1921 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
1922 ins->sreg1 = args [0]->dreg;
1923 ins->type = is_64bit ? STACK_I8 : STACK_I4;
1924 MONO_ADD_INS (cfg->cbb, ins);
1925 return ins;
1926 default:
1927 g_assert_not_reached ();
1931 return NULL;
1934 static guint16 vector_128_methods [] = {
1935 SN_AsByte,
1936 SN_AsDouble,
1937 SN_AsInt16,
1938 SN_AsInt32,
1939 SN_AsInt64,
1940 SN_AsSByte,
1941 SN_AsSingle,
1942 SN_AsUInt16,
1943 SN_AsUInt32,
1944 SN_AsUInt64,
1945 SN_Create,
1946 SN_CreateScalarUnsafe,
1949 static guint16 vector_128_t_methods [] = {
1950 SN_get_Count,
1951 SN_get_Zero,
1954 static MonoInst*
1955 emit_vector128 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
1957 MonoClass *klass;
1958 int id;
1960 if (!COMPILE_LLVM (cfg))
1961 return NULL;
1963 klass = cmethod->klass;
1964 id = lookup_intrins (vector_128_methods, sizeof (vector_128_methods), cmethod);
1965 if (id == -1)
1966 return NULL;
1968 if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256"))
1969 return NULL; // TODO: Fix Vector256.WithUpper/WithLower
1971 MonoTypeEnum arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID;
1973 switch (id) {
1974 case SN_AsByte:
1975 case SN_AsDouble:
1976 case SN_AsInt16:
1977 case SN_AsInt32:
1978 case SN_AsInt64:
1979 case SN_AsSByte:
1980 case SN_AsSingle:
1981 case SN_AsUInt16:
1982 case SN_AsUInt32:
1983 case SN_AsUInt64:
1984 return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1);
1985 case SN_Create: {
1986 MonoType *etype = get_vector_t_elem_type (fsig->ret);
1987 if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) {
1988 return emit_simd_ins (cfg, klass, type_to_expand_op (etype), args [0]->dreg, -1);
1989 } else {
1990 MonoInst *ins, *load;
1992 // FIXME: Optimize this
1993 MONO_INST_NEW (cfg, ins, OP_LOCALLOC_IMM);
1994 ins->dreg = alloc_preg (cfg);
1995 ins->inst_imm = 16;
1996 MONO_ADD_INS (cfg->cbb, ins);
1998 int esize = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
1999 int store_opcode = mono_type_to_store_membase (cfg, etype);
2000 for (int i = 0; i < fsig->param_count; ++i)
2001 MONO_EMIT_NEW_STORE_MEMBASE (cfg, store_opcode, ins->dreg, i * esize, args [i]->dreg);
2003 load = emit_simd_ins (cfg, klass, OP_SSE_LOADU, ins->dreg, -1);
2004 load->inst_c0 = 16;
2005 load->inst_c1 = get_underlying_type (etype);
2006 return load;
2009 case SN_CreateScalarUnsafe:
2010 return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_UNSAFE, -1, arg0_type, fsig, args);
2011 default:
2012 break;
2015 return NULL;
2018 static MonoInst*
2019 emit_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2021 MonoInst *ins;
2022 MonoType *type, *etype;
2023 MonoClass *klass;
2024 int size, len, id;
2026 id = lookup_intrins (vector_128_t_methods, sizeof (vector_128_t_methods), cmethod);
2027 if (id == -1)
2028 return NULL;
2030 klass = cmethod->klass;
2031 type = m_class_get_byval_arg (klass);
2032 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
2033 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
2034 g_assert (size);
2035 len = 16 / size;
2037 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
2038 return NULL;
2040 if (cfg->verbose_level > 1) {
2041 char *name = mono_method_full_name (cmethod, TRUE);
2042 printf (" SIMD intrinsic %s\n", name);
2043 g_free (name);
2046 switch (id) {
2047 case SN_get_Count:
2048 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
2049 break;
2050 EMIT_NEW_ICONST (cfg, ins, len);
2051 return ins;
2052 case SN_get_Zero: {
2053 return emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
2055 default:
2056 break;
2059 return NULL;
2062 static guint16 vector_256_t_methods [] = {
2063 SN_get_Count,
2066 static MonoInst*
2067 emit_vector256_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2069 MonoInst *ins;
2070 MonoType *type, *etype;
2071 MonoClass *klass;
2072 int size, len, id;
2074 id = lookup_intrins (vector_256_t_methods, sizeof (vector_256_t_methods), cmethod);
2075 if (id == -1)
2076 return NULL;
2078 klass = cmethod->klass;
2079 type = m_class_get_byval_arg (klass);
2080 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
2081 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
2082 g_assert (size);
2083 len = 32 / size;
2085 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
2086 return NULL;
2088 if (cfg->verbose_level > 1) {
2089 char *name = mono_method_full_name (cmethod, TRUE);
2090 printf (" SIMD intrinsic %s\n", name);
2091 g_free (name);
2094 switch (id) {
2095 case SN_get_Count:
2096 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
2097 break;
2098 EMIT_NEW_ICONST (cfg, ins, len);
2099 return ins;
2100 default:
2101 break;
2104 return NULL;
2107 #endif // !TARGET_ARM64
2109 MonoInst*
2110 mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2112 const char *class_name;
2113 const char *class_ns;
2114 MonoImage *image = m_class_get_image (cmethod->klass);
2116 if (image != mono_get_corlib ())
2117 return NULL;
2119 class_ns = m_class_get_name_space (cmethod->klass);
2120 class_name = m_class_get_name (cmethod->klass);
2122 // If cmethod->klass is nested, the namespace is on the enclosing class.
2123 if (m_class_get_nested_in (cmethod->klass))
2124 class_ns = m_class_get_name_space (m_class_get_nested_in (cmethod->klass));
2126 #ifdef TARGET_ARM64
2127 if (!strcmp (class_ns, "System.Runtime.Intrinsics.Arm")) {
2128 MonoInst *ins = emit_arm64_intrinsics (cfg, cmethod, fsig, args);
2129 return ins;
2131 #endif // TARGET_ARM64
2133 #ifdef TARGET_AMD64 // TODO: test and enable for x86 too
2134 if (!strcmp (class_ns, "System.Runtime.Intrinsics.X86")) {
2135 MonoInst *ins = emit_x86_intrinsics (cfg, cmethod, fsig, args);
2136 return ins;
2139 // FIXME: implement Vector64<T>, Vector128<T> and Vector<T> for Arm64
2141 if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
2142 if (!strcmp (class_name, "Vector128`1"))
2143 return emit_vector128_t (cfg, cmethod, fsig, args);
2144 if (!strcmp (class_name, "Vector128"))
2145 return emit_vector128 (cfg, cmethod, fsig, args);
2146 if (!strcmp (class_name, "Vector256`1"))
2147 return emit_vector256_t (cfg, cmethod, fsig, args);
2150 if (!strcmp (class_ns, "System.Numerics")) {
2151 if (!strcmp (class_name, "Vector"))
2152 return emit_sys_numerics_vector (cfg, cmethod, fsig, args);
2153 if (!strcmp (class_name, "Vector`1"))
2154 return emit_sys_numerics_vector_t (cfg, cmethod, fsig, args);
2156 #endif // TARGET_AMD64
2158 return NULL;
2161 void
2162 mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
2166 void
2167 mono_simd_simplify_indirection (MonoCompile *cfg)
2171 #else
2173 MONO_EMPTY_SOURCE_FILE (simd_intrinsics_netcore);
2175 #endif
2177 #endif /* DISABLE_JIT */
2180 #if defined(ENABLE_NETCORE) && defined(TARGET_AMD64)
/*
 * ves_icall_System_Runtime_Intrinsics_X86_X86Base___cpuidex:
 *
 *   Fill ABCD with the four values returned by mono_hwcap_x86_call_cpuidex ()
 * for FUNCTION_ID / SUBFUNCTION_ID.
 * NOTE(review): presumably these are the EAX, EBX, ECX and EDX registers in
 * that order -- confirm against mono_hwcap_x86_call_cpuidex ().
 *
 * When MONO_CROSS_COMPILE is defined the query is not performed; ABCD is
 * zeroed in that case so the caller never reads indeterminate values.
 */
void
ves_icall_System_Runtime_Intrinsics_X86_X86Base___cpuidex (int abcd[4], int function_id, int subfunction_id)
{
#ifndef MONO_CROSS_COMPILE
	mono_hwcap_x86_call_cpuidex (function_id, subfunction_id,
		&abcd [0], &abcd [1], &abcd [2], &abcd [3]);
#else
	abcd [0] = 0;
	abcd [1] = 0;
	abcd [2] = 0;
	abcd [3] = 0;
#endif
}
2189 #endif