2 * SIMD Intrinsics support for netcore
6 #include <mono/utils/mono-compiler.h>
7 #include <mono/metadata/icall-decl.h>
10 #if defined(DISABLE_JIT)
13 mono_simd_intrinsics_init (void)
20 * Only LLVM is supported as a backend.
23 #include "mini-runtime.h"
26 #include "mini-llvm.h"
28 #include "mono/utils/bsearch.h"
29 #include <mono/metadata/abi-details.h>
30 #include <mono/metadata/reflection-internals.h>
31 #include <mono/utils/mono-hwcap.h>
33 #if defined (MONO_ARCH_SIMD_INTRINSICS) && defined(ENABLE_NETCORE)
/*
 * Method-name string table built with an X-macro: simd-methods-netcore.h is
 * included three times below, each time with METHOD/METHOD2 redefined.
 * MSGSTRFIELD(__LINE__) pastes a line number onto "str" to form the field
 * name used for each entry.
 */
35 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
36 #define MSGSTRFIELD1(line) str##line
37 static const struct msgstr_t
{
/* Pass 1: declare one char array field per method, sized by sizeof of its name string. */
38 #define METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
39 #define METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
40 #include "simd-methods-netcore.h"
/* Pass 2: expand each entry to its name string followed by a comma (initializer values). */
44 #define METHOD(name) #name,
45 #define METHOD2(str,name) str,
46 #include "simd-methods-netcore.h"
/* Pass 3: define SN_<name> as the byte offset of that method's field inside struct msgstr_t. */
52 #define METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
53 #define METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
54 #include "simd-methods-netcore.h"
/* Turn an SN_ offset back into a pointer to the name string stored in method_names. */
56 #define method_name(idx) ((const char*)&method_names + (idx))
/*
 * SIMD register size. mono_simd_intrinsics_init copies it into
 * mono_simd_register_size, and emit_sys_numerics_vector_t divides it by the
 * element value size to obtain the element count (so presumably bytes).
 */
58 static int register_size
;
61 // One of the SN_ constants
/*
 * Initialization hook for SIMD support: publishes register_size through
 * mono_simd_register_size.
 * NOTE(review): lines are missing from this view, so it cannot be determined
 * here where register_size is assigned or whether the AVX feature test below
 * is live or compiled out.
 */
70 mono_simd_intrinsics_init (void)
74 if ((mini_get_cpu_features () & MONO_CPU_X86_AVX
) != 0)
77 /* Tell the class init code the size of the System.Numerics.Register type */
78 mono_simd_register_size
= register_size
;
82 mono_emit_simd_field_load (MonoCompile
*cfg
, MonoClassField
*field
, MonoInst
*addr
)
/*
 * Comparator passed to mono_binary_search for arrays of guint16 method ids.
 * KEY is the method name being looked up; VALUE points at one array element,
 * which is mapped to its name with method_name () and compared with strcmp.
 */
88 simd_intrinsic_compare_by_name (const void *key
, const void *value
)
90 return strcmp ((const char*)key
, method_name (*(guint16
*)value
));
/*
 * Comparator passed to mono_binary_search for arrays of SimdIntrinsic entries.
 * KEY is the method name being looked up; VALUE points at one entry, which is
 * ordered by the name that its id field maps to via method_name ().
 */
94 simd_intrinsic_info_compare_by_name (const void *key
, const void *value
)
96 SimdIntrinsic
*info
= (SimdIntrinsic
*)value
;
97 return strcmp ((const char*)key
, method_name (info
->id
));
/*
 * Binary-search INTRINSICS, an array of guint16 method ids, for cmethod->name.
 * SIZE is divided by sizeof (guint16) to get the element count, so callers
 * pass the array size in bytes (e.g. sizeof (vector_methods)). Because the
 * search compares by name, the array has to be sorted by method name.
 * NOTE(review): the lines that convert RESULT into the return value are not
 * visible in this view.
 */
101 lookup_intrins (guint16
*intrinsics
, int size
, MonoMethod
*cmethod
)
103 const guint16
*result
= (const guint16
*)mono_binary_search (cmethod
->name
, intrinsics
, size
/ sizeof (guint16
), sizeof (guint16
), &simd_intrinsic_compare_by_name
);
/*
 * Binary-search INTRINSICS, an array of SimdIntrinsic entries, for
 * cmethod->name and return the matching entry (the mono_binary_search result
 * is returned directly). SIZE is divided by sizeof (SimdIntrinsic) to get the
 * element count, so callers pass the array size in bytes.
 */
111 static SimdIntrinsic
*
112 lookup_intrins_info (SimdIntrinsic
*intrinsics
, int size
, MonoMethod
*cmethod
)
/*
 * Ordering check: walks adjacent entries and compares their names character
 * by character; if an earlier name sorts after the next one, both names are
 * printed and the runtime asserts.
 * NOTE(review): lines are missing here; this loop may be wrapped in a
 * preprocessor guard that is not visible in this view.
 */
115 for (int i
= 0; i
< (size
/ sizeof (SimdIntrinsic
)) - 1; ++i
) {
116 const char *n1
= method_name (intrinsics
[i
].id
);
117 const char *n2
= method_name (intrinsics
[i
+ 1].id
);
118 int len1
= strlen (n1
);
119 int len2
= strlen (n2
);
120 for (int j
= 0; j
< len1
&& j
< len2
; ++j
) {
121 if (n1
[j
] > n2
[j
]) {
122 printf ("%s %s\n", n1
, n2
);
123 g_assert_not_reached ();
124 } else if (n1
[j
] < n2
[j
]) {
131 return (SimdIntrinsic
*)mono_binary_search (cmethod
->name
, intrinsics
, size
/ sizeof (SimdIntrinsic
), sizeof (SimdIntrinsic
), &simd_intrinsic_info_compare_by_name
);
135 * Return a simd vreg for the simd value represented by SRC.
136 * SRC is the 'this' argument to methods.
137 * Set INDIRECT to TRUE if the value was loaded from memory.
/*
 * Selects how to obtain a SIMD vreg for SRC by inspecting, in order: the
 * OP_XMOVE opcode, the OP_LDADDR opcode, the destination register class in
 * the instruction spec, and finally a pointer-typed stack value.
 * NOTE(review): the statements that set *indirect and return from each branch
 * are not visible in this view.
 */
140 load_simd_vreg_class (MonoCompile
*cfg
, MonoClass
*klass
, MonoInst
*src
, gboolean
*indirect
)
142 const char *spec
= INS_INFO (src
->opcode
);
146 if (src
->opcode
== OP_XMOVE
) {
/* Address-of: use the dreg of the instruction stored in src->inst_p0. */
148 } else if (src
->opcode
== OP_LDADDR
) {
149 int res
= ((MonoInst
*)src
->inst_p0
)->dreg
;
/* The instruction spec says SRC already produces an 'x' (SIMD) destination. */
151 } else if (spec
[MONO_INST_DEST
] == 'x') {
/* SRC is a pointer: emit an OP_LOADX_MEMBASE that loads through src->dreg. */
153 } else if (src
->type
== STACK_PTR
|| src
->type
== STACK_MP
) {
158 MONO_INST_NEW (cfg
, ins
, OP_LOADX_MEMBASE
);
160 ins
->sreg1
= src
->dreg
;
161 ins
->type
= STACK_VTYPE
;
162 ins
->dreg
= alloc_ireg (cfg
);
163 MONO_ADD_INS (cfg
->cbb
, ins
);
/* Nothing matched: log the stack type, dump the instruction and assert. */
166 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src
->type
);
167 mono_print_ins (src
);
168 g_assert_not_reached ();
/*
 * Convenience wrapper around load_simd_vreg_class that passes the class of
 * CMETHOD (cmethod->klass) as the SIMD class; SRC and INDIRECT are forwarded
 * unchanged.
 */
172 load_simd_vreg (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
*src
, gboolean
*indirect
)
174 return load_simd_vreg_class (cfg
, cmethod
->klass
, src
, indirect
);
177 /* Create and emit a SIMD instruction, dreg is auto-allocated */
/*
 * The destination register class and the stack type of the new instruction
 * are chosen from the destination character of the opcode's INS_INFO spec:
 *   'x' -> alloc_xreg, STACK_VTYPE
 *   'i' -> alloc_ireg, STACK_I4
 *   'l' -> alloc_lreg, STACK_I8
 *   'f' -> alloc_freg, STACK_R8
 * The instruction is then appended to the current basic block (cfg->cbb).
 * NOTE(review): the lines that store KLASS, SREG1 and SREG2 into the
 * instruction and return it are not visible in this view.
 */
179 emit_simd_ins (MonoCompile
*cfg
, MonoClass
*klass
, int opcode
, int sreg1
, int sreg2
)
181 const char *spec
= INS_INFO (opcode
);
184 MONO_INST_NEW (cfg
, ins
, opcode
);
185 if (spec
[MONO_INST_DEST
] == 'x') {
186 ins
->dreg
= alloc_xreg (cfg
);
187 ins
->type
= STACK_VTYPE
;
188 } else if (spec
[MONO_INST_DEST
] == 'i') {
189 ins
->dreg
= alloc_ireg (cfg
);
190 ins
->type
= STACK_I4
;
191 } else if (spec
[MONO_INST_DEST
] == 'l') {
192 ins
->dreg
= alloc_lreg (cfg
);
193 ins
->type
= STACK_I8
;
194 } else if (spec
[MONO_INST_DEST
] == 'f') {
195 ins
->dreg
= alloc_freg (cfg
);
196 ins
->type
= STACK_R8
;
201 MONO_ADD_INS (cfg
->cbb
, ins
);
/*
 * Emit OPCODE through emit_simd_ins, wiring the source registers from ARGS
 * according to the signature: args [0] and args [1] become the first two
 * sources when present (-1 otherwise) and args [2] becomes sreg3 for
 * three-parameter signatures. At most three parameters are supported
 * (asserted). INSTC0 and INSTC1 are stored into inst_c0 / inst_c1.
 * NOTE(review): any guards around the inst_c0 / inst_c1 stores and the return
 * statement are not visible in this view.
 */
206 emit_simd_ins_for_sig (MonoCompile
*cfg
, MonoClass
*klass
, int opcode
, int instc0
, int instc1
, MonoMethodSignature
*fsig
, MonoInst
**args
)
208 g_assert (fsig
->param_count
<= 3);
209 MonoInst
* ins
= emit_simd_ins (cfg
, klass
, opcode
,
210 fsig
->param_count
> 0 ? args
[0]->dreg
: -1,
211 fsig
->param_count
> 1 ? args
[1]->dreg
: -1);
213 ins
->inst_c0
= instc0
;
215 ins
->inst_c1
= instc1
;
216 if (fsig
->param_count
== 3)
217 ins
->sreg3
= args
[2]->dreg
;
/*
 * Check whether KLASS is the hardware-intrinsics class called NAME.
 * A class named "X64" or "Arm64" that is nested in another class is matched
 * through the name of its enclosing class; any other class is matched by its
 * own name.
 * NOTE(review): the lines that write *is_64bit are not visible in this view;
 * presumably it is set for the nested X64/Arm64 case - confirm.
 */
222 is_hw_intrinsics_class (MonoClass
*klass
, const char *name
, gboolean
*is_64bit
)
224 const char *class_name
= m_class_get_name (klass
);
225 if ((!strcmp (class_name
, "X64") || !strcmp (class_name
, "Arm64")) && m_class_get_nested_in (klass
)) {
227 return !strcmp (m_class_get_name (m_class_get_nested_in (klass
)), name
);
230 return !strcmp (class_name
, name
);
/*
 * Reduce TYPE to the MonoTypeEnum value of its underlying element:
 * for a pointer type, the type of the pointed-to element class; for a generic
 * instance, the type of its first generic argument.
 * NOTE(review): the fallback for all other types is not visible in this view.
 */
235 get_underlying_type (MonoType
* type
)
237 MonoClass
* klass
= mono_class_from_mono_type_internal (type
);
238 if (type
->type
== MONO_TYPE_PTR
) // e.g. int* => MONO_TYPE_I4
239 return m_class_get_byval_arg (m_class_get_element_class (klass
))->type
;
240 else if (type
->type
== MONO_TYPE_GENERICINST
) // e.g. Vector128<int> => MONO_TYPE_I4
241 return mono_class_get_context (klass
)->class_inst
->type_argv
[0]->type
;
/*
 * Emit a vector compare of ARG1 and ARG2 for element type ETYPE.
 * OP_XCOMPARE_FP is used when ETYPE is R4 or R8, OP_XCOMPARE otherwise.
 * The compare kind stored in inst_c0 is CMP_EQ (callers in this file overwrite
 * it for ordering comparisons) and inst_c1 carries ETYPE.
 * NOTE(review): the declaration of INS and the return statement are not
 * visible in this view.
 */
247 emit_xcompare (MonoCompile
*cfg
, MonoClass
*klass
, MonoTypeEnum etype
, MonoInst
*arg1
, MonoInst
*arg2
)
250 gboolean is_fp
= etype
== MONO_TYPE_R4
|| etype
== MONO_TYPE_R8
;
252 ins
= emit_simd_ins (cfg
, klass
, is_fp
? OP_XCOMPARE_FP
: OP_XCOMPARE
, arg1
->dreg
, arg2
->dreg
);
253 ins
->inst_c0
= CMP_EQ
;
254 ins
->inst_c1
= etype
;
/*
 * Fetch the element type of a generic vector type: VECTOR_TYPE must be a
 * generic instance (asserted), its class name is compared against "Vector`1",
 * "Vector128`1" and "Vector256`1", and ETYPE is set to the first generic
 * argument.
 * NOTE(review): the opening of the name check (presumably a g_assert) and the
 * return statement are not visible in this view.
 */
259 get_vector_t_elem_type (MonoType
*vector_type
)
264 g_assert (vector_type
->type
== MONO_TYPE_GENERICINST
);
265 klass
= mono_class_from_mono_type_internal (vector_type
);
267 !strcmp (m_class_get_name (klass
), "Vector`1") ||
268 !strcmp (m_class_get_name (klass
), "Vector128`1") ||
269 !strcmp (m_class_get_name (klass
), "Vector256`1"));
270 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
/*
 * Map an element type to an opcode by switching on type->type; callers in this
 * file pass the result to emit_simd_ins together with a scalar source register.
 * NOTE(review): the case labels and returned opcodes are not visible in this
 * view; only the g_assert_not_reached () (presumably the default case) is.
 */
277 type_to_expand_op (MonoType
*type
)
279 switch (type
->type
) {
297 g_assert_not_reached ();
/*
 * Method ids handled by emit_sys_numerics_vector. The table is searched with
 * lookup_intrins (binary search by method name), so entries must stay sorted
 * by name. NOTE(review): most entries are not visible in this view.
 */
301 static guint16 vector_methods
[] = {
310 SN_get_IsHardwareAccelerated
,
/*
 * Intrinsic expansion for the methods listed in vector_methods.
 * The method is identified by name through lookup_intrins and then dispatched
 * on its SN_ id.
 * NOTE(review): many lines of this function (declarations, the switch header,
 * early returns, the code that sets SUPPORTED) are not visible in this view.
 */
314 emit_sys_numerics_vector (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
317 gboolean supported
= FALSE
;
321 id
= lookup_intrins (vector_methods
, sizeof (vector_methods
), cmethod
);
325 //printf ("%s\n", mono_method_full_name (cmethod, 1));
327 #ifdef MONO_ARCH_SIMD_INTRINSICS
/* With verbose_level > 1, log the full name of the method being expanded. */
331 if (cfg
->verbose_level
> 1) {
332 char *name
= mono_method_full_name (cmethod
, TRUE
);
333 printf (" SIMD intrinsic %s\n", name
);
/* IsHardwareAccelerated folds to the integer constant 1 or 0 depending on SUPPORTED. */
338 case SN_get_IsHardwareAccelerated
:
339 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
340 ins
->type
= STACK_I4
;
/* ConvertToInt32: the argument's element type must be R4 (asserted); emits OP_CVTPS2DQ. */
342 case SN_ConvertToInt32
:
343 etype
= get_vector_t_elem_type (fsig
->params
[0]);
344 g_assert (etype
->type
== MONO_TYPE_R4
);
345 return emit_simd_ins (cfg
, mono_class_from_mono_type_internal (fsig
->ret
), OP_CVTPS2DQ
, args
[0]->dreg
, -1);
/*
 * ConvertToSingle: the argument's element type must be I4 or U4 (asserted);
 * emits OP_CVTDQ2PS. NOTE(review): the statement guarded by the U4 test is
 * not visible in this view.
 */
346 case SN_ConvertToSingle
:
347 etype
= get_vector_t_elem_type (fsig
->params
[0]);
348 g_assert (etype
->type
== MONO_TYPE_I4
|| etype
->type
== MONO_TYPE_U4
);
350 if (etype
->type
== MONO_TYPE_U4
)
352 return emit_simd_ins (cfg
, mono_class_from_mono_type_internal (fsig
->ret
), OP_CVTDQ2PS
, args
[0]->dreg
, -1);
/* NOTE(review): the shared handling of the remaining conversions is not visible in this view. */
353 case SN_ConvertToDouble
:
354 case SN_ConvertToInt64
:
355 case SN_ConvertToUInt32
:
356 case SN_ConvertToUInt64
:
/*
 * Method ids handled by emit_sys_numerics_vector_t. The table is searched with
 * lookup_intrins (binary search by method name), so entries must stay sorted
 * by name. NOTE(review): most entries are not visible in this view.
 */
368 static guint16 vector_t_methods
[] = {
373 SN_GreaterThanOrEqual
,
/*
 * Intrinsic expansion for the methods listed in vector_t_methods, invoked on a
 * generic vector class (cmethod->klass).
 *
 * Setup visible here: TYPE is the by-value type of the class, ETYPE its first
 * generic argument, SIZE the value size of ETYPE and LEN = register_size / SIZE
 * the number of elements per vector.
 *
 * NOTE(review): this view is missing many lines of the function (most case
 * labels, declarations, break/return statements), so the comments below only
 * describe the statements that are actually visible.
 */
396 emit_sys_numerics_vector_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
399 MonoType
*type
, *etype
;
402 gboolean is_unsigned
;
404 static const float r4_one
= 1.0f
;
405 static const double r8_one
= 1.0;
407 id
= lookup_intrins (vector_t_methods
, sizeof (vector_t_methods
), cmethod
);
411 klass
= cmethod
->klass
;
412 type
= m_class_get_byval_arg (klass
);
413 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
414 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
416 len
= register_size
/ size
;
/* Element types that are not primitive, or are char/bool, are singled out here (the action taken is not visible). */
418 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
421 if (cfg
->verbose_level
> 1) {
422 char *name
= mono_method_full_name (cmethod
, TRUE
);
423 printf (" SIMD intrinsic %s\n", name
);
/* Parameterless method returning int32: the result is the constant LEN. */
429 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
431 EMIT_NEW_ICONST (cfg
, ins
, len
);
/* Parameterless method returning the vector type: an all-zero vector via OP_XZERO. */
434 g_assert (fsig
->param_count
== 0 && mono_metadata_type_equal (fsig
->ret
, type
));
435 return emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
/*
 * Parameterless method returning the vector type: materialize a scalar "one"
 * constant (R4, R8 or integer, chosen by the expand opcode) and feed it to the
 * expand opcode returned by type_to_expand_op.
 */
437 g_assert (fsig
->param_count
== 0 && mono_metadata_type_equal (fsig
->ret
, type
));
438 MonoInst
*one
= NULL
;
439 int expand_opcode
= type_to_expand_op (etype
);
440 MONO_INST_NEW (cfg
, one
, -1);
441 switch (expand_opcode
) {
443 one
->opcode
= OP_R4CONST
;
444 one
->type
= STACK_R4
;
445 one
->inst_p0
= (void *) &r4_one
;
448 one
->opcode
= OP_R8CONST
;
449 one
->type
= STACK_R8
;
450 one
->inst_p0
= (void *) &r8_one
;
453 one
->opcode
= OP_ICONST
;
454 one
->type
= STACK_I4
;
458 one
->dreg
= alloc_dreg (cfg
, one
->type
);
459 MONO_ADD_INS (cfg
->cbb
, one
);
460 return emit_simd_ins (cfg
, klass
, expand_opcode
, one
->dreg
, -1);
462 case SN_get_AllBitsSet
: {
463 /* Compare a zero vector with itself */
464 ins
= emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
465 return emit_xcompare (cfg
, klass
, etype
->type
, ins
, ins
);
/*
 * Element access: args [1] is compared against LEN and an
 * IndexOutOfRangeException is raised on unsigned >=; the element is then read
 * with a type-specific OP_XEXTRACT_* opcode whose destination register class
 * matches the element type.
 */
468 if (!COMPILE_LLVM (cfg
))
470 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, args
[1]->dreg
, len
);
471 MONO_EMIT_NEW_COND_EXC (cfg
, GE_UN
, "IndexOutOfRangeException");
474 gboolean is64
= FALSE
;
475 switch (etype
->type
) {
478 opcode
= OP_XEXTRACT_I64
;
480 dreg
= alloc_lreg (cfg
);
483 opcode
= OP_XEXTRACT_R8
;
484 dreg
= alloc_freg (cfg
);
487 g_assert (cfg
->r4fp
);
488 opcode
= OP_XEXTRACT_R4
;
489 dreg
= alloc_freg (cfg
);
492 opcode
= OP_XEXTRACT_I32
;
493 dreg
= alloc_ireg (cfg
);
496 MONO_INST_NEW (cfg
, ins
, opcode
);
498 ins
->sreg1
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
499 ins
->sreg2
= args
[1]->dreg
;
500 ins
->inst_c0
= etype
->type
;
501 mini_type_to_eval_stack_type (cfg
, etype
, ins
);
502 MONO_ADD_INS (cfg
->cbb
, ins
);
/* Single argument of the element type: expand args [1] with the opcode from type_to_expand_op; DREG is the vreg obtained for args [0]. */
506 if (fsig
->param_count
== 1 && mono_metadata_type_equal (fsig
->params
[0], etype
)) {
507 int dreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
509 int opcode
= type_to_expand_op (etype
);
510 ins
= emit_simd_ins (cfg
, klass
, opcode
, args
[1]->dreg
, -1);
514 if ((fsig
->param_count
== 1 || fsig
->param_count
== 2) && (fsig
->params
[0]->type
== MONO_TYPE_SZARRAY
)) {
515 MonoInst
*array_ins
= args
[1];
517 MonoInst
*ldelema_ins
;
521 if (args
[0]->opcode
!= OP_LDADDR
)
524 /* .ctor (T[]) or .ctor (T[], index) */
526 if (fsig
->param_count
== 2) {
527 index_ins
= args
[2];
529 EMIT_NEW_ICONST (cfg
, index_ins
, 0);
532 /* Emit index check for the end (index + len - 1 < array length) */
533 end_index_reg
= alloc_ireg (cfg
);
534 EMIT_NEW_BIALU_IMM (cfg
, ins
, OP_IADD_IMM
, end_index_reg
, index_ins
->dreg
, len
- 1);
535 MONO_EMIT_BOUNDS_CHECK (cfg
, array_ins
->dreg
, MonoArray
, max_length
, end_index_reg
);
537 /* Load the array slice into the simd reg */
538 ldelema_ins
= mini_emit_ldelema_1_ins (cfg
, mono_class_from_mono_type_internal (etype
), array_ins
, index_ins
, TRUE
, FALSE
);
539 g_assert (args
[0]->opcode
== OP_LDADDR
);
540 var
= (MonoInst
*)args
[0]->inst_p0
;
541 EMIT_NEW_LOAD_MEMBASE (cfg
, ins
, OP_LOADX_MEMBASE
, var
->dreg
, ldelema_ins
->dreg
, 0);
542 ins
->klass
= cmethod
->klass
;
547 if ((fsig
->param_count
== 1 || fsig
->param_count
== 2) && (fsig
->params
[0]->type
== MONO_TYPE_SZARRAY
)) {
548 MonoInst
*array_ins
= args
[1];
550 MonoInst
*ldelema_ins
;
551 int val_vreg
, end_index_reg
;
553 val_vreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
555 /* CopyTo (T[]) or CopyTo (T[], index) */
557 if (fsig
->param_count
== 2) {
558 index_ins
= args
[2];
560 EMIT_NEW_ICONST (cfg
, index_ins
, 0);
563 /* CopyTo () does complicated argument checks */
564 mini_emit_bounds_check_offset (cfg
, array_ins
->dreg
, MONO_STRUCT_OFFSET (MonoArray
, max_length
), index_ins
->dreg
, "ArgumentOutOfRangeException");
/* Remaining space check: (array length - index) < LEN raises ArgumentException. */
565 end_index_reg
= alloc_ireg (cfg
);
566 int len_reg
= alloc_ireg (cfg
);
567 MONO_EMIT_NEW_LOAD_MEMBASE_OP_FLAGS (cfg
, OP_LOADI4_MEMBASE
, len_reg
, array_ins
->dreg
, MONO_STRUCT_OFFSET (MonoArray
, max_length
), MONO_INST_INVARIANT_LOAD
);
568 EMIT_NEW_BIALU (cfg
, ins
, OP_ISUB
, end_index_reg
, len_reg
, index_ins
->dreg
);
569 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, end_index_reg
, len
);
570 MONO_EMIT_NEW_COND_EXC (cfg
, LT
, "ArgumentException");
572 /* Load the array slice into the simd reg */
573 ldelema_ins
= mini_emit_ldelema_1_ins (cfg
, mono_class_from_mono_type_internal (etype
), array_ins
, index_ins
, FALSE
, FALSE
);
574 EMIT_NEW_STORE_MEMBASE (cfg
, ins
, OP_STOREX_MEMBASE
, ldelema_ins
->dreg
, 0, val_vreg
);
575 ins
->klass
= cmethod
->klass
;
/* One vector parameter with a boolean result: whole-vector equality via OP_XEQUAL. */
580 if (fsig
->param_count
== 1 && fsig
->ret
->type
== MONO_TYPE_BOOLEAN
&& mono_metadata_type_equal (fsig
->params
[0], type
)) {
581 int sreg1
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
583 return emit_simd_ins (cfg
, klass
, OP_XEQUAL
, sreg1
, args
[1]->dreg
);
584 } else if (fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
)) {
585 /* Per element equality */
586 return emit_xcompare (cfg
, klass
, etype
->type
, args
[0], args
[1]);
/* Inequality reuses OP_XEQUAL and then tests its result against 0 with OP_CEQ. */
590 case SN_op_Inequality
:
591 g_assert (fsig
->param_count
== 2 && fsig
->ret
->type
== MONO_TYPE_BOOLEAN
&&
592 mono_metadata_type_equal (fsig
->params
[0], type
) &&
593 mono_metadata_type_equal (fsig
->params
[1], type
));
594 ins
= emit_simd_ins (cfg
, klass
, OP_XEQUAL
, args
[0]->dreg
, args
[1]->dreg
);
595 if (id
== SN_op_Inequality
) {
596 int sreg
= ins
->dreg
;
597 int dreg
= alloc_ireg (cfg
);
598 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, sreg
, 0);
599 EMIT_NEW_UNALU (cfg
, ins
, OP_CEQ
, dreg
, -1);
/*
 * Ordering comparisons: emit_xcompare creates the compare, then inst_c0 is
 * overwritten with the signed or unsigned (_UN) compare kind depending on
 * whether the element type is U1/U2/U4/U8.
 */
603 case SN_GreaterThanOrEqual
:
605 case SN_LessThanOrEqual
:
606 g_assert (fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
));
607 is_unsigned
= etype
->type
== MONO_TYPE_U1
|| etype
->type
== MONO_TYPE_U2
|| etype
->type
== MONO_TYPE_U4
|| etype
->type
== MONO_TYPE_U8
;
608 ins
= emit_xcompare (cfg
, klass
, etype
->type
, args
[0], args
[1]);
611 ins
->inst_c0
= is_unsigned
? CMP_GT_UN
: CMP_GT
;
613 case SN_GreaterThanOrEqual
:
614 ins
->inst_c0
= is_unsigned
? CMP_GE_UN
: CMP_GE
;
617 ins
->inst_c0
= is_unsigned
? CMP_LT_UN
: CMP_LT
;
619 case SN_LessThanOrEqual
:
620 ins
->inst_c0
= is_unsigned
? CMP_LE_UN
: CMP_LE
;
623 g_assert_not_reached ();
627 return emit_simd_ins (cfg
, klass
, OP_XCAST
, args
[0]->dreg
, -1);
/*
 * Element-wise binary operators: a single OP_XBINOP is emitted with the
 * element type in inst_c1 and the scalar operation in inst_c0 - the OP_F*
 * opcodes for R4/R8 elements, the OP_I* opcodes otherwise.
 */
629 case SN_op_Subtraction
:
632 case SN_op_BitwiseAnd
:
633 case SN_op_BitwiseOr
:
634 case SN_op_ExclusiveOr
:
637 if (!(fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
)))
639 ins
= emit_simd_ins (cfg
, klass
, OP_XBINOP
, args
[0]->dreg
, args
[1]->dreg
);
640 ins
->inst_c1
= etype
->type
;
642 if (etype
->type
== MONO_TYPE_R4
|| etype
->type
== MONO_TYPE_R8
) {
645 ins
->inst_c0
= OP_FADD
;
647 case SN_op_Subtraction
:
648 ins
->inst_c0
= OP_FSUB
;
651 ins
->inst_c0
= OP_FMUL
;
654 ins
->inst_c0
= OP_FDIV
;
657 ins
->inst_c0
= OP_FMAX
;
660 ins
->inst_c0
= OP_FMIN
;
669 ins
->inst_c0
= OP_IADD
;
671 case SN_op_Subtraction
:
672 ins
->inst_c0
= OP_ISUB
;
676 ins->inst_c0 = OP_IDIV;
679 ins->inst_c0 = OP_IMUL;
682 case SN_op_BitwiseAnd
:
683 ins
->inst_c0
= OP_IAND
;
685 case SN_op_BitwiseOr
:
686 ins
->inst_c0
= OP_IOR
;
688 case SN_op_ExclusiveOr
:
689 ins
->inst_c0
= OP_IXOR
;
692 ins
->inst_c0
= OP_IMAX
;
695 ins
->inst_c0
= OP_IMIN
;
709 #endif // !TARGET_ARM64
/*
 * Mark the current compilation as failed: sets the cfg exception kind to
 * MONO_EXCEPTION_MONO_ERROR and records a System.InvalidOperationException
 * carrying MESSAGE in cfg->error. MESSAGE is passed through a "%s" format, so
 * it is not interpreted as a format string.
 * NOTE(review): the return statement is not visible in this view.
 */
712 emit_invalid_operation (MonoCompile
*cfg
, const char* message
)
714 mono_cfg_set_exception (cfg
, MONO_EXCEPTION_MONO_ERROR
);
715 mono_error_set_generic_error (cfg
->error
, "System", "InvalidOperationException", "%s", message
);
/*
 * Methods of the "ArmBase" class recognised by emit_arm64_intrinsics.
 * Searched with lookup_intrins_info (binary search by method name), so the
 * entries must stay sorted by name.
 */
721 static SimdIntrinsic armbase_methods
[] = {
722 {SN_LeadingSignCount
},
723 {SN_LeadingZeroCount
},
724 {SN_ReverseElementBits
},
/*
 * Methods of the "Crc32" class recognised by emit_arm64_intrinsics; searched
 * with lookup_intrins_info. NOTE(review): the entries are not visible in this
 * view.
 */
728 static SimdIntrinsic crc32_methods
[] = {
/*
 * Intrinsic expansion for the Arm64 hardware-intrinsics classes visible here:
 * "ArmBase" and "Crc32" (including their nested Arm64 variants, as matched by
 * is_hw_intrinsics_class). The method is found by name in the per-class table
 * and then dispatched on its SN_ id.
 * NOTE(review): several lines (declarations, switch headers, early returns)
 * are not visible in this view.
 */
735 emit_arm64_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
737 // Arm64 intrinsics are LLVM-only
738 if (!COMPILE_LLVM (cfg
))
742 gboolean supported
, is_64bit
;
743 MonoClass
*klass
= cmethod
->klass
;
/* Underlying type of the first parameter (MONO_TYPE_VOID when there are no parameters); used to pick 32- vs 64-bit opcodes. */
744 MonoTypeEnum arg0_type
= fsig
->param_count
> 0 ? get_underlying_type (fsig
->params
[0]) : MONO_TYPE_VOID
;
745 gboolean arg0_i32
= (arg0_type
== MONO_TYPE_I4
) || (arg0_type
== MONO_TYPE_U4
);
748 if (is_hw_intrinsics_class (klass
, "ArmBase", &is_64bit
)) {
749 info
= lookup_intrins_info (armbase_methods
, sizeof (armbase_methods
), cmethod
);
753 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_ARM64_BASE
) != 0;
/* IsSupported folds to the integer constant 1 or 0 depending on the CPU feature test above. */
756 case SN_get_IsSupported
:
757 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
758 ins
->type
= STACK_I4
;
760 case SN_LeadingZeroCount
:
761 return emit_simd_ins_for_sig (cfg
, klass
, arg0_i32
? OP_LZCNT32
: OP_LZCNT64
, 0, arg0_type
, fsig
, args
);
762 case SN_LeadingSignCount
:
763 return emit_simd_ins_for_sig (cfg
, klass
, arg0_i32
? OP_LSCNT32
: OP_LSCNT64
, 0, arg0_type
, fsig
, args
);
/* ReverseElementBits selects the 32- or 64-bit form from IS_64BIT (set by is_hw_intrinsics_class), not from the argument type. */
764 case SN_ReverseElementBits
:
765 return emit_simd_ins_for_sig (cfg
, klass
,
766 (is_64bit
? OP_XOP_I8_I8
: OP_XOP_I4_I4
),
767 (is_64bit
? SIMD_OP_ARM64_RBIT64
: SIMD_OP_ARM64_RBIT32
),
768 arg0_type
, fsig
, args
);
770 g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false
774 if (is_hw_intrinsics_class (klass
, "Crc32", &is_64bit
)) {
775 info
= lookup_intrins_info (crc32_methods
, sizeof (crc32_methods
), cmethod
);
779 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_ARM64_CRC
) != 0;
782 case SN_get_IsSupported
:
783 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
784 ins
->type
= STACK_I4
;
/*
 * CRC32 / CRC32C: the SIMD op is chosen from the underlying type of the
 * second parameter (U1/U2/U4/U8 -> B/H/W/X forms), with the "C" variants used
 * for ComputeCrc32C. Any other data type asserts.
 */
786 case SN_ComputeCrc32
:
787 case SN_ComputeCrc32C
: {
788 SimdOp op
= (SimdOp
)0;
789 gboolean is_c
= info
->id
== SN_ComputeCrc32C
;
790 switch (get_underlying_type (fsig
->params
[1])) {
791 case MONO_TYPE_U1
: op
= is_c
? SIMD_OP_ARM64_CRC32CB
: SIMD_OP_ARM64_CRC32B
; break;
792 case MONO_TYPE_U2
: op
= is_c
? SIMD_OP_ARM64_CRC32CH
: SIMD_OP_ARM64_CRC32H
; break;
793 case MONO_TYPE_U4
: op
= is_c
? SIMD_OP_ARM64_CRC32CW
: SIMD_OP_ARM64_CRC32W
; break;
794 case MONO_TYPE_U8
: op
= is_c
? SIMD_OP_ARM64_CRC32CX
: SIMD_OP_ARM64_CRC32X
; break;
795 default: g_assert_not_reached (); break;
797 return emit_simd_ins_for_sig (cfg
, klass
, is_64bit
? OP_XOP_I4_I4_I8
: OP_XOP_I4_I4_I4
, op
, arg0_type
, fsig
, args
);
800 g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false
805 #endif // TARGET_ARM64
/*
 * Intrinsics of the "Sse" class, searched by emit_x86_intrinsics through
 * lookup_intrins_info (binary search by method name) - keep the entries sorted
 * by name.
 * Each entry starts with the SN_ method id; the optional second and third
 * values appear to be the opcode and the inst_c0 value that
 * emit_x86_intrinsics forwards to emit_simd_ins_for_sig as info->op and
 * info->instc0 (the SimdIntrinsic struct definition is not visible in this
 * view - confirm the field order).
 * NOTE(review): entries with only an id carry no opcode here; presumably they
 * are handled by dedicated code in emit_x86_intrinsics - confirm.
 */
809 static SimdIntrinsic sse_methods
[] = {
810 {SN_Add
, OP_XBINOP
, OP_FADD
},
811 {SN_AddScalar
, OP_SSE_ADDSS
},
812 {SN_And
, OP_SSE_AND
},
813 {SN_AndNot
, OP_SSE_ANDN
},
814 {SN_CompareEqual
, OP_XCOMPARE_FP
, CMP_EQ
},
815 {SN_CompareGreaterThan
, OP_XCOMPARE_FP
,CMP_GT
},
816 {SN_CompareGreaterThanOrEqual
, OP_XCOMPARE_FP
, CMP_GE
},
817 {SN_CompareLessThan
, OP_XCOMPARE_FP
, CMP_LT
},
818 {SN_CompareLessThanOrEqual
, OP_XCOMPARE_FP
, CMP_LE
},
819 {SN_CompareNotEqual
, OP_XCOMPARE_FP
, CMP_NE
},
820 {SN_CompareNotGreaterThan
, OP_XCOMPARE_FP
, CMP_LE
},
821 {SN_CompareNotGreaterThanOrEqual
, OP_XCOMPARE_FP
, CMP_LT
},
822 {SN_CompareNotLessThan
, OP_XCOMPARE_FP
, CMP_GE
},
823 {SN_CompareNotLessThanOrEqual
, OP_XCOMPARE_FP
, CMP_GT
},
824 {SN_CompareOrdered
, OP_XCOMPARE_FP
, CMP_ORD
},
825 {SN_CompareScalarEqual
, OP_SSE_CMPSS
, CMP_EQ
},
826 {SN_CompareScalarGreaterThan
, OP_SSE_CMPSS
, CMP_GT
},
827 {SN_CompareScalarGreaterThanOrEqual
, OP_SSE_CMPSS
, CMP_GE
},
828 {SN_CompareScalarLessThan
, OP_SSE_CMPSS
, CMP_LT
},
829 {SN_CompareScalarLessThanOrEqual
, OP_SSE_CMPSS
, CMP_LE
},
830 {SN_CompareScalarNotEqual
, OP_SSE_CMPSS
, CMP_NE
},
831 {SN_CompareScalarNotGreaterThan
, OP_SSE_CMPSS
, CMP_LE
},
832 {SN_CompareScalarNotGreaterThanOrEqual
, OP_SSE_CMPSS
, CMP_LT
},
833 {SN_CompareScalarNotLessThan
, OP_SSE_CMPSS
, CMP_GE
},
834 {SN_CompareScalarNotLessThanOrEqual
, OP_SSE_CMPSS
, CMP_GT
},
835 {SN_CompareScalarOrdered
, OP_SSE_CMPSS
, CMP_ORD
},
836 {SN_CompareScalarOrderedEqual
, OP_SSE_COMISS
, CMP_EQ
},
837 {SN_CompareScalarOrderedGreaterThan
, OP_SSE_COMISS
, CMP_GT
},
838 {SN_CompareScalarOrderedGreaterThanOrEqual
, OP_SSE_COMISS
, CMP_GE
},
839 {SN_CompareScalarOrderedLessThan
, OP_SSE_COMISS
, CMP_LT
},
840 {SN_CompareScalarOrderedLessThanOrEqual
, OP_SSE_COMISS
, CMP_LE
},
841 {SN_CompareScalarOrderedNotEqual
, OP_SSE_COMISS
, CMP_NE
},
842 {SN_CompareScalarUnordered
, OP_SSE_CMPSS
, CMP_UNORD
},
843 {SN_CompareScalarUnorderedEqual
, OP_SSE_UCOMISS
, CMP_EQ
},
844 {SN_CompareScalarUnorderedGreaterThan
, OP_SSE_UCOMISS
, CMP_GT
},
845 {SN_CompareScalarUnorderedGreaterThanOrEqual
, OP_SSE_UCOMISS
, CMP_GE
},
846 {SN_CompareScalarUnorderedLessThan
, OP_SSE_UCOMISS
, CMP_LT
},
847 {SN_CompareScalarUnorderedLessThanOrEqual
, OP_SSE_UCOMISS
, CMP_LE
},
848 {SN_CompareScalarUnorderedNotEqual
, OP_SSE_UCOMISS
, CMP_NE
},
849 {SN_CompareUnordered
, OP_XCOMPARE_FP
, CMP_UNORD
},
850 {SN_ConvertScalarToVector128Single
},
851 {SN_ConvertToInt32
, OP_XOP_I4_X
, SIMD_OP_SSE_CVTSS2SI
},
852 {SN_ConvertToInt32WithTruncation
, OP_XOP_I4_X
, SIMD_OP_SSE_CVTTSS2SI
},
853 {SN_ConvertToInt64
, OP_XOP_I8_X
, SIMD_OP_SSE_CVTSS2SI64
},
854 {SN_ConvertToInt64WithTruncation
, OP_XOP_I8_X
, SIMD_OP_SSE_CVTTSS2SI64
},
855 {SN_Divide
, OP_XBINOP
, OP_FDIV
},
856 {SN_DivideScalar
, OP_SSE_DIVSS
},
857 {SN_LoadAlignedVector128
, OP_SSE_LOADU
, 16 /* alignment */},
858 {SN_LoadHigh
, OP_SSE_MOVHPS_LOAD
},
859 {SN_LoadLow
, OP_SSE_MOVLPS_LOAD
},
860 {SN_LoadScalarVector128
, OP_SSE_MOVSS
},
861 {SN_LoadVector128
, OP_SSE_LOADU
, 1 /* alignment */},
862 {SN_Max
, OP_XOP_X_X_X
, SIMD_OP_SSE_MAXPS
},
863 {SN_MaxScalar
, OP_XOP_X_X_X
, SIMD_OP_SSE_MAXSS
},
864 {SN_Min
, OP_XOP_X_X_X
, SIMD_OP_SSE_MINPS
},
865 {SN_MinScalar
, OP_XOP_X_X_X
, SIMD_OP_SSE_MINSS
},
866 {SN_MoveHighToLow
, OP_SSE_MOVEHL
},
867 {SN_MoveLowToHigh
, OP_SSE_MOVELH
},
868 {SN_MoveMask
, OP_SSE_MOVMSK
},
869 {SN_MoveScalar
, OP_SSE_MOVS2
},
870 {SN_Multiply
, OP_XBINOP
, OP_FMUL
},
871 {SN_MultiplyScalar
, OP_SSE_MULSS
},
873 {SN_Prefetch0
, OP_SSE_PREFETCHT0
},
874 {SN_Prefetch1
, OP_SSE_PREFETCHT1
},
875 {SN_Prefetch2
, OP_SSE_PREFETCHT2
},
876 {SN_PrefetchNonTemporal
, OP_SSE_PREFETCHNTA
},
877 {SN_Reciprocal
, OP_XOP_X_X
, SIMD_OP_SSE_RCPPS
},
878 {SN_ReciprocalScalar
},
879 {SN_ReciprocalSqrt
, OP_XOP_X_X
, SIMD_OP_SSE_RSQRTPS
},
880 {SN_ReciprocalSqrtScalar
},
882 {SN_Sqrt
, OP_XOP_X_X
, SIMD_OP_SSE_SQRTPS
},
884 {SN_Store
, OP_SSE_STORE
, 1 /* alignment */},
885 {SN_StoreAligned
, OP_SSE_STORE
, 16 /* alignment */},
886 {SN_StoreAlignedNonTemporal
, OP_SSE_MOVNTPS
, 16 /* alignment */},
887 {SN_StoreFence
, OP_XOP
, SIMD_OP_SSE_SFENCE
},
888 {SN_StoreHigh
, OP_SSE_MOVHPS_STORE
},
889 {SN_StoreLow
, OP_SSE_MOVLPS_STORE
},
890 {SN_StoreScalar
, OP_SSE_MOVSS_STORE
},
891 {SN_Subtract
, OP_XBINOP
, OP_FSUB
},
892 {SN_SubtractScalar
, OP_SSE_SUBSS
},
893 {SN_UnpackHigh
, OP_SSE_UNPACKHI
},
894 {SN_UnpackLow
, OP_SSE_UNPACKLO
},
895 {SN_Xor
, OP_SSE_XOR
},
/*
 * Intrinsics table named for SSE2. Entries start with the SN_ method id and
 * optionally carry an opcode and an inst_c0 value, in the same layout as
 * sse_methods.
 * NOTE(review): the code that consumes this table is not visible in this view;
 * presumably it is searched with lookup_intrins_info (binary search by method
 * name), in which case the entries must stay sorted by name - confirm. Some
 * entries are missing from this view.
 */
899 static SimdIntrinsic sse2_methods
[] = {
901 {SN_AddSaturate
, OP_SSE2_ADDS
},
902 {SN_AddScalar
, OP_SSE2_ADDSD
},
903 {SN_And
, OP_SSE_AND
},
904 {SN_AndNot
, OP_SSE_ANDN
},
907 {SN_CompareGreaterThan
},
908 {SN_CompareGreaterThanOrEqual
, OP_XCOMPARE_FP
, CMP_GE
},
909 {SN_CompareLessThan
},
910 {SN_CompareLessThanOrEqual
, OP_XCOMPARE_FP
, CMP_LE
},
911 {SN_CompareNotEqual
, OP_XCOMPARE_FP
, CMP_NE
},
912 {SN_CompareNotGreaterThan
, OP_XCOMPARE_FP
, CMP_LE
},
913 {SN_CompareNotGreaterThanOrEqual
, OP_XCOMPARE_FP
, CMP_LT
},
914 {SN_CompareNotLessThan
, OP_XCOMPARE_FP
, CMP_GE
},
915 {SN_CompareNotLessThanOrEqual
, OP_XCOMPARE_FP
, CMP_GT
},
916 {SN_CompareOrdered
, OP_XCOMPARE_FP
, CMP_ORD
},
917 {SN_CompareScalarEqual
, OP_SSE2_CMPSD
, CMP_EQ
},
918 {SN_CompareScalarGreaterThan
, OP_SSE2_CMPSD
, CMP_GT
},
919 {SN_CompareScalarGreaterThanOrEqual
, OP_SSE2_CMPSD
, CMP_GE
},
920 {SN_CompareScalarLessThan
, OP_SSE2_CMPSD
, CMP_LT
},
921 {SN_CompareScalarLessThanOrEqual
, OP_SSE2_CMPSD
, CMP_LE
},
922 {SN_CompareScalarNotEqual
, OP_SSE2_CMPSD
, CMP_NE
},
923 {SN_CompareScalarNotGreaterThan
, OP_SSE2_CMPSD
, CMP_LE
},
924 {SN_CompareScalarNotGreaterThanOrEqual
, OP_SSE2_CMPSD
, CMP_LT
},
925 {SN_CompareScalarNotLessThan
, OP_SSE2_CMPSD
, CMP_GE
},
926 {SN_CompareScalarNotLessThanOrEqual
, OP_SSE2_CMPSD
, CMP_GT
},
927 {SN_CompareScalarOrdered
, OP_SSE2_CMPSD
, CMP_ORD
},
928 {SN_CompareScalarOrderedEqual
, OP_SSE2_COMISD
, CMP_EQ
},
929 {SN_CompareScalarOrderedGreaterThan
, OP_SSE2_COMISD
, CMP_GT
},
930 {SN_CompareScalarOrderedGreaterThanOrEqual
, OP_SSE2_COMISD
, CMP_GE
},
931 {SN_CompareScalarOrderedLessThan
, OP_SSE2_COMISD
, CMP_LT
},
932 {SN_CompareScalarOrderedLessThanOrEqual
, OP_SSE2_COMISD
, CMP_LE
},
933 {SN_CompareScalarOrderedNotEqual
, OP_SSE2_COMISD
, CMP_NE
},
934 {SN_CompareScalarUnordered
, OP_SSE2_CMPSD
, CMP_UNORD
},
935 {SN_CompareScalarUnorderedEqual
, OP_SSE2_UCOMISD
, CMP_EQ
},
936 {SN_CompareScalarUnorderedGreaterThan
, OP_SSE2_UCOMISD
, CMP_GT
},
937 {SN_CompareScalarUnorderedGreaterThanOrEqual
, OP_SSE2_UCOMISD
, CMP_GE
},
938 {SN_CompareScalarUnorderedLessThan
, OP_SSE2_UCOMISD
, CMP_LT
},
939 {SN_CompareScalarUnorderedLessThanOrEqual
, OP_SSE2_UCOMISD
, CMP_LE
},
940 {SN_CompareScalarUnorderedNotEqual
, OP_SSE2_UCOMISD
, CMP_NE
},
941 {SN_CompareUnordered
, OP_XCOMPARE_FP
, CMP_UNORD
},
942 {SN_ConvertScalarToVector128Double
},
943 {SN_ConvertScalarToVector128Int32
},
944 {SN_ConvertScalarToVector128Int64
},
945 {SN_ConvertScalarToVector128Single
, OP_XOP_X_X_X
, SIMD_OP_SSE_CVTSD2SS
},
946 {SN_ConvertScalarToVector128UInt32
},
947 {SN_ConvertScalarToVector128UInt64
},
949 {SN_ConvertToInt32WithTruncation
, OP_XOP_I4_X
, SIMD_OP_SSE_CVTTSD2SI
},
951 {SN_ConvertToInt64WithTruncation
, OP_XOP_I8_X
, SIMD_OP_SSE_CVTTSD2SI64
},
952 {SN_ConvertToUInt32
},
953 {SN_ConvertToUInt64
},
954 {SN_ConvertToVector128Double
},
955 {SN_ConvertToVector128Int32
},
956 {SN_ConvertToVector128Int32WithTruncation
},
957 {SN_ConvertToVector128Single
},
958 {SN_Divide
, OP_XBINOP
, OP_FDIV
},
959 {SN_DivideScalar
, OP_SSE2_DIVSD
},
962 {SN_LoadAlignedVector128
},
963 {SN_LoadFence
, OP_XOP
, SIMD_OP_SSE_LFENCE
},
964 {SN_LoadHigh
, OP_SSE2_MOVHPD_LOAD
},
965 {SN_LoadLow
, OP_SSE2_MOVLPD_LOAD
},
966 {SN_LoadScalarVector128
},
968 {SN_MaskMove
, OP_SSE2_MASKMOVDQU
},
970 {SN_MaxScalar
, OP_XOP_X_X_X
, SIMD_OP_SSE_MAXSD
},
971 {SN_MemoryFence
, OP_XOP
, SIMD_OP_SSE_MFENCE
},
973 {SN_MinScalar
, OP_XOP_X_X_X
, SIMD_OP_SSE_MINSD
},
974 {SN_MoveMask
, OP_SSE_MOVMSK
},
977 {SN_MultiplyAddAdjacent
, OP_XOP_X_X_X
, SIMD_OP_SSE_PMADDWD
},
979 {SN_MultiplyLow
, OP_PMULW
},
980 {SN_MultiplyScalar
, OP_SSE2_MULSD
},
982 {SN_PackSignedSaturate
},
983 {SN_PackUnsignedSaturate
},
984 {SN_ShiftLeftLogical
},
985 {SN_ShiftLeftLogical128BitLane
},
986 {SN_ShiftRightArithmetic
},
987 {SN_ShiftRightLogical
},
988 {SN_ShiftRightLogical128BitLane
},
992 {SN_Sqrt
, OP_XOP_X_X
, SIMD_OP_SSE_SQRTPD
},
994 {SN_Store
, OP_SSE_STORE
, 1 /* alignment */},
995 {SN_StoreAligned
, OP_SSE_STORE
, 16 /* alignment */},
996 {SN_StoreAlignedNonTemporal
, OP_SSE_MOVNTPS
, 16 /* alignment */},
997 {SN_StoreHigh
, OP_SSE2_MOVHPD_STORE
},
998 {SN_StoreLow
, OP_SSE2_MOVLPD_STORE
},
999 {SN_StoreNonTemporal
, OP_SSE_MOVNTPS
, 1 /* alignment */},
1000 {SN_StoreScalar
, OP_SSE_STORES
},
1002 {SN_SubtractSaturate
, OP_SSE2_SUBS
},
1003 {SN_SubtractScalar
, OP_SSE2_SUBSD
},
1004 {SN_SumAbsoluteDifferences
, OP_XOP_X_X_X
, SIMD_OP_SSE_PSADBW
},
1005 {SN_UnpackHigh
, OP_SSE_UNPACKHI
},
1006 {SN_UnpackLow
, OP_SSE_UNPACKLO
},
1007 {SN_Xor
, OP_SSE_XOR
},
1008 {SN_get_IsSupported
}
/*
 * Intrinsics table named for SSE3, in the same entry layout as sse_methods
 * (SN_ method id, then optional opcode and inst_c0 value).
 * NOTE(review): the consumer is not visible in this view; presumably the table
 * is binary-searched by method name and must stay sorted - confirm. Some
 * entries are missing from this view.
 */
1011 static SimdIntrinsic sse3_methods
[] = {
1014 {SN_HorizontalSubtract
},
1015 {SN_LoadAndDuplicateToVector128
, OP_SSE3_MOVDDUP_MEM
},
1016 {SN_LoadDquVector128
, OP_XOP_X_I
, SIMD_OP_SSE_LDDQU
},
1017 {SN_MoveAndDuplicate
, OP_SSE3_MOVDDUP
},
1018 {SN_MoveHighAndDuplicate
, OP_SSE3_MOVSHDUP
},
1019 {SN_MoveLowAndDuplicate
, OP_SSE3_MOVSLDUP
},
1020 {SN_get_IsSupported
}
/*
 * Intrinsics table named for SSSE3, in the same entry layout as sse_methods
 * (SN_ method id, then optional opcode and inst_c0 value).
 * NOTE(review): the consumer is not visible in this view; presumably the table
 * is binary-searched by method name and must stay sorted - confirm. Some
 * entries are missing from this view.
 */
1023 static SimdIntrinsic ssse3_methods
[] = {
1024 {SN_Abs
, OP_SSSE3_ABS
},
1027 {SN_HorizontalAddSaturate
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHADDSW
},
1028 {SN_HorizontalSubtract
},
1029 {SN_HorizontalSubtractSaturate
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHSUBSW
},
1030 {SN_MultiplyAddAdjacent
, OP_XOP_X_X_X
, SIMD_OP_SSE_PMADDUBSW
},
1031 {SN_MultiplyHighRoundScale
, OP_XOP_X_X_X
, SIMD_OP_SSE_PMULHRSW
},
1032 {SN_Shuffle
, OP_SSSE3_SHUFFLE
},
1034 {SN_get_IsSupported
}
/*
 * Intrinsics table named for SSE4.1, in the same entry layout as sse_methods
 * (SN_ method id, then optional opcode and inst_c0 value). For the
 * OP_SSE41_ROUNDP / OP_SSE41_ROUNDS entries the third value is the round mode,
 * and for the OP_SSE_CVTII entries it is a MONO_TYPE_ element type.
 * NOTE(review): the consumer is not visible in this view; presumably the table
 * is binary-searched by method name and must stay sorted - confirm. Some
 * entries are missing from this view.
 */
1037 static SimdIntrinsic sse41_methods
[] = {
1040 {SN_Ceiling
, OP_SSE41_ROUNDP
, 10 /*round mode*/},
1041 {SN_CeilingScalar
, OP_SSE41_ROUNDS
, 10 /*round mode*/},
1042 {SN_CompareEqual
, OP_XCOMPARE
, CMP_EQ
},
1043 {SN_ConvertToVector128Int16
, OP_SSE_CVTII
, MONO_TYPE_I2
},
1044 {SN_ConvertToVector128Int32
, OP_SSE_CVTII
, MONO_TYPE_I4
},
1045 {SN_ConvertToVector128Int64
, OP_SSE_CVTII
, MONO_TYPE_I8
},
1048 {SN_Floor
, OP_SSE41_ROUNDP
, 9 /*round mode*/},
1049 {SN_FloorScalar
, OP_SSE41_ROUNDS
, 9 /*round mode*/},
1051 {SN_LoadAlignedVector128NonTemporal
, OP_SSE41_LOADANT
},
1052 {SN_Max
, OP_XBINOP
, OP_IMAX
},
1053 {SN_Min
, OP_XBINOP
, OP_IMIN
},
1054 {SN_MinHorizontal
, OP_XOP_X_X
, SIMD_OP_SSE_PHMINPOSUW
},
1055 {SN_MultipleSumAbsoluteDifferences
},
1056 {SN_Multiply
, OP_SSE41_MUL
},
1057 {SN_MultiplyLow
, OP_SSE41_MULLO
},
1058 {SN_PackUnsignedSaturate
, OP_XOP_X_X_X
, SIMD_OP_SSE_PACKUSDW
},
1059 {SN_RoundCurrentDirection
, OP_SSE41_ROUNDP
, 4 /*round mode*/},
1060 {SN_RoundCurrentDirectionScalar
, OP_SSE41_ROUNDS
, 4 /*round mode*/},
1061 {SN_RoundToNearestInteger
, OP_SSE41_ROUNDP
, 8 /*round mode*/},
1062 {SN_RoundToNearestIntegerScalar
, OP_SSE41_ROUNDS
, 8 /*round mode*/},
1063 {SN_RoundToNegativeInfinity
, OP_SSE41_ROUNDP
, 9 /*round mode*/},
1064 {SN_RoundToNegativeInfinityScalar
, OP_SSE41_ROUNDS
, 9 /*round mode*/},
1065 {SN_RoundToPositiveInfinity
, OP_SSE41_ROUNDP
, 10 /*round mode*/},
1066 {SN_RoundToPositiveInfinityScalar
, OP_SSE41_ROUNDS
, 10 /*round mode*/},
1067 {SN_RoundToZero
, OP_SSE41_ROUNDP
, 11 /*round mode*/},
1068 {SN_RoundToZeroScalar
, OP_SSE41_ROUNDS
, 11 /*round mode*/},
1069 {SN_TestC
, OP_XOP_I4_X_X
, SIMD_OP_SSE_TESTC
},
1070 {SN_TestNotZAndNotC
, OP_XOP_I4_X_X
, SIMD_OP_SSE_TESTNZ
},
1071 {SN_TestZ
, OP_XOP_I4_X_X
, SIMD_OP_SSE_TESTZ
},
1072 {SN_get_IsSupported
}
/*
 * Intrinsics table named for SSE4.2, in the same entry layout as sse_methods.
 * NOTE(review): the consumer is not visible in this view; presumably the table
 * is binary-searched by method name and must stay sorted - confirm. Some
 * entries are missing from this view.
 */
1075 static SimdIntrinsic sse42_methods
[] = {
1076 {SN_CompareGreaterThan
, OP_XCOMPARE
, CMP_GT
},
1078 {SN_get_IsSupported
}
/*
 * Intrinsics table named for PCLMULQDQ; both entries carry only a method id.
 * NOTE(review): the consumer is not visible in this view; presumably the table
 * is binary-searched by method name and must stay sorted - confirm.
 */
1081 static SimdIntrinsic pclmulqdq_methods
[] = {
1082 {SN_CarrylessMultiply
},
1083 {SN_get_IsSupported
}
/*
 * Intrinsics table named for AES, in the same entry layout as sse_methods
 * (SN_ method id, then optional opcode and SIMD_OP_AES_* value).
 * NOTE(review): the consumer is not visible in this view; presumably the table
 * is binary-searched by method name and must stay sorted - confirm. Some
 * entries are missing from this view.
 */
1086 static SimdIntrinsic aes_methods
[] = {
1087 {SN_Decrypt
, OP_XOP_X_X_X
, SIMD_OP_AES_DEC
},
1088 {SN_DecryptLast
, OP_XOP_X_X_X
, SIMD_OP_AES_DECLAST
},
1089 {SN_Encrypt
, OP_XOP_X_X_X
, SIMD_OP_AES_ENC
},
1090 {SN_EncryptLast
, OP_XOP_X_X_X
, SIMD_OP_AES_ENCLAST
},
1091 {SN_InverseMixColumns
, OP_XOP_X_X
, SIMD_OP_AES_IMC
},
1093 {SN_get_IsSupported
}
/* Method table for the Popcnt intrinsics class, searched by emit_x86_intrinsics () through lookup_intrins_info (). */
1096 static SimdIntrinsic popcnt_methods
[] = {
1098 {SN_get_IsSupported
}
/*
 * Method table for the Lzcnt intrinsics class, searched by
 * emit_x86_intrinsics () through lookup_intrins_info ().
 * Both entries carry only a method id and are handled by explicit cases.
 */
1101 static SimdIntrinsic lzcnt_methods
[] = {
1102 {SN_LeadingZeroCount
},
1103 {SN_get_IsSupported
}
/*
 * Method table for the Bmi1 intrinsics class, searched by
 * emit_x86_intrinsics () through lookup_intrins_info ().
 * Entries carry only a method id; the IR for each one is built by the
 * matching case in emit_x86_intrinsics ().
 */
1106 static SimdIntrinsic bmi1_methods
[] = {
1108 {SN_BitFieldExtract
},
1109 {SN_ExtractLowestSetBit
},
1110 {SN_GetMaskUpToLowestSetBit
},
1111 {SN_ResetLowestSetBit
},
1112 {SN_TrailingZeroCount
},
1113 {SN_get_IsSupported
}
/*
 * Method table for the Bmi2 intrinsics class, searched by
 * emit_x86_intrinsics () through lookup_intrins_info ().
 * Entries carry only a method id; the IR for each one is built by the
 * matching case in emit_x86_intrinsics ().
 */
1116 static SimdIntrinsic bmi2_methods
[] = {
1117 {SN_MultiplyNoFlags
},
1118 {SN_ParallelBitDeposit
},
1119 {SN_ParallelBitExtract
},
1121 {SN_get_IsSupported
}
/*
 * Method table for the X86Base intrinsics class, searched by
 * emit_x86_intrinsics () through lookup_intrins_info ().
 * Entries carry only a method id and are handled by explicit cases.
 */
1124 static SimdIntrinsic x86base_methods
[] = {
1125 {SN_BitScanForward
},
1126 {SN_BitScanReverse
},
1127 {SN_get_IsSupported
}
/*
 * emit_x86_intrinsics:
 *
 *   Emit IR for a call to a method of one of the hardware intrinsics classes
 * matched below (Sse, Sse2, Sse3, Ssse3, Sse41, Sse42, Pclmulqdq, Aes, Popcnt,
 * Lzcnt, Bmi1, Bmi2, X86Base). mono_emit_simd_intrinsics () calls it for
 * classes in the System.Runtime.Intrinsics.X86 namespace and stores the result
 * in a MonoInst*.
 *
 *   cfg     - compilation the instructions are added to
 *   cmethod - the intrinsic method being called; its klass selects the section
 *   fsig    - signature of cmethod, used for parameter counts and types
 *   args    - argument instructions; their dreg / inst_c0 values are consumed
 */
1131 emit_x86_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
/* is_64bit is filled in by is_hw_intrinsics_class () and later picks 64-bit vs 32-bit opcodes and registers. */
1134 gboolean supported
, is_64bit
;
1135 MonoClass
*klass
= cmethod
->klass
;
/* Underlying type of the first parameter, or MONO_TYPE_VOID when the method takes no parameters. */
1136 MonoTypeEnum arg0_type
= fsig
->param_count
> 0 ? get_underlying_type (fsig
->params
[0]) : MONO_TYPE_VOID
;
1137 SimdIntrinsic
*info
;
/* Sse class: guarded by a COMPILE_LLVM (cfg) check, method resolved through sse_methods. */
1139 if (is_hw_intrinsics_class (klass
, "Sse", &is_64bit
)) {
1140 if (!COMPILE_LLVM (cfg
))
1142 info
= lookup_intrins_info (sse_methods
, sizeof (sse_methods
), cmethod
);
/* supported mirrors the MONO_CPU_X86_SSE bit of the CPU feature mask. */
1147 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSE
) != 0;
/* Table-driven emission: opcode and constant come straight from the table entry. */
1151 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
/* get_IsSupported becomes an I4 constant, 1 or 0, taken from supported. */
1154 case SN_get_IsSupported
:
1155 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1156 ins
->type
= STACK_I4
;
/* The third argument (the mask) is only accepted as an OP_ICONST; its value is passed as the instruction constant. */
1159 if (args
[2]->opcode
== OP_ICONST
)
1160 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE_SHUFFLE
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1161 // FIXME: handle non-constant mask (generate a switch)
1162 return emit_invalid_operation (cfg
, "mask in Sse.Shuffle must be constant");
/* The opcode is chosen from the type of the second parameter (I4 or I8); other types assert. */
1163 case SN_ConvertScalarToVector128Single
: {
1165 switch (fsig
->params
[1]->type
) {
1166 case MONO_TYPE_I4
: op
= OP_SSE_CVTSI2SS
; break;
1167 case MONO_TYPE_I8
: op
= OP_SSE_CVTSI2SS64
; break;
1168 default: g_assert_not_reached (); break;
1170 return emit_simd_ins_for_sig (cfg
, klass
, op
, 0, 0, fsig
, args
);
/* Scalar reciprocal / reciprocal-sqrt / sqrt share one emission path that differs only in the opcode. */
1172 case SN_ReciprocalScalar
:
1173 case SN_ReciprocalSqrtScalar
:
1174 case SN_SqrtScalar
: {
1177 case SN_ReciprocalScalar
: op
= OP_SSE_RCPSS
; break;
1178 case SN_ReciprocalSqrtScalar
: op
= OP_SSE_RSQRTSS
; break;
1179 case SN_SqrtScalar
: op
= OP_SSE_SQRTSS
; break;
/* With one parameter, args [0] feeds both source registers; with two, args [0] and args [1] are used. */
1181 if (fsig
->param_count
== 1)
1182 return emit_simd_ins (cfg
, klass
, op
, args
[0]->dreg
, args
[0]->dreg
);
1183 else if (fsig
->param_count
== 2)
1184 return emit_simd_ins (cfg
, klass
, op
, args
[0]->dreg
, args
[1]->dreg
);
1186 g_assert_not_reached ();
1189 case SN_LoadScalarVector128
:
/* Sse2 class: guarded by a COMPILE_LLVM (cfg) check, method resolved through sse2_methods. */
1196 if (is_hw_intrinsics_class (klass
, "Sse2", &is_64bit
)) {
1197 if (!COMPILE_LLVM (cfg
))
1199 info
= lookup_intrins_info (sse2_methods
, sizeof (sse2_methods
), cmethod
);
/* supported mirrors the MONO_CPU_X86_SSE2 bit of the CPU feature mask. */
1204 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSE2
) != 0;
/* Table-driven emission: opcode and constant come straight from the table entry. */
1208 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
/* get_IsSupported becomes an I4 constant, 1 or 0, taken from supported. */
1211 case SN_get_IsSupported
: {
1212 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1213 ins
->type
= STACK_I4
;
/* Element-wise subtract: the floating point opcode is used only when the element type is R8. */
1217 return emit_simd_ins_for_sig (cfg
, klass
, OP_XBINOP
, arg0_type
== MONO_TYPE_R8
? OP_FSUB
: OP_ISUB
, arg0_type
, fsig
, args
);
/* Element-wise add: the floating point opcode is used only when the element type is R8. */
1219 return emit_simd_ins_for_sig (cfg
, klass
, OP_XBINOP
, arg0_type
== MONO_TYPE_R8
? OP_FADD
: OP_IADD
, arg0_type
, fsig
, args
);
/* Unsigned average: byte form for U1 elements, word form for U2 elements. */
1221 if (arg0_type
== MONO_TYPE_U1
)
1222 return emit_simd_ins_for_sig (cfg
, klass
, OP_PAVGB_UN
, -1, arg0_type
, fsig
, args
);
1223 else if (arg0_type
== MONO_TYPE_U2
)
1224 return emit_simd_ins_for_sig (cfg
, klass
, OP_PAVGW_UN
, -1, arg0_type
, fsig
, args
);
/* Comparisons: OP_XCOMPARE_FP for R8 elements, OP_XCOMPARE otherwise; the CMP_* constant selects the relation. */
1227 case SN_CompareNotEqual
:
1228 return emit_simd_ins_for_sig (cfg
, klass
, arg0_type
== MONO_TYPE_R8
? OP_XCOMPARE_FP
: OP_XCOMPARE
, CMP_NE
, arg0_type
, fsig
, args
);
1229 case SN_CompareEqual
:
1230 return emit_simd_ins_for_sig (cfg
, klass
, arg0_type
== MONO_TYPE_R8
? OP_XCOMPARE_FP
: OP_XCOMPARE
, CMP_EQ
, arg0_type
, fsig
, args
);
1231 case SN_CompareGreaterThan
:
1232 return emit_simd_ins_for_sig (cfg
, klass
, arg0_type
== MONO_TYPE_R8
? OP_XCOMPARE_FP
: OP_XCOMPARE
, CMP_GT
, arg0_type
, fsig
, args
);
1233 case SN_CompareLessThan
:
1234 return emit_simd_ins_for_sig (cfg
, klass
, arg0_type
== MONO_TYPE_R8
? OP_XCOMPARE_FP
: OP_XCOMPARE
, CMP_LT
, arg0_type
, fsig
, args
);
/* ConvertToInt32: R8 input goes through SIMD_OP_SSE_CVTSD2SI; I4 input extracts element 0. */
1235 case SN_ConvertToInt32
:
1236 if (arg0_type
== MONO_TYPE_R8
)
1237 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_I4_X
, SIMD_OP_SSE_CVTSD2SI
, arg0_type
, fsig
, args
);
1238 else if (arg0_type
== MONO_TYPE_I4
)
1239 return emit_simd_ins_for_sig (cfg
, klass
, OP_EXTRACT_I4
, 0, arg0_type
, fsig
, args
);
/* ConvertToInt64: R8 input goes through SIMD_OP_SSE_CVTSD2SI64; I8 input extracts element 0. */
1242 case SN_ConvertToInt64
:
1243 if (arg0_type
== MONO_TYPE_R8
)
1244 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_I8_X
, SIMD_OP_SSE_CVTSD2SI64
, arg0_type
, fsig
, args
);
1245 else if (arg0_type
== MONO_TYPE_I8
)
1246 return emit_simd_ins_for_sig (cfg
, klass
, OP_EXTRACT_I8
, 0 /*element index*/, arg0_type
, fsig
, args
);
1248 g_assert_not_reached ();
/* The opcode defaults to OP_SSE2_CVTSS2SD and is overridden when the second parameter is I4 or I8. */
1250 case SN_ConvertScalarToVector128Double
: {
1251 int op
= OP_SSE2_CVTSS2SD
;
1252 switch (fsig
->params
[1]->type
) {
1253 case MONO_TYPE_I4
: op
= OP_SSE2_CVTSI2SD
; break;
1254 case MONO_TYPE_I8
: op
= OP_SSE2_CVTSI2SD64
; break;
1256 return emit_simd_ins_for_sig (cfg
, klass
, op
, 0, 0, fsig
, args
);
/* All four integer scalar-to-vector conversions share OP_CREATE_SCALAR. */
1258 case SN_ConvertScalarToVector128Int32
:
1259 case SN_ConvertScalarToVector128Int64
:
1260 case SN_ConvertScalarToVector128UInt32
:
1261 case SN_ConvertScalarToVector128UInt64
:
1262 return emit_simd_ins_for_sig (cfg
, klass
, OP_CREATE_SCALAR
, -1, arg0_type
, fsig
, args
);
1263 case SN_ConvertToUInt32
:
1264 return emit_simd_ins_for_sig (cfg
, klass
, OP_EXTRACT_I4
, 0 /*element index*/, arg0_type
, fsig
, args
);
1265 case SN_ConvertToUInt64
:
1266 return emit_simd_ins_for_sig (cfg
, klass
, OP_EXTRACT_I8
, 0 /*element index*/, arg0_type
, fsig
, args
);
/* The packed conversions below pick their opcode from the source element type. */
1267 case SN_ConvertToVector128Double
:
1268 if (arg0_type
== MONO_TYPE_R4
)
1269 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTPS2PD
, 0, arg0_type
, fsig
, args
);
1270 else if (arg0_type
== MONO_TYPE_I4
)
1271 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTDQ2PD
, 0, arg0_type
, fsig
, args
);
1274 case SN_ConvertToVector128Int32
:
1275 if (arg0_type
== MONO_TYPE_R4
)
1276 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTPS2DQ
, 0, arg0_type
, fsig
, args
);
1277 else if (arg0_type
== MONO_TYPE_R8
)
1278 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTPD2DQ
, 0, arg0_type
, fsig
, args
);
1281 case SN_ConvertToVector128Int32WithTruncation
:
1282 if (arg0_type
== MONO_TYPE_R4
)
1283 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTTPS2DQ
, 0, arg0_type
, fsig
, args
);
1284 else if (arg0_type
== MONO_TYPE_R8
)
1285 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTTPD2DQ
, 0, arg0_type
, fsig
, args
);
1288 case SN_ConvertToVector128Single
:
1289 if (arg0_type
== MONO_TYPE_I4
)
1290 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTDQ2PS
, 0, arg0_type
, fsig
, args
);
1291 else if (arg0_type
== MONO_TYPE_R8
)
1292 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTPD2PS
, 0, arg0_type
, fsig
, args
);
/* Both vector loads use OP_SSE_LOADU; the constant carries the alignment (16 for the aligned variant, 1 otherwise). */
1295 case SN_LoadAlignedVector128
:
1296 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE_LOADU
, 16 /*alignment*/, arg0_type
, fsig
, args
);
1297 case SN_LoadVector128
:
1298 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE_LOADU
, 1 /*alignment*/, arg0_type
, fsig
, args
);
/* The two-parameter overload uses OP_SSE_MOVS2, any other parameter count uses OP_SSE_MOVS. */
1300 return emit_simd_ins_for_sig (cfg
, klass
, fsig
->param_count
== 2 ? OP_SSE_MOVS2
: OP_SSE_MOVS
, -1, arg0_type
, fsig
, args
);
/* Maximum: opcode selected by element type. */
1302 switch (arg0_type
) {
1304 return emit_simd_ins_for_sig (cfg
, klass
, OP_PMAXB_UN
, 0, arg0_type
, fsig
, args
);
1306 return emit_simd_ins_for_sig (cfg
, klass
, OP_PMAXW
, 0, arg0_type
, fsig
, args
);
1307 case MONO_TYPE_R8
: return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_MAXPD
, arg0_type
, fsig
, args
);
1309 g_assert_not_reached ();
/* Minimum: opcode selected by element type. */
1314 switch (arg0_type
) {
1316 return emit_simd_ins_for_sig (cfg
, klass
, OP_PMINB_UN
, 0, arg0_type
, fsig
, args
);
1318 return emit_simd_ins_for_sig (cfg
, klass
, OP_PMINW
, 0, arg0_type
, fsig
, args
);
1319 case MONO_TYPE_R8
: return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_MINPD
, arg0_type
, fsig
, args
);
1321 g_assert_not_reached ();
/* Multiply: U4 elements use OP_SSE2_PMULUDQ, R8 elements use OP_MULPD; anything else asserts. */
1326 if (arg0_type
== MONO_TYPE_U4
)
1327 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PMULUDQ
, 0, arg0_type
, fsig
, args
);
1328 else if (arg0_type
== MONO_TYPE_R8
)
1329 return emit_simd_ins_for_sig (cfg
, klass
, OP_MULPD
, 0, arg0_type
, fsig
, args
);
1331 g_assert_not_reached ();
/* MultiplyHigh: signed (I2) and unsigned (U2) word variants; anything else asserts. */
1332 case SN_MultiplyHigh
:
1333 if (arg0_type
== MONO_TYPE_I2
)
1334 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PMULHW
, arg0_type
, fsig
, args
);
1335 else if (arg0_type
== MONO_TYPE_U2
)
1336 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PMULHUW
, arg0_type
, fsig
, args
);
1338 g_assert_not_reached ();
/* PackSignedSaturate: I2 sources use PACKSSWB, I4 sources use PACKSSDW; anything else asserts. */
1339 case SN_PackSignedSaturate
:
1340 if (arg0_type
== MONO_TYPE_I2
)
1341 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PACKSSWB
, arg0_type
, fsig
, args
);
1342 else if (arg0_type
== MONO_TYPE_I4
)
1343 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PACKSSDW
, arg0_type
, fsig
, args
);
1345 g_assert_not_reached ();
1346 case SN_PackUnsignedSaturate
:
1347 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PACKUS
, -1, arg0_type
, fsig
, args
);
/* Element extract: only U2 elements are accepted; note that arg0_type is passed in the constant slot here. */
1349 g_assert (arg0_type
== MONO_TYPE_U2
);
1350 return emit_simd_ins_for_sig (cfg
, klass
, OP_XEXTRACT_I32
, arg0_type
, 0, fsig
, args
);
/* Element insert: only I2 and U2 elements are accepted. */
1352 g_assert (arg0_type
== MONO_TYPE_I2
|| arg0_type
== MONO_TYPE_U2
);
1353 return emit_simd_ins_for_sig (cfg
, klass
, OP_XINSERT_I2
, 0, arg0_type
, fsig
, args
);
/*
 * The three shift families follow one pattern: is_imm is set when the second
 * parameter has type MONO_TYPE_U1, which selects the *_IMM SimdOp and
 * OP_XOP_X_X_I4; otherwise the non-immediate SimdOp and OP_XOP_X_X_X are used.
 * The SimdOp itself is picked by element type and unsupported types assert.
 */
1354 case SN_ShiftRightLogical
: {
1355 gboolean is_imm
= fsig
->params
[1]->type
== MONO_TYPE_U1
;
1356 SimdOp op
= (SimdOp
)0;
1357 switch (arg0_type
) {
1360 op
= is_imm
? SIMD_OP_SSE_PSRLW_IMM
: SIMD_OP_SSE_PSRLW
;
1364 op
= is_imm
? SIMD_OP_SSE_PSRLD_IMM
: SIMD_OP_SSE_PSRLD
;
1368 op
= is_imm
? SIMD_OP_SSE_PSRLQ_IMM
: SIMD_OP_SSE_PSRLQ
;
1370 default: g_assert_not_reached (); break;
1372 return emit_simd_ins_for_sig (cfg
, klass
, is_imm
? OP_XOP_X_X_I4
: OP_XOP_X_X_X
, op
, arg0_type
, fsig
, args
);
/* Arithmetic right shift: only word (PSRAW) and doubleword (PSRAD) forms are listed. */
1374 case SN_ShiftRightArithmetic
: {
1375 gboolean is_imm
= fsig
->params
[1]->type
== MONO_TYPE_U1
;
1376 SimdOp op
= (SimdOp
)0;
1377 switch (arg0_type
) {
1380 op
= is_imm
? SIMD_OP_SSE_PSRAW_IMM
: SIMD_OP_SSE_PSRAW
;
1384 op
= is_imm
? SIMD_OP_SSE_PSRAD_IMM
: SIMD_OP_SSE_PSRAD
;
1386 default: g_assert_not_reached (); break;
1388 return emit_simd_ins_for_sig (cfg
, klass
, is_imm
? OP_XOP_X_X_I4
: OP_XOP_X_X_X
, op
, arg0_type
, fsig
, args
);
1390 case SN_ShiftLeftLogical
: {
1391 gboolean is_imm
= fsig
->params
[1]->type
== MONO_TYPE_U1
;
1392 SimdOp op
= (SimdOp
)0;
1393 switch (arg0_type
) {
1396 op
= is_imm
? SIMD_OP_SSE_PSLLW_IMM
: SIMD_OP_SSE_PSLLW
;
1400 op
= is_imm
? SIMD_OP_SSE_PSLLD_IMM
: SIMD_OP_SSE_PSLLD
;
1404 op
= is_imm
? SIMD_OP_SSE_PSLLQ_IMM
: SIMD_OP_SSE_PSLLQ
;
1406 default: g_assert_not_reached (); break;
1408 return emit_simd_ins_for_sig (cfg
, klass
, is_imm
? OP_XOP_X_X_I4
: OP_XOP_X_X_X
, op
, arg0_type
, fsig
, args
);
/* Whole-lane byte shifts map directly onto dedicated opcodes. */
1410 case SN_ShiftLeftLogical128BitLane
:
1411 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PSLLDQ
, 0, arg0_type
, fsig
, args
);
1412 case SN_ShiftRightLogical128BitLane
:
1413 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PSRLDQ
, 0, arg0_type
, fsig
, args
);
/* Shuffle: the two-parameter form requires I4/U4 elements (PSHUFD), the three-parameter form requires R8 (SHUFPD). */
1415 if (fsig
->param_count
== 2) {
1416 g_assert (arg0_type
== MONO_TYPE_I4
|| arg0_type
== MONO_TYPE_U4
);
1417 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PSHUFD
, 0, arg0_type
, fsig
, args
);
1418 } else if (fsig
->param_count
== 3) {
1419 g_assert (arg0_type
== MONO_TYPE_R8
);
1420 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_SHUFPD
, 0, arg0_type
, fsig
, args
);
1422 g_assert_not_reached ();
/* ShuffleHigh and the PSHUFLW path below both assert a two-parameter signature. */
1426 case SN_ShuffleHigh
:
1427 g_assert (fsig
->param_count
== 2);
1428 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PSHUFHW
, 0, arg0_type
, fsig
, args
);
1430 g_assert (fsig
->param_count
== 2);
1431 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PSHUFLW
, 0, arg0_type
, fsig
, args
);
/* With one parameter, args [0] feeds both source registers; with two, args [0] and args [1] are used. */
1432 case SN_SqrtScalar
: {
1433 if (fsig
->param_count
== 1)
1434 return emit_simd_ins (cfg
, klass
, OP_SSE2_SQRTSD
, args
[0]->dreg
, args
[0]->dreg
);
1435 else if (fsig
->param_count
== 2)
1436 return emit_simd_ins (cfg
, klass
, OP_SSE2_SQRTSD
, args
[0]->dreg
, args
[1]->dreg
);
1438 g_assert_not_reached ();
/* The load opcode is picked from the element type; unlisted types assert. */
1442 case SN_LoadScalarVector128
: {
1444 switch (arg0_type
) {
1446 case MONO_TYPE_U4
: op
= OP_SSE2_MOVD
; break;
1448 case MONO_TYPE_U8
: op
= OP_SSE2_MOVQ
; break;
1449 case MONO_TYPE_R8
: op
= OP_SSE2_MOVUPD
; break;
1450 default: g_assert_not_reached(); break;
1452 return emit_simd_ins_for_sig (cfg
, klass
, op
, 0, 0, fsig
, args
);
/* Sse3 class: guarded by a COMPILE_LLVM (cfg) check, method resolved through sse3_methods. */
1459 if (is_hw_intrinsics_class (klass
, "Sse3", &is_64bit
)) {
1460 if (!COMPILE_LLVM (cfg
))
1462 info
= lookup_intrins_info (sse3_methods
, sizeof (sse3_methods
), cmethod
);
/* Table-driven emission: opcode and constant come straight from the table entry. */
1469 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
/*
 * NOTE(review): unlike the sibling sections this stores the masked feature
 * bits without "!= 0". supported is only tested for truthiness below, so the
 * emitted constant is still 1 or 0.
 */
1471 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSE3
);
1474 case SN_get_IsSupported
:
1475 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1476 ins
->type
= STACK_I4
;
/* The three cases below pick the single (R4) or double (R8) precision SimdOp and assert on other element types. */
1478 case SN_AddSubtract
:
1479 if (arg0_type
== MONO_TYPE_R4
)
1480 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_ADDSUBPS
, arg0_type
, fsig
, args
);
1481 else if (arg0_type
== MONO_TYPE_R8
)
1482 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_ADDSUBPD
, arg0_type
, fsig
, args
);
1484 g_assert_not_reached ();
1486 case SN_HorizontalAdd
:
1487 if (arg0_type
== MONO_TYPE_R4
)
1488 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_HADDPS
, arg0_type
, fsig
, args
);
1489 else if (arg0_type
== MONO_TYPE_R8
)
1490 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_HADDPD
, arg0_type
, fsig
, args
);
1492 g_assert_not_reached ();
1494 case SN_HorizontalSubtract
:
1495 if (arg0_type
== MONO_TYPE_R4
)
1496 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_HSUBPS
, arg0_type
, fsig
, args
);
1497 else if (arg0_type
== MONO_TYPE_R8
)
1498 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_HSUBPD
, arg0_type
, fsig
, args
);
1500 g_assert_not_reached ();
1503 g_assert_not_reached ();
/* Ssse3 class: guarded by a COMPILE_LLVM (cfg) check, method resolved through ssse3_methods. */
1508 if (is_hw_intrinsics_class (klass
, "Ssse3", &is_64bit
)) {
1509 if (!COMPILE_LLVM (cfg
))
1511 info
= lookup_intrins_info (ssse3_methods
, sizeof (ssse3_methods
), cmethod
);
/* Table-driven emission: opcode and constant come straight from the table entry. */
1518 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
/* supported mirrors the MONO_CPU_X86_SSSE3 bit of the CPU feature mask. */
1520 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSSE3
) != 0;
1523 case SN_get_IsSupported
:
1524 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1525 ins
->type
= STACK_I4
;
/* AlignRight: the third argument is only accepted as an OP_ICONST; its value becomes the instruction constant. */
1528 if (args
[2]->opcode
== OP_ICONST
)
1529 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSSE3_ALIGNR
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1530 return emit_invalid_operation (cfg
, "mask in Ssse3.AlignRight must be constant");
/* Horizontal add/subtract: the word form is used for I2 elements, the doubleword form for every other element type. */
1531 case SN_HorizontalAdd
:
1532 if (arg0_type
== MONO_TYPE_I2
)
1533 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHADDW
, arg0_type
, fsig
, args
);
1534 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHADDD
, arg0_type
, fsig
, args
);
1535 case SN_HorizontalSubtract
:
1536 if (arg0_type
== MONO_TYPE_I2
)
1537 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHSUBW
, arg0_type
, fsig
, args
);
1538 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHSUBD
, arg0_type
, fsig
, args
);
/* PSIGN family: byte form for I1, word form for I2, doubleword form for every other element type. */
1540 if (arg0_type
== MONO_TYPE_I1
)
1541 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PSIGNB
, arg0_type
, fsig
, args
);
1542 if (arg0_type
== MONO_TYPE_I2
)
1543 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PSIGNW
, arg0_type
, fsig
, args
);
1544 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PSIGND
, arg0_type
, fsig
, args
);
1546 g_assert_not_reached ();
/* Sse41 class: guarded by a COMPILE_LLVM (cfg) check, method resolved through sse41_methods. */
1551 if (is_hw_intrinsics_class (klass
, "Sse41", &is_64bit
)) {
1552 if (!COMPILE_LLVM (cfg
))
1554 info
= lookup_intrins_info (sse41_methods
, sizeof (sse41_methods
), cmethod
);
/* Table-driven emission: opcode and constant come straight from the table entry. */
1561 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
/* supported requires both an LLVM compile and the MONO_CPU_X86_SSE41 feature bit. */
1563 supported
= COMPILE_LLVM (cfg
) && (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSE41
) != 0;
1566 case SN_get_IsSupported
:
1567 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1568 ins
->type
= STACK_I4
;
/* DotProduct: needs a constant third argument; R4 elements use DPPS, R8 elements use DPPD. */
1571 if (args
[2]->opcode
== OP_ICONST
&& arg0_type
== MONO_TYPE_R4
)
1572 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_DPPS_IMM
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1573 else if (args
[2]->opcode
== OP_ICONST
&& arg0_type
== MONO_TYPE_R8
)
1574 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_DPPD_IMM
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1575 // FIXME: handle non-constant control byte (generate a switch)
1576 return emit_invalid_operation (cfg
, "control byte in Sse41.DotProduct must be constant");
/* The third argument is only accepted as an OP_ICONST; its value becomes the instruction constant. */
1577 case SN_MultipleSumAbsoluteDifferences
:
1578 if (args
[2]->opcode
== OP_ICONST
)
1579 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_MPSADBW_IMM
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1580 // FIXME: handle non-constant control byte (generate a switch)
1581 return emit_invalid_operation (cfg
, "control byte in Sse41.MultipleSumAbsoluteDifferences must be constant");
/* Blend: same constant-only rule for the third argument. */
1583 if (args
[2]->opcode
== OP_ICONST
)
1584 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_BLEND_IMM
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1585 // FIXME: handle non-constant control byte (generate a switch)
1586 return emit_invalid_operation (cfg
, "control byte in Sse41.Blend must be constant");
1587 case SN_BlendVariable
:
1588 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_BLENDV
, -1, arg0_type
, fsig
, args
);
/* Element extract: the opcode is picked from the element type; note that arg0_type is passed in the constant slot. */
1591 switch (arg0_type
) {
1594 case MONO_TYPE_I4
: op
= OP_XEXTRACT_I32
; break;
1596 case MONO_TYPE_U8
: op
= OP_XEXTRACT_I64
; break;
1597 case MONO_TYPE_R4
: op
= OP_XEXTRACT_R4
; break;
1598 default: g_assert_not_reached(); break;
1600 return emit_simd_ins_for_sig (cfg
, klass
, op
, arg0_type
, 0, fsig
, args
);
/* Insert: the third argument (the index) must be an OP_ICONST. */
1603 if (args
[2]->opcode
== OP_ICONST
)
1604 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_INSERT
, -1, arg0_type
, fsig
, args
);
1605 // FIXME: handle non-constant index (generate a switch)
1606 return emit_invalid_operation (cfg
, "index in Sse41.Insert must be constant");
1608 g_assert_not_reached ();
/* Sse42 class: guarded by a COMPILE_LLVM (cfg) check, method resolved through sse42_methods. */
1613 if (is_hw_intrinsics_class (klass
, "Sse42", &is_64bit
)) {
1614 if (!COMPILE_LLVM (cfg
))
1616 info
= lookup_intrins_info (sse42_methods
, sizeof (sse42_methods
), cmethod
);
/* Table-driven emission: opcode and constant come straight from the table entry. */
1623 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
/* supported requires both an LLVM compile and the MONO_CPU_X86_SSE42 feature bit. */
1625 supported
= COMPILE_LLVM (cfg
) && (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSE42
) != 0;
1628 case SN_get_IsSupported
:
1629 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1630 ins
->type
= STACK_I4
;
/* CRC: the 64-bit opcode is used when the second parameter is U8; that parameter's type is passed in the constant slot. */
1633 MonoTypeEnum arg1_type
= get_underlying_type (fsig
->params
[1]);
1634 return emit_simd_ins_for_sig (cfg
, klass
,
1635 arg1_type
== MONO_TYPE_U8
? OP_SSE42_CRC64
: OP_SSE42_CRC32
,
1636 arg1_type
, arg0_type
, fsig
, args
);
1639 g_assert_not_reached ();
/* Pclmulqdq class: guarded by a COMPILE_LLVM (cfg) check, method resolved through pclmulqdq_methods. */
1644 if (is_hw_intrinsics_class (klass
, "Pclmulqdq", &is_64bit
)) {
1645 if (!COMPILE_LLVM (cfg
))
1647 info
= lookup_intrins_info (pclmulqdq_methods
, sizeof (pclmulqdq_methods
), cmethod
);
/* Table-driven emission: opcode and constant come straight from the table entry. */
1654 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
/* supported requires both an LLVM compile and the MONO_CPU_X86_PCLMUL feature bit. */
1656 supported
= COMPILE_LLVM (cfg
) && (mini_get_cpu_features (cfg
) & MONO_CPU_X86_PCLMUL
) != 0;
/* The third argument is only accepted as an OP_ICONST; its value becomes the instruction constant. */
1659 case SN_CarrylessMultiply
: {
1660 if (args
[2]->opcode
== OP_ICONST
)
1661 return emit_simd_ins_for_sig (cfg
, klass
, OP_PCLMULQDQ_IMM
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1662 // FIXME: handle non-constant control byte (generate a switch)
1663 return emit_invalid_operation (cfg
, "index in Pclmulqdq.CarrylessMultiply must be constant");
1665 case SN_get_IsSupported
:
1666 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1667 ins
->type
= STACK_I4
;
1670 g_assert_not_reached ();
/* Aes class: guarded by a COMPILE_LLVM (cfg) check, method resolved through aes_methods. */
1675 if (is_hw_intrinsics_class (klass
, "Aes", &is_64bit
)) {
1676 if (!COMPILE_LLVM (cfg
))
1678 info
= lookup_intrins_info (aes_methods
, sizeof (aes_methods
), cmethod
);
/* Table-driven emission: opcode and constant come straight from the table entry. */
1685 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
/* supported requires both an LLVM compile and the MONO_CPU_X86_AES feature bit. */
1687 supported
= COMPILE_LLVM (cfg
) && (mini_get_cpu_features (cfg
) & MONO_CPU_X86_AES
) != 0;
1690 case SN_get_IsSupported
:
1691 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1692 ins
->type
= STACK_I4
;
/* KeygenAssist: here the control byte is the second argument (args [1]) and must be an OP_ICONST. */
1694 case SN_KeygenAssist
: {
1695 if (args
[1]->opcode
== OP_ICONST
)
1696 return emit_simd_ins_for_sig (cfg
, klass
, OP_AES_KEYGEN_IMM
, args
[1]->inst_c0
, arg0_type
, fsig
, args
);
1697 // FIXME: handle non-constant control byte (generate a switch)
1698 return emit_invalid_operation (cfg
, "control byte in Aes.KeygenAssist must be constant");
1701 g_assert_not_reached ();
/* Popcnt class: method resolved through popcnt_methods. */
1706 if (is_hw_intrinsics_class (klass
, "Popcnt", &is_64bit
)) {
1707 info
= lookup_intrins_info (popcnt_methods
, sizeof (popcnt_methods
), cmethod
);
/* supported mirrors the MONO_CPU_X86_POPCNT bit of the CPU feature mask. */
1712 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_POPCNT
) != 0;
1715 case SN_get_IsSupported
:
1716 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1717 ins
->type
= STACK_I4
;
/* Population count of args [0]: opcode, destination register kind and stack type all follow is_64bit. */
1722 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_POPCNT64
: OP_POPCNT32
);
1723 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1724 ins
->sreg1
= args
[0]->dreg
;
1725 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1726 MONO_ADD_INS (cfg
->cbb
, ins
);
/* Lzcnt class: method resolved through lzcnt_methods. */
1732 if (is_hw_intrinsics_class (klass
, "Lzcnt", &is_64bit
)) {
1733 info
= lookup_intrins_info (lzcnt_methods
, sizeof (lzcnt_methods
), cmethod
);
/* supported mirrors the MONO_CPU_X86_LZCNT bit of the CPU feature mask. */
1738 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_LZCNT
) != 0;
1741 case SN_get_IsSupported
:
1742 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1743 ins
->type
= STACK_I4
;
/* Leading zero count of args [0]: opcode, destination register kind and stack type all follow is_64bit. */
1745 case SN_LeadingZeroCount
:
1748 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_LZCNT64
: OP_LZCNT32
);
1749 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1750 ins
->sreg1
= args
[0]->dreg
;
1751 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1752 MONO_ADD_INS (cfg
->cbb
, ins
);
/* Bmi1 class: guarded by a COMPILE_LLVM (cfg) check, method resolved through bmi1_methods. */
1758 if (is_hw_intrinsics_class (klass
, "Bmi1", &is_64bit
)) {
1759 if (!COMPILE_LLVM (cfg
))
1761 info
= lookup_intrins_info (bmi1_methods
, sizeof (bmi1_methods
), cmethod
);
1766 g_assert (id
!= -1);
/* supported mirrors the MONO_CPU_X86_BMI1 bit of the CPU feature mask. */
1767 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_BMI1
) != 0;
1770 case SN_get_IsSupported
:
1771 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1772 ins
->type
= STACK_I4
;
/* Emitted as two generic ALU ops: tmp = args [0] ^ -1 (bitwise not), result = tmp & args [1]. */
1776 // LLVM replaces it with `andn`
1777 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1778 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1779 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LXOR_IMM
: OP_IXOR_IMM
, tmp_reg
, args
[0]->dreg
, -1);
1780 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, tmp_reg
, args
[1]->dreg
);
/* The two-parameter overload maps to a single BEXTR instruction taking args [0] and args [1]. */
1783 case SN_BitFieldExtract
: {
1784 if (fsig
->param_count
== 2) {
1785 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_BEXTR64
: OP_BEXTR32
);
1786 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1787 ins
->sreg1
= args
[0]->dreg
;
1788 ins
->sreg2
= args
[1]->dreg
;
1789 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1790 MONO_ADD_INS (cfg
->cbb
, ins
);
/* Emitted as: tmp = x - 1, result = x ^ tmp, with x = args [0]. */
1794 case SN_GetMaskUpToLowestSetBit
: {
1796 // LLVM replaces it with `blsmsk`
1797 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1798 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1799 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LSUB_IMM
: OP_ISUB_IMM
, tmp_reg
, args
[0]->dreg
, 1);
1800 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LXOR
: OP_IXOR
, result_reg
, args
[0]->dreg
, tmp_reg
);
/* Emitted as: tmp = x - 1, result = x & tmp, with x = args [0]. */
1803 case SN_ResetLowestSetBit
: {
1805 // LLVM replaces it with `blsr`
1806 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1807 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1808 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LSUB_IMM
: OP_ISUB_IMM
, tmp_reg
, args
[0]->dreg
, 1);
1809 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, args
[0]->dreg
, tmp_reg
);
/* Emitted as: tmp = 0 - x, result = x & tmp, with x = args [0]. */
1812 case SN_ExtractLowestSetBit
: {
1814 // LLVM replaces it with `blsi`
1815 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1816 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1817 int zero_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1818 MONO_EMIT_NEW_ICONST (cfg
, zero_reg
, 0);
1819 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LSUB
: OP_ISUB
, tmp_reg
, zero_reg
, args
[0]->dreg
);
1820 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, args
[0]->dreg
, tmp_reg
);
/* Trailing zero count of args [0] via the OP_CTTZ32 / OP_CTTZ64 opcodes. */
1823 case SN_TrailingZeroCount
:
1824 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_CTTZ64
: OP_CTTZ32
);
1825 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1826 ins
->sreg1
= args
[0]->dreg
;
1827 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1828 MONO_ADD_INS (cfg
->cbb
, ins
);
1831 g_assert_not_reached ();
/* Bmi2 class: guarded by a COMPILE_LLVM (cfg) check, method resolved through bmi2_methods. */
1834 if (is_hw_intrinsics_class (klass
, "Bmi2", &is_64bit
)) {
1835 if (!COMPILE_LLVM (cfg
))
1837 info
= lookup_intrins_info (bmi2_methods
, sizeof (bmi2_methods
), cmethod
);
/* supported mirrors the MONO_CPU_X86_BMI2 bit of the CPU feature mask. */
1842 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_BMI2
) != 0;
1845 case SN_get_IsSupported
:
1846 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1847 ins
->type
= STACK_I4
;
/*
 * MultiplyNoFlags: the two-parameter overload uses the MULX_H opcodes, the
 * three-parameter overload uses MULX_HL and passes args [2] as a third source
 * register. Other parameter counts assert.
 */
1849 case SN_MultiplyNoFlags
:
1850 if (fsig
->param_count
== 2) {
1851 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_MULX_H64
: OP_MULX_H32
);
1852 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1853 ins
->sreg1
= args
[0]->dreg
;
1854 ins
->sreg2
= args
[1]->dreg
;
1855 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1856 MONO_ADD_INS (cfg
->cbb
, ins
);
1857 } else if (fsig
->param_count
== 3) {
1858 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_MULX_HL64
: OP_MULX_HL32
);
1859 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1860 ins
->sreg1
= args
[0]->dreg
;
1861 ins
->sreg2
= args
[1]->dreg
;
1862 ins
->sreg3
= args
[2]->dreg
;
1863 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1864 MONO_ADD_INS (cfg
->cbb
, ins
);
1866 g_assert_not_reached ();
/* The remaining cases each emit one two-source instruction whose opcode width follows is_64bit. */
1869 case SN_ZeroHighBits
:
1870 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_BZHI64
: OP_BZHI32
);
1871 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1872 ins
->sreg1
= args
[0]->dreg
;
1873 ins
->sreg2
= args
[1]->dreg
;
1874 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1875 MONO_ADD_INS (cfg
->cbb
, ins
);
1877 case SN_ParallelBitExtract
:
1878 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_PEXT64
: OP_PEXT32
);
1879 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1880 ins
->sreg1
= args
[0]->dreg
;
1881 ins
->sreg2
= args
[1]->dreg
;
1882 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1883 MONO_ADD_INS (cfg
->cbb
, ins
);
1885 case SN_ParallelBitDeposit
:
1886 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_PDEP64
: OP_PDEP32
);
1887 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1888 ins
->sreg1
= args
[0]->dreg
;
1889 ins
->sreg2
= args
[1]->dreg
;
1890 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1891 MONO_ADD_INS (cfg
->cbb
, ins
);
1894 g_assert_not_reached ();
/* X86Base class: guarded by a COMPILE_LLVM (cfg) check, method resolved through x86base_methods. */
1898 if (is_hw_intrinsics_class (klass
, "X86Base", &is_64bit
)) {
1899 if (!COMPILE_LLVM (cfg
))
1902 info
= lookup_intrins_info (x86base_methods
, sizeof (x86base_methods
), cmethod
);
/* Unlike the other classes, get_IsSupported here is the constant 1 with no CPU feature test. */
1908 case SN_get_IsSupported
:
1909 EMIT_NEW_ICONST (cfg
, ins
, 1);
1910 ins
->type
= STACK_I4
;
/* Bit scans of args [0]; opcode, destination register kind and stack type follow is_64bit. */
1912 case SN_BitScanForward
:
1913 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_X86_BSF64
: OP_X86_BSF32
);
1914 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1915 ins
->sreg1
= args
[0]->dreg
;
1916 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1917 MONO_ADD_INS (cfg
->cbb
, ins
);
1919 case SN_BitScanReverse
:
1920 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_X86_BSR64
: OP_X86_BSR32
);
1921 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1922 ins
->sreg1
= args
[0]->dreg
;
1923 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1924 MONO_ADD_INS (cfg
->cbb
, ins
);
1927 g_assert_not_reached ();
/* Method ids (SN_ constants) recognised by emit_vector128 (), which searches this array with lookup_intrins (). */
1934 static guint16 vector_128_methods
[] = {
1946 SN_CreateScalarUnsafe
,
/* Method ids (SN_ constants) recognised by emit_vector128_t (), which searches this array with lookup_intrins (). */
1949 static guint16 vector_128_t_methods
[] = {
/*
 * emit_vector128:
 *
 *   Emit IR for the methods listed in vector_128_methods. It is called from
 * mono_emit_simd_intrinsics () for the class named "Vector128" in the
 * System.Runtime.Intrinsics namespace.
 *
 *   cfg     - compilation the instructions are added to
 *   cmethod - the method being called; looked up by lookup_intrins ()
 *   fsig    - signature of cmethod
 *   args    - argument instructions
 */
1955 emit_vector128 (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
1960 if (!COMPILE_LLVM (cfg
))
1963 klass
= cmethod
->klass
;
1964 id
= lookup_intrins (vector_128_methods
, sizeof (vector_128_methods
), cmethod
);
/* Bail out with NULL when the method being compiled (cfg->method, not cmethod) belongs to a class named "Vector256". */
1968 if (!strcmp (m_class_get_name (cfg
->method
->klass
), "Vector256"))
1969 return NULL
; // TODO: Fix Vector256.WithUpper/WithLower
/* Underlying type of the first parameter, or MONO_TYPE_VOID when the method takes no parameters. */
1971 MonoTypeEnum arg0_type
= fsig
->param_count
> 0 ? get_underlying_type (fsig
->params
[0]) : MONO_TYPE_VOID
;
1984 return emit_simd_ins (cfg
, klass
, OP_XCAST
, args
[0]->dreg
, -1);
/* A single argument whose type equals the vector element type is emitted with the expand opcode for that type. */
1986 MonoType
*etype
= get_vector_t_elem_type (fsig
->ret
);
1987 if (fsig
->param_count
== 1 && mono_metadata_type_equal (fsig
->params
[0], etype
)) {
1988 return emit_simd_ins (cfg
, klass
, type_to_expand_op (etype
), args
[0]->dreg
, -1);
/*
 * Otherwise the arguments are staged through memory: an OP_LOCALLOC_IMM
 * buffer is created, each argument is stored at offset i * esize, and the
 * vector is read back from the buffer with OP_SSE_LOADU.
 */
1990 MonoInst
*ins
, *load
;
1992 // FIXME: Optimize this
1993 MONO_INST_NEW (cfg
, ins
, OP_LOCALLOC_IMM
);
1994 ins
->dreg
= alloc_preg (cfg
);
1996 MONO_ADD_INS (cfg
->cbb
, ins
);
1998 int esize
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
1999 int store_opcode
= mono_type_to_store_membase (cfg
, etype
);
2000 for (int i
= 0; i
< fsig
->param_count
; ++i
)
2001 MONO_EMIT_NEW_STORE_MEMBASE (cfg
, store_opcode
, ins
->dreg
, i
* esize
, args
[i
]->dreg
);
2003 load
= emit_simd_ins (cfg
, klass
, OP_SSE_LOADU
, ins
->dreg
, -1);
/* inst_c1 of the load records the underlying element type. */
2005 load
->inst_c1
= get_underlying_type (etype
);
2009 case SN_CreateScalarUnsafe
:
2010 return emit_simd_ins_for_sig (cfg
, klass
, OP_CREATE_SCALAR_UNSAFE
, -1, arg0_type
, fsig
, args
);
/*
 * emit_vector128_t:
 *
 *   Emit IR for the methods listed in vector_128_t_methods. It is called from
 * mono_emit_simd_intrinsics () for the class named "Vector128`1" in the
 * System.Runtime.Intrinsics namespace.
 *
 *   cfg     - compilation the instructions are added to
 *   cmethod - the method being called; looked up by lookup_intrins ()
 *   fsig    - signature of cmethod
 *   args    - argument instructions
 */
2019 emit_vector128_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
2022 MonoType
*type
, *etype
;
2026 id
= lookup_intrins (vector_128_t_methods
, sizeof (vector_128_t_methods
), cmethod
);
2030 klass
= cmethod
->klass
;
2031 type
= m_class_get_byval_arg (klass
);
/* etype is the first generic type argument of klass; size is the value size of that element type. */
2032 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
2033 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
/* Element types that are not primitive, or that are char or boolean, are singled out by this test. */
2037 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
/* At verbose levels above 1 the full method name is printed. */
2040 if (cfg
->verbose_level
> 1) {
2041 char *name
= mono_method_full_name (cmethod
, TRUE
);
2042 printf (" SIMD intrinsic %s\n", name
);
/* Signature check: no parameters and an I4 return type; len is then emitted as an integer constant. */
2048 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
2050 EMIT_NEW_ICONST (cfg
, ins
, len
);
2053 return emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
/* Method ids (SN_ constants) recognised by emit_vector256_t (), which searches this array with lookup_intrins (). */
2062 static guint16 vector_256_t_methods
[] = {
/*
 * emit_vector256_t:
 *
 *   Emit IR for the methods listed in vector_256_t_methods. It is called from
 * mono_emit_simd_intrinsics () for the class named "Vector256`1" in the
 * System.Runtime.Intrinsics namespace.
 *
 *   cfg     - compilation the instructions are added to
 *   cmethod - the method being called; looked up by lookup_intrins ()
 *   fsig    - signature of cmethod
 *   args    - argument instructions
 */
2067 emit_vector256_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
2070 MonoType
*type
, *etype
;
2074 id
= lookup_intrins (vector_256_t_methods
, sizeof (vector_256_t_methods
), cmethod
);
2078 klass
= cmethod
->klass
;
2079 type
= m_class_get_byval_arg (klass
);
/* etype is the first generic type argument of klass; size is the value size of that element type. */
2080 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
2081 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
/* Element types that are not primitive, or that are char or boolean, are singled out by this test. */
2085 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
/* At verbose levels above 1 the full method name is printed. */
2088 if (cfg
->verbose_level
> 1) {
2089 char *name
= mono_method_full_name (cmethod
, TRUE
);
2090 printf (" SIMD intrinsic %s\n", name
);
/* Signature check: no parameters and an I4 return type; len is then emitted as an integer constant. */
2096 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
2098 EMIT_NEW_ICONST (cfg
, ins
, len
);
2107 #endif // !TARGET_ARM64
/*
 * mono_emit_simd_intrinsics:
 *
 *   Routes CMETHOD to a per-class intrinsic emitter, chosen by the namespace
 * and name of its declaring class. CFG, CMETHOD, FSIG and ARGS are passed
 * through unchanged to whichever emitter is selected.
 */
2110 mono_emit_simd_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
2112 const char *class_name
;
2113 const char *class_ns
;
2114 MonoImage
*image
= m_class_get_image (cmethod
->klass
);
/*
 * Check whether the declaring class lives in an image other than corlib.
 * NOTE(review): the guarded statement is not visible here; presumably the
 * function returns without emitting anything for non-corlib classes -- confirm.
 */
2116 if (image
!= mono_get_corlib ())
/* Namespace and name of the declaring class drive the dispatch below. */
2119 class_ns
= m_class_get_name_space (cmethod
->klass
);
2120 class_name
= m_class_get_name (cmethod
->klass
);
2122 // If cmethod->klass is nested, the namespace is on the enclosing class.
2123 if (m_class_get_nested_in (cmethod
->klass
))
2124 class_ns
= m_class_get_name_space (m_class_get_nested_in (cmethod
->klass
));
/* Arm hardware intrinsics classes are handed to emit_arm64_intrinsics (). */
2127 if (!strcmp (class_ns
, "System.Runtime.Intrinsics.Arm")) {
2128 MonoInst
*ins
= emit_arm64_intrinsics (cfg
, cmethod
, fsig
, args
);
2131 #endif // TARGET_ARM64
2133 #ifdef TARGET_AMD64 // TODO: test and enable for x86 too
/* X86 hardware intrinsics classes are handed to emit_x86_intrinsics (). */
2134 if (!strcmp (class_ns
, "System.Runtime.Intrinsics.X86")) {
2135 MonoInst
*ins
= emit_x86_intrinsics (cfg
, cmethod
, fsig
, args
);
2139 // FIXME: implement Vector64<T>, Vector128<T> and Vector<T> for Arm64
/*
 * Portable vector types in System.Runtime.Intrinsics: the class name selects
 * between the generic Vector128`1, the non-generic Vector128 and the generic
 * Vector256`1 emitters.
 */
2141 if (!strcmp (class_ns
, "System.Runtime.Intrinsics")) {
2142 if (!strcmp (class_name
, "Vector128`1"))
2143 return emit_vector128_t (cfg
, cmethod
, fsig
, args
);
2144 if (!strcmp (class_name
, "Vector128"))
2145 return emit_vector128 (cfg
, cmethod
, fsig
, args
);
2146 if (!strcmp (class_name
, "Vector256`1"))
2147 return emit_vector256_t (cfg
, cmethod
, fsig
, args
);
/*
 * System.Numerics: the non-generic Vector class and the generic Vector`1
 * class each have their own emitter.
 */
2150 if (!strcmp (class_ns
, "System.Numerics")) {
2151 if (!strcmp (class_name
, "Vector"))
2152 return emit_sys_numerics_vector (cfg
, cmethod
, fsig
, args
);
2153 if (!strcmp (class_name
, "Vector`1"))
2154 return emit_sys_numerics_vector_t (cfg
, cmethod
, fsig
, args
);
2156 #endif // TARGET_AMD64
2162 mono_simd_decompose_intrinsic (MonoCompile
*cfg
, MonoBasicBlock
*bb
, MonoInst
*ins
)
2167 mono_simd_simplify_indirection (MonoCompile
*cfg
)
2173 MONO_EMPTY_SOURCE_FILE (simd_intrinsics_netcore
);
2177 #endif /* DISABLE_JIT */
2180 #if defined(ENABLE_NETCORE) && defined(TARGET_AMD64)
2182 ves_icall_System_Runtime_Intrinsics_X86_X86Base___cpuidex (int abcd
[4], int function_id
, int subfunction_id
)
2184 #ifndef MONO_CROSS_COMPILE
2185 mono_hwcap_x86_call_cpuidex (function_id
, subfunction_id
,
2186 &abcd
[0], &abcd
[1], &abcd
[2], &abcd
[3]);