/*
 * SIMD Intrinsics support for netcore.
 * Only LLVM is supported as a backend.
 */
7 #include <mono/utils/mono-compiler.h>
8 #include <mono/metadata/icall-decl.h>
10 #include "mini-runtime.h"
13 #include "mini-llvm.h"
15 #include "mono/utils/bsearch.h"
16 #include <mono/metadata/abi-details.h>
17 #include <mono/metadata/reflection-internals.h>
18 #include <mono/utils/mono-hwcap.h>
20 #if defined (MONO_ARCH_SIMD_INTRINSICS) && defined(ENABLE_NETCORE)
22 #if defined(DISABLE_JIT)
25 mono_simd_intrinsics_init (void)
31 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
32 #define MSGSTRFIELD1(line) str##line
33 static const struct msgstr_t
{
34 #define METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
35 #define METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
36 #include "simd-methods-netcore.h"
40 #define METHOD(name) #name,
41 #define METHOD2(str,name) str,
42 #include "simd-methods-netcore.h"
48 #define METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
49 #define METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
50 #include "simd-methods-netcore.h"
52 #define method_name(idx) ((const char*)&method_names + (idx))
54 static int register_size
;
57 // One of the SN_ constants
66 mono_simd_intrinsics_init (void)
70 if ((mini_get_cpu_features () & MONO_CPU_X86_AVX
) != 0)
73 /* Tell the class init code the size of the System.Numerics.Register type */
74 mono_simd_register_size
= register_size
;
78 mono_emit_simd_field_load (MonoCompile
*cfg
, MonoClassField
*field
, MonoInst
*addr
)
84 simd_intrinsic_compare_by_name (const void *key
, const void *value
)
86 return strcmp ((const char*)key
, method_name (*(guint16
*)value
));
90 simd_intrinsic_info_compare_by_name (const void *key
, const void *value
)
92 SimdIntrinsic
*info
= (SimdIntrinsic
*)value
;
93 return strcmp ((const char*)key
, method_name (info
->id
));
97 lookup_intrins (guint16
*intrinsics
, int size
, MonoMethod
*cmethod
)
99 const guint16
*result
= (const guint16
*)mono_binary_search (cmethod
->name
, intrinsics
, size
/ sizeof (guint16
), sizeof (guint16
), &simd_intrinsic_compare_by_name
);
107 static SimdIntrinsic
*
108 lookup_intrins_info (SimdIntrinsic
*intrinsics
, int size
, MonoMethod
*cmethod
)
111 for (int i
= 0; i
< (size
/ sizeof (SimdIntrinsic
)) - 1; ++i
) {
112 const char *n1
= method_name (intrinsics
[i
].id
);
113 const char *n2
= method_name (intrinsics
[i
+ 1].id
);
114 int len1
= strlen (n1
);
115 int len2
= strlen (n2
);
116 for (int j
= 0; j
< len1
&& j
< len2
; ++j
) {
117 if (n1
[j
] > n2
[j
]) {
118 printf ("%s %s\n", n1
, n2
);
119 g_assert_not_reached ();
120 } else if (n1
[j
] < n2
[j
]) {
127 return (SimdIntrinsic
*)mono_binary_search (cmethod
->name
, intrinsics
, size
/ sizeof (SimdIntrinsic
), sizeof (SimdIntrinsic
), &simd_intrinsic_info_compare_by_name
);
131 * Return a simd vreg for the simd value represented by SRC.
132 * SRC is the 'this' argument to methods.
133 * Set INDIRECT to TRUE if the value was loaded from memory.
136 load_simd_vreg_class (MonoCompile
*cfg
, MonoClass
*klass
, MonoInst
*src
, gboolean
*indirect
)
138 const char *spec
= INS_INFO (src
->opcode
);
142 if (src
->opcode
== OP_XMOVE
) {
144 } else if (src
->opcode
== OP_LDADDR
) {
145 int res
= ((MonoInst
*)src
->inst_p0
)->dreg
;
147 } else if (spec
[MONO_INST_DEST
] == 'x') {
149 } else if (src
->type
== STACK_PTR
|| src
->type
== STACK_MP
) {
154 MONO_INST_NEW (cfg
, ins
, OP_LOADX_MEMBASE
);
156 ins
->sreg1
= src
->dreg
;
157 ins
->type
= STACK_VTYPE
;
158 ins
->dreg
= alloc_ireg (cfg
);
159 MONO_ADD_INS (cfg
->cbb
, ins
);
162 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src
->type
);
163 mono_print_ins (src
);
164 g_assert_not_reached ();
168 load_simd_vreg (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
*src
, gboolean
*indirect
)
170 return load_simd_vreg_class (cfg
, cmethod
->klass
, src
, indirect
);
173 /* Create and emit a SIMD instruction, dreg is auto-allocated */
175 emit_simd_ins (MonoCompile
*cfg
, MonoClass
*klass
, int opcode
, int sreg1
, int sreg2
)
177 const char *spec
= INS_INFO (opcode
);
180 MONO_INST_NEW (cfg
, ins
, opcode
);
181 if (spec
[MONO_INST_DEST
] == 'x') {
182 ins
->dreg
= alloc_xreg (cfg
);
183 ins
->type
= STACK_VTYPE
;
184 } else if (spec
[MONO_INST_DEST
] == 'i') {
185 ins
->dreg
= alloc_ireg (cfg
);
186 ins
->type
= STACK_I4
;
187 } else if (spec
[MONO_INST_DEST
] == 'l') {
188 ins
->dreg
= alloc_lreg (cfg
);
189 ins
->type
= STACK_I8
;
190 } else if (spec
[MONO_INST_DEST
] == 'f') {
191 ins
->dreg
= alloc_freg (cfg
);
192 ins
->type
= STACK_R8
;
197 MONO_ADD_INS (cfg
->cbb
, ins
);
202 emit_simd_ins_for_sig (MonoCompile
*cfg
, MonoClass
*klass
, int opcode
, int instc0
, int instc1
, MonoMethodSignature
*fsig
, MonoInst
**args
)
204 g_assert (fsig
->param_count
<= 3);
205 MonoInst
* ins
= emit_simd_ins (cfg
, klass
, opcode
,
206 fsig
->param_count
> 0 ? args
[0]->dreg
: -1,
207 fsig
->param_count
> 1 ? args
[1]->dreg
: -1);
209 ins
->inst_c0
= instc0
;
211 ins
->inst_c1
= instc1
;
212 if (fsig
->param_count
== 3)
213 ins
->sreg3
= args
[2]->dreg
;
218 is_hw_intrinsics_class (MonoClass
*klass
, const char *name
, gboolean
*is_64bit
)
220 const char *class_name
= m_class_get_name (klass
);
221 if ((!strcmp (class_name
, "X64") || !strcmp (class_name
, "Arm64")) && m_class_get_nested_in (klass
)) {
223 return !strcmp (m_class_get_name (m_class_get_nested_in (klass
)), name
);
226 return !strcmp (class_name
, name
);
231 get_underlying_type (MonoType
* type
)
233 MonoClass
* klass
= mono_class_from_mono_type_internal (type
);
234 if (type
->type
== MONO_TYPE_PTR
) // e.g. int* => MONO_TYPE_I4
235 return m_class_get_byval_arg (m_class_get_element_class (klass
))->type
;
236 else if (type
->type
== MONO_TYPE_GENERICINST
) // e.g. Vector128<int> => MONO_TYPE_I4
237 return mono_class_get_context (klass
)->class_inst
->type_argv
[0]->type
;
243 emit_xcompare (MonoCompile
*cfg
, MonoClass
*klass
, MonoTypeEnum etype
, MonoInst
*arg1
, MonoInst
*arg2
)
246 gboolean is_fp
= etype
== MONO_TYPE_R4
|| etype
== MONO_TYPE_R8
;
248 ins
= emit_simd_ins (cfg
, klass
, is_fp
? OP_XCOMPARE_FP
: OP_XCOMPARE
, arg1
->dreg
, arg2
->dreg
);
249 ins
->inst_c0
= CMP_EQ
;
250 ins
->inst_c1
= etype
;
255 get_vector_t_elem_type (MonoType
*vector_type
)
260 g_assert (vector_type
->type
== MONO_TYPE_GENERICINST
);
261 klass
= mono_class_from_mono_type_internal (vector_type
);
263 !strcmp (m_class_get_name (klass
), "Vector`1") ||
264 !strcmp (m_class_get_name (klass
), "Vector128`1") ||
265 !strcmp (m_class_get_name (klass
), "Vector256`1"));
266 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
273 type_to_expand_op (MonoType
*type
)
275 switch (type
->type
) {
293 g_assert_not_reached ();
297 static guint16 vector_methods
[] = {
306 SN_get_IsHardwareAccelerated
,
310 emit_sys_numerics_vector (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
313 gboolean supported
= FALSE
;
317 id
= lookup_intrins (vector_methods
, sizeof (vector_methods
), cmethod
);
321 //printf ("%s\n", mono_method_full_name (cmethod, 1));
323 #ifdef MONO_ARCH_SIMD_INTRINSICS
327 if (cfg
->verbose_level
> 1) {
328 char *name
= mono_method_full_name (cmethod
, TRUE
);
329 printf (" SIMD intrinsic %s\n", name
);
334 case SN_get_IsHardwareAccelerated
:
335 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
336 ins
->type
= STACK_I4
;
338 case SN_ConvertToInt32
:
339 etype
= get_vector_t_elem_type (fsig
->params
[0]);
340 g_assert (etype
->type
== MONO_TYPE_R4
);
341 return emit_simd_ins (cfg
, mono_class_from_mono_type_internal (fsig
->ret
), OP_CVTPS2DQ
, args
[0]->dreg
, -1);
342 case SN_ConvertToSingle
:
343 etype
= get_vector_t_elem_type (fsig
->params
[0]);
344 g_assert (etype
->type
== MONO_TYPE_I4
|| etype
->type
== MONO_TYPE_U4
);
346 if (etype
->type
== MONO_TYPE_U4
)
348 return emit_simd_ins (cfg
, mono_class_from_mono_type_internal (fsig
->ret
), OP_CVTDQ2PS
, args
[0]->dreg
, -1);
349 case SN_ConvertToDouble
:
350 case SN_ConvertToInt64
:
351 case SN_ConvertToUInt32
:
352 case SN_ConvertToUInt64
:
364 static guint16 vector_t_methods
[] = {
369 SN_GreaterThanOrEqual
,
392 emit_sys_numerics_vector_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
395 MonoType
*type
, *etype
;
398 gboolean is_unsigned
;
400 static const float r4_one
= 1.0f
;
401 static const double r8_one
= 1.0;
403 id
= lookup_intrins (vector_t_methods
, sizeof (vector_t_methods
), cmethod
);
407 klass
= cmethod
->klass
;
408 type
= m_class_get_byval_arg (klass
);
409 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
410 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
412 len
= register_size
/ size
;
414 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
417 if (cfg
->verbose_level
> 1) {
418 char *name
= mono_method_full_name (cmethod
, TRUE
);
419 printf (" SIMD intrinsic %s\n", name
);
425 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
427 EMIT_NEW_ICONST (cfg
, ins
, len
);
430 g_assert (fsig
->param_count
== 0 && mono_metadata_type_equal (fsig
->ret
, type
));
431 return emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
433 g_assert (fsig
->param_count
== 0 && mono_metadata_type_equal (fsig
->ret
, type
));
434 MonoInst
*one
= NULL
;
435 int expand_opcode
= type_to_expand_op (etype
);
436 MONO_INST_NEW (cfg
, one
, -1);
437 switch (expand_opcode
) {
439 one
->opcode
= OP_R4CONST
;
440 one
->type
= STACK_R4
;
441 one
->inst_p0
= (void *) &r4_one
;
444 one
->opcode
= OP_R8CONST
;
445 one
->type
= STACK_R8
;
446 one
->inst_p0
= (void *) &r8_one
;
449 one
->opcode
= OP_ICONST
;
450 one
->type
= STACK_I4
;
454 one
->dreg
= alloc_dreg (cfg
, (MonoStackType
)one
->type
);
455 MONO_ADD_INS (cfg
->cbb
, one
);
456 return emit_simd_ins (cfg
, klass
, expand_opcode
, one
->dreg
, -1);
458 case SN_get_AllBitsSet
: {
459 /* Compare a zero vector with itself */
460 ins
= emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
461 return emit_xcompare (cfg
, klass
, etype
->type
, ins
, ins
);
464 if (!COMPILE_LLVM (cfg
))
466 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, args
[1]->dreg
, len
);
467 MONO_EMIT_NEW_COND_EXC (cfg
, GE_UN
, "IndexOutOfRangeException");
470 gboolean is64
= FALSE
;
471 switch (etype
->type
) {
474 opcode
= OP_XEXTRACT_I64
;
476 dreg
= alloc_lreg (cfg
);
479 opcode
= OP_XEXTRACT_R8
;
480 dreg
= alloc_freg (cfg
);
483 g_assert (cfg
->r4fp
);
484 opcode
= OP_XEXTRACT_R4
;
485 dreg
= alloc_freg (cfg
);
488 opcode
= OP_XEXTRACT_I32
;
489 dreg
= alloc_ireg (cfg
);
492 MONO_INST_NEW (cfg
, ins
, opcode
);
494 ins
->sreg1
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
495 ins
->sreg2
= args
[1]->dreg
;
496 ins
->inst_c0
= etype
->type
;
497 mini_type_to_eval_stack_type (cfg
, etype
, ins
);
498 MONO_ADD_INS (cfg
->cbb
, ins
);
502 if (fsig
->param_count
== 1 && mono_metadata_type_equal (fsig
->params
[0], etype
)) {
503 int dreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
505 int opcode
= type_to_expand_op (etype
);
506 ins
= emit_simd_ins (cfg
, klass
, opcode
, args
[1]->dreg
, -1);
510 if ((fsig
->param_count
== 1 || fsig
->param_count
== 2) && (fsig
->params
[0]->type
== MONO_TYPE_SZARRAY
)) {
511 MonoInst
*array_ins
= args
[1];
513 MonoInst
*ldelema_ins
;
517 if (args
[0]->opcode
!= OP_LDADDR
)
520 /* .ctor (T[]) or .ctor (T[], index) */
522 if (fsig
->param_count
== 2) {
523 index_ins
= args
[2];
525 EMIT_NEW_ICONST (cfg
, index_ins
, 0);
528 /* Emit index check for the end (index + len - 1 < array length) */
529 end_index_reg
= alloc_ireg (cfg
);
530 EMIT_NEW_BIALU_IMM (cfg
, ins
, OP_IADD_IMM
, end_index_reg
, index_ins
->dreg
, len
- 1);
531 MONO_EMIT_BOUNDS_CHECK (cfg
, array_ins
->dreg
, MonoArray
, max_length
, end_index_reg
);
533 /* Load the array slice into the simd reg */
534 ldelema_ins
= mini_emit_ldelema_1_ins (cfg
, mono_class_from_mono_type_internal (etype
), array_ins
, index_ins
, TRUE
, FALSE
);
535 g_assert (args
[0]->opcode
== OP_LDADDR
);
536 var
= (MonoInst
*)args
[0]->inst_p0
;
537 EMIT_NEW_LOAD_MEMBASE (cfg
, ins
, OP_LOADX_MEMBASE
, var
->dreg
, ldelema_ins
->dreg
, 0);
538 ins
->klass
= cmethod
->klass
;
543 if ((fsig
->param_count
== 1 || fsig
->param_count
== 2) && (fsig
->params
[0]->type
== MONO_TYPE_SZARRAY
)) {
544 MonoInst
*array_ins
= args
[1];
546 MonoInst
*ldelema_ins
;
547 int val_vreg
, end_index_reg
;
549 val_vreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
551 /* CopyTo (T[]) or CopyTo (T[], index) */
553 if (fsig
->param_count
== 2) {
554 index_ins
= args
[2];
556 EMIT_NEW_ICONST (cfg
, index_ins
, 0);
559 /* CopyTo () does complicated argument checks */
560 mini_emit_bounds_check_offset (cfg
, array_ins
->dreg
, MONO_STRUCT_OFFSET (MonoArray
, max_length
), index_ins
->dreg
, "ArgumentOutOfRangeException");
561 end_index_reg
= alloc_ireg (cfg
);
562 int len_reg
= alloc_ireg (cfg
);
563 MONO_EMIT_NEW_LOAD_MEMBASE_OP_FLAGS (cfg
, OP_LOADI4_MEMBASE
, len_reg
, array_ins
->dreg
, MONO_STRUCT_OFFSET (MonoArray
, max_length
), MONO_INST_INVARIANT_LOAD
);
564 EMIT_NEW_BIALU (cfg
, ins
, OP_ISUB
, end_index_reg
, len_reg
, index_ins
->dreg
);
565 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, end_index_reg
, len
);
566 MONO_EMIT_NEW_COND_EXC (cfg
, LT
, "ArgumentException");
568 /* Load the array slice into the simd reg */
569 ldelema_ins
= mini_emit_ldelema_1_ins (cfg
, mono_class_from_mono_type_internal (etype
), array_ins
, index_ins
, FALSE
, FALSE
);
570 EMIT_NEW_STORE_MEMBASE (cfg
, ins
, OP_STOREX_MEMBASE
, ldelema_ins
->dreg
, 0, val_vreg
);
571 ins
->klass
= cmethod
->klass
;
576 if (fsig
->param_count
== 1 && fsig
->ret
->type
== MONO_TYPE_BOOLEAN
&& mono_metadata_type_equal (fsig
->params
[0], type
)) {
577 int sreg1
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
579 return emit_simd_ins (cfg
, klass
, OP_XEQUAL
, sreg1
, args
[1]->dreg
);
580 } else if (fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
)) {
581 /* Per element equality */
582 return emit_xcompare (cfg
, klass
, etype
->type
, args
[0], args
[1]);
586 case SN_op_Inequality
:
587 g_assert (fsig
->param_count
== 2 && fsig
->ret
->type
== MONO_TYPE_BOOLEAN
&&
588 mono_metadata_type_equal (fsig
->params
[0], type
) &&
589 mono_metadata_type_equal (fsig
->params
[1], type
));
590 ins
= emit_simd_ins (cfg
, klass
, OP_XEQUAL
, args
[0]->dreg
, args
[1]->dreg
);
591 if (id
== SN_op_Inequality
) {
592 int sreg
= ins
->dreg
;
593 int dreg
= alloc_ireg (cfg
);
594 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, sreg
, 0);
595 EMIT_NEW_UNALU (cfg
, ins
, OP_CEQ
, dreg
, -1);
599 case SN_GreaterThanOrEqual
:
601 case SN_LessThanOrEqual
:
602 g_assert (fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
));
603 is_unsigned
= etype
->type
== MONO_TYPE_U1
|| etype
->type
== MONO_TYPE_U2
|| etype
->type
== MONO_TYPE_U4
|| etype
->type
== MONO_TYPE_U8
;
604 ins
= emit_xcompare (cfg
, klass
, etype
->type
, args
[0], args
[1]);
607 ins
->inst_c0
= is_unsigned
? CMP_GT_UN
: CMP_GT
;
609 case SN_GreaterThanOrEqual
:
610 ins
->inst_c0
= is_unsigned
? CMP_GE_UN
: CMP_GE
;
613 ins
->inst_c0
= is_unsigned
? CMP_LT_UN
: CMP_LT
;
615 case SN_LessThanOrEqual
:
616 ins
->inst_c0
= is_unsigned
? CMP_LE_UN
: CMP_LE
;
619 g_assert_not_reached ();
623 return emit_simd_ins (cfg
, klass
, OP_XCAST
, args
[0]->dreg
, -1);
625 case SN_op_Subtraction
:
628 case SN_op_BitwiseAnd
:
629 case SN_op_BitwiseOr
:
630 case SN_op_ExclusiveOr
:
633 if (!(fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
)))
635 ins
= emit_simd_ins (cfg
, klass
, OP_XBINOP
, args
[0]->dreg
, args
[1]->dreg
);
636 ins
->inst_c1
= etype
->type
;
638 if (etype
->type
== MONO_TYPE_R4
|| etype
->type
== MONO_TYPE_R8
) {
641 ins
->inst_c0
= OP_FADD
;
643 case SN_op_Subtraction
:
644 ins
->inst_c0
= OP_FSUB
;
647 ins
->inst_c0
= OP_FMUL
;
650 ins
->inst_c0
= OP_FDIV
;
653 ins
->inst_c0
= OP_FMAX
;
656 ins
->inst_c0
= OP_FMIN
;
665 ins
->inst_c0
= OP_IADD
;
667 case SN_op_Subtraction
:
668 ins
->inst_c0
= OP_ISUB
;
672 ins->inst_c0 = OP_IDIV;
675 ins->inst_c0 = OP_IMUL;
678 case SN_op_BitwiseAnd
:
679 ins
->inst_c0
= OP_IAND
;
681 case SN_op_BitwiseOr
:
682 ins
->inst_c0
= OP_IOR
;
684 case SN_op_ExclusiveOr
:
685 ins
->inst_c0
= OP_IXOR
;
688 ins
->inst_c0
= OP_IMAX
;
691 ins
->inst_c0
= OP_IMIN
;
705 #endif // !TARGET_ARM64
708 emit_invalid_operation (MonoCompile
*cfg
, const char* message
)
710 mono_cfg_set_exception (cfg
, MONO_EXCEPTION_MONO_ERROR
);
711 mono_error_set_generic_error (cfg
->error
, "System", "InvalidOperationException", "%s", message
);
717 static SimdIntrinsic armbase_methods
[] = {
718 {SN_LeadingSignCount
},
719 {SN_LeadingZeroCount
},
720 {SN_ReverseElementBits
},
724 static SimdIntrinsic crc32_methods
[] = {
731 emit_arm64_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
733 // Arm64 intrinsics are LLVM-only
734 if (!COMPILE_LLVM (cfg
))
738 gboolean supported
, is_64bit
;
739 MonoClass
*klass
= cmethod
->klass
;
740 MonoTypeEnum arg0_type
= fsig
->param_count
> 0 ? get_underlying_type (fsig
->params
[0]) : MONO_TYPE_VOID
;
741 gboolean arg0_i32
= (arg0_type
== MONO_TYPE_I4
) || (arg0_type
== MONO_TYPE_U4
);
744 if (is_hw_intrinsics_class (klass
, "ArmBase", &is_64bit
)) {
745 info
= lookup_intrins_info (armbase_methods
, sizeof (armbase_methods
), cmethod
);
749 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_ARM64_BASE
) != 0;
752 case SN_get_IsSupported
:
753 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
754 ins
->type
= STACK_I4
;
756 case SN_LeadingZeroCount
:
757 return emit_simd_ins_for_sig (cfg
, klass
, arg0_i32
? OP_LZCNT32
: OP_LZCNT64
, 0, arg0_type
, fsig
, args
);
758 case SN_LeadingSignCount
:
759 return emit_simd_ins_for_sig (cfg
, klass
, arg0_i32
? OP_LSCNT32
: OP_LSCNT64
, 0, arg0_type
, fsig
, args
);
760 case SN_ReverseElementBits
:
761 return emit_simd_ins_for_sig (cfg
, klass
,
762 (is_64bit
? OP_XOP_I8_I8
: OP_XOP_I4_I4
),
763 (is_64bit
? SIMD_OP_ARM64_RBIT64
: SIMD_OP_ARM64_RBIT32
),
764 arg0_type
, fsig
, args
);
766 g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false
770 if (is_hw_intrinsics_class (klass
, "Crc32", &is_64bit
)) {
771 info
= lookup_intrins_info (crc32_methods
, sizeof (crc32_methods
), cmethod
);
775 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_ARM64_CRC
) != 0;
778 case SN_get_IsSupported
:
779 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
780 ins
->type
= STACK_I4
;
782 case SN_ComputeCrc32
:
783 case SN_ComputeCrc32C
: {
784 SimdOp op
= (SimdOp
)0;
785 gboolean is_c
= info
->id
== SN_ComputeCrc32C
;
786 switch (get_underlying_type (fsig
->params
[1])) {
787 case MONO_TYPE_U1
: op
= is_c
? SIMD_OP_ARM64_CRC32CB
: SIMD_OP_ARM64_CRC32B
; break;
788 case MONO_TYPE_U2
: op
= is_c
? SIMD_OP_ARM64_CRC32CH
: SIMD_OP_ARM64_CRC32H
; break;
789 case MONO_TYPE_U4
: op
= is_c
? SIMD_OP_ARM64_CRC32CW
: SIMD_OP_ARM64_CRC32W
; break;
790 case MONO_TYPE_U8
: op
= is_c
? SIMD_OP_ARM64_CRC32CX
: SIMD_OP_ARM64_CRC32X
; break;
791 default: g_assert_not_reached (); break;
793 return emit_simd_ins_for_sig (cfg
, klass
, is_64bit
? OP_XOP_I4_I4_I8
: OP_XOP_I4_I4_I4
, op
, arg0_type
, fsig
, args
);
796 g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false
801 #endif // TARGET_ARM64
805 static SimdIntrinsic sse_methods
[] = {
806 {SN_Add
, OP_XBINOP
, OP_FADD
},
807 {SN_AddScalar
, OP_SSE_ADDSS
},
808 {SN_And
, OP_SSE_AND
},
809 {SN_AndNot
, OP_SSE_ANDN
},
810 {SN_CompareEqual
, OP_XCOMPARE_FP
, CMP_EQ
},
811 {SN_CompareGreaterThan
, OP_XCOMPARE_FP
,CMP_GT
},
812 {SN_CompareGreaterThanOrEqual
, OP_XCOMPARE_FP
, CMP_GE
},
813 {SN_CompareLessThan
, OP_XCOMPARE_FP
, CMP_LT
},
814 {SN_CompareLessThanOrEqual
, OP_XCOMPARE_FP
, CMP_LE
},
815 {SN_CompareNotEqual
, OP_XCOMPARE_FP
, CMP_NE
},
816 {SN_CompareNotGreaterThan
, OP_XCOMPARE_FP
, CMP_LE
},
817 {SN_CompareNotGreaterThanOrEqual
, OP_XCOMPARE_FP
, CMP_LT
},
818 {SN_CompareNotLessThan
, OP_XCOMPARE_FP
, CMP_GE
},
819 {SN_CompareNotLessThanOrEqual
, OP_XCOMPARE_FP
, CMP_GT
},
820 {SN_CompareOrdered
, OP_XCOMPARE_FP
, CMP_ORD
},
821 {SN_CompareScalarEqual
, OP_SSE_CMPSS
, CMP_EQ
},
822 {SN_CompareScalarGreaterThan
, OP_SSE_CMPSS
, CMP_GT
},
823 {SN_CompareScalarGreaterThanOrEqual
, OP_SSE_CMPSS
, CMP_GE
},
824 {SN_CompareScalarLessThan
, OP_SSE_CMPSS
, CMP_LT
},
825 {SN_CompareScalarLessThanOrEqual
, OP_SSE_CMPSS
, CMP_LE
},
826 {SN_CompareScalarNotEqual
, OP_SSE_CMPSS
, CMP_NE
},
827 {SN_CompareScalarNotGreaterThan
, OP_SSE_CMPSS
, CMP_LE
},
828 {SN_CompareScalarNotGreaterThanOrEqual
, OP_SSE_CMPSS
, CMP_LT
},
829 {SN_CompareScalarNotLessThan
, OP_SSE_CMPSS
, CMP_GE
},
830 {SN_CompareScalarNotLessThanOrEqual
, OP_SSE_CMPSS
, CMP_GT
},
831 {SN_CompareScalarOrdered
, OP_SSE_CMPSS
, CMP_ORD
},
832 {SN_CompareScalarOrderedEqual
, OP_SSE_COMISS
, CMP_EQ
},
833 {SN_CompareScalarOrderedGreaterThan
, OP_SSE_COMISS
, CMP_GT
},
834 {SN_CompareScalarOrderedGreaterThanOrEqual
, OP_SSE_COMISS
, CMP_GE
},
835 {SN_CompareScalarOrderedLessThan
, OP_SSE_COMISS
, CMP_LT
},
836 {SN_CompareScalarOrderedLessThanOrEqual
, OP_SSE_COMISS
, CMP_LE
},
837 {SN_CompareScalarOrderedNotEqual
, OP_SSE_COMISS
, CMP_NE
},
838 {SN_CompareScalarUnordered
, OP_SSE_CMPSS
, CMP_UNORD
},
839 {SN_CompareScalarUnorderedEqual
, OP_SSE_UCOMISS
, CMP_EQ
},
840 {SN_CompareScalarUnorderedGreaterThan
, OP_SSE_UCOMISS
, CMP_GT
},
841 {SN_CompareScalarUnorderedGreaterThanOrEqual
, OP_SSE_UCOMISS
, CMP_GE
},
842 {SN_CompareScalarUnorderedLessThan
, OP_SSE_UCOMISS
, CMP_LT
},
843 {SN_CompareScalarUnorderedLessThanOrEqual
, OP_SSE_UCOMISS
, CMP_LE
},
844 {SN_CompareScalarUnorderedNotEqual
, OP_SSE_UCOMISS
, CMP_NE
},
845 {SN_CompareUnordered
, OP_XCOMPARE_FP
, CMP_UNORD
},
846 {SN_ConvertScalarToVector128Single
},
847 {SN_ConvertToInt32
, OP_XOP_I4_X
, SIMD_OP_SSE_CVTSS2SI
},
848 {SN_ConvertToInt32WithTruncation
, OP_XOP_I4_X
, SIMD_OP_SSE_CVTTSS2SI
},
849 {SN_ConvertToInt64
, OP_XOP_I8_X
, SIMD_OP_SSE_CVTSS2SI64
},
850 {SN_ConvertToInt64WithTruncation
, OP_XOP_I8_X
, SIMD_OP_SSE_CVTTSS2SI64
},
851 {SN_Divide
, OP_XBINOP
, OP_FDIV
},
852 {SN_DivideScalar
, OP_SSE_DIVSS
},
853 {SN_LoadAlignedVector128
, OP_SSE_LOADU
, 16 /* alignment */},
854 {SN_LoadHigh
, OP_SSE_MOVHPS_LOAD
},
855 {SN_LoadLow
, OP_SSE_MOVLPS_LOAD
},
856 {SN_LoadScalarVector128
, OP_SSE_MOVSS
},
857 {SN_LoadVector128
, OP_SSE_LOADU
, 1 /* alignment */},
858 {SN_Max
, OP_XOP_X_X_X
, SIMD_OP_SSE_MAXPS
},
859 {SN_MaxScalar
, OP_XOP_X_X_X
, SIMD_OP_SSE_MAXSS
},
860 {SN_Min
, OP_XOP_X_X_X
, SIMD_OP_SSE_MINPS
},
861 {SN_MinScalar
, OP_XOP_X_X_X
, SIMD_OP_SSE_MINSS
},
862 {SN_MoveHighToLow
, OP_SSE_MOVEHL
},
863 {SN_MoveLowToHigh
, OP_SSE_MOVELH
},
864 {SN_MoveMask
, OP_SSE_MOVMSK
},
865 {SN_MoveScalar
, OP_SSE_MOVS2
},
866 {SN_Multiply
, OP_XBINOP
, OP_FMUL
},
867 {SN_MultiplyScalar
, OP_SSE_MULSS
},
869 {SN_Prefetch0
, OP_SSE_PREFETCHT0
},
870 {SN_Prefetch1
, OP_SSE_PREFETCHT1
},
871 {SN_Prefetch2
, OP_SSE_PREFETCHT2
},
872 {SN_PrefetchNonTemporal
, OP_SSE_PREFETCHNTA
},
873 {SN_Reciprocal
, OP_XOP_X_X
, SIMD_OP_SSE_RCPPS
},
874 {SN_ReciprocalScalar
},
875 {SN_ReciprocalSqrt
, OP_XOP_X_X
, SIMD_OP_SSE_RSQRTPS
},
876 {SN_ReciprocalSqrtScalar
},
878 {SN_Sqrt
, OP_XOP_X_X
, SIMD_OP_SSE_SQRTPS
},
880 {SN_Store
, OP_SSE_STORE
, 1 /* alignment */},
881 {SN_StoreAligned
, OP_SSE_STORE
, 16 /* alignment */},
882 {SN_StoreAlignedNonTemporal
, OP_SSE_MOVNTPS
, 16 /* alignment */},
883 {SN_StoreFence
, OP_XOP
, SIMD_OP_SSE_SFENCE
},
884 {SN_StoreHigh
, OP_SSE_MOVHPS_STORE
},
885 {SN_StoreLow
, OP_SSE_MOVLPS_STORE
},
886 {SN_StoreScalar
, OP_SSE_MOVSS_STORE
},
887 {SN_Subtract
, OP_XBINOP
, OP_FSUB
},
888 {SN_SubtractScalar
, OP_SSE_SUBSS
},
889 {SN_UnpackHigh
, OP_SSE_UNPACKHI
},
890 {SN_UnpackLow
, OP_SSE_UNPACKLO
},
891 {SN_Xor
, OP_SSE_XOR
},
895 static SimdIntrinsic sse2_methods
[] = {
897 {SN_AddSaturate
, OP_SSE2_ADDS
},
898 {SN_AddScalar
, OP_SSE2_ADDSD
},
899 {SN_And
, OP_SSE_AND
},
900 {SN_AndNot
, OP_SSE_ANDN
},
903 {SN_CompareGreaterThan
},
904 {SN_CompareGreaterThanOrEqual
, OP_XCOMPARE_FP
, CMP_GE
},
905 {SN_CompareLessThan
},
906 {SN_CompareLessThanOrEqual
, OP_XCOMPARE_FP
, CMP_LE
},
907 {SN_CompareNotEqual
, OP_XCOMPARE_FP
, CMP_NE
},
908 {SN_CompareNotGreaterThan
, OP_XCOMPARE_FP
, CMP_LE
},
909 {SN_CompareNotGreaterThanOrEqual
, OP_XCOMPARE_FP
, CMP_LT
},
910 {SN_CompareNotLessThan
, OP_XCOMPARE_FP
, CMP_GE
},
911 {SN_CompareNotLessThanOrEqual
, OP_XCOMPARE_FP
, CMP_GT
},
912 {SN_CompareOrdered
, OP_XCOMPARE_FP
, CMP_ORD
},
913 {SN_CompareScalarEqual
, OP_SSE2_CMPSD
, CMP_EQ
},
914 {SN_CompareScalarGreaterThan
, OP_SSE2_CMPSD
, CMP_GT
},
915 {SN_CompareScalarGreaterThanOrEqual
, OP_SSE2_CMPSD
, CMP_GE
},
916 {SN_CompareScalarLessThan
, OP_SSE2_CMPSD
, CMP_LT
},
917 {SN_CompareScalarLessThanOrEqual
, OP_SSE2_CMPSD
, CMP_LE
},
918 {SN_CompareScalarNotEqual
, OP_SSE2_CMPSD
, CMP_NE
},
919 {SN_CompareScalarNotGreaterThan
, OP_SSE2_CMPSD
, CMP_LE
},
920 {SN_CompareScalarNotGreaterThanOrEqual
, OP_SSE2_CMPSD
, CMP_LT
},
921 {SN_CompareScalarNotLessThan
, OP_SSE2_CMPSD
, CMP_GE
},
922 {SN_CompareScalarNotLessThanOrEqual
, OP_SSE2_CMPSD
, CMP_GT
},
923 {SN_CompareScalarOrdered
, OP_SSE2_CMPSD
, CMP_ORD
},
924 {SN_CompareScalarOrderedEqual
, OP_SSE2_COMISD
, CMP_EQ
},
925 {SN_CompareScalarOrderedGreaterThan
, OP_SSE2_COMISD
, CMP_GT
},
926 {SN_CompareScalarOrderedGreaterThanOrEqual
, OP_SSE2_COMISD
, CMP_GE
},
927 {SN_CompareScalarOrderedLessThan
, OP_SSE2_COMISD
, CMP_LT
},
928 {SN_CompareScalarOrderedLessThanOrEqual
, OP_SSE2_COMISD
, CMP_LE
},
929 {SN_CompareScalarOrderedNotEqual
, OP_SSE2_COMISD
, CMP_NE
},
930 {SN_CompareScalarUnordered
, OP_SSE2_CMPSD
, CMP_UNORD
},
931 {SN_CompareScalarUnorderedEqual
, OP_SSE2_UCOMISD
, CMP_EQ
},
932 {SN_CompareScalarUnorderedGreaterThan
, OP_SSE2_UCOMISD
, CMP_GT
},
933 {SN_CompareScalarUnorderedGreaterThanOrEqual
, OP_SSE2_UCOMISD
, CMP_GE
},
934 {SN_CompareScalarUnorderedLessThan
, OP_SSE2_UCOMISD
, CMP_LT
},
935 {SN_CompareScalarUnorderedLessThanOrEqual
, OP_SSE2_UCOMISD
, CMP_LE
},
936 {SN_CompareScalarUnorderedNotEqual
, OP_SSE2_UCOMISD
, CMP_NE
},
937 {SN_CompareUnordered
, OP_XCOMPARE_FP
, CMP_UNORD
},
938 {SN_ConvertScalarToVector128Double
},
939 {SN_ConvertScalarToVector128Int32
},
940 {SN_ConvertScalarToVector128Int64
},
941 {SN_ConvertScalarToVector128Single
, OP_XOP_X_X_X
, SIMD_OP_SSE_CVTSD2SS
},
942 {SN_ConvertScalarToVector128UInt32
},
943 {SN_ConvertScalarToVector128UInt64
},
945 {SN_ConvertToInt32WithTruncation
, OP_XOP_I4_X
, SIMD_OP_SSE_CVTTSD2SI
},
947 {SN_ConvertToInt64WithTruncation
, OP_XOP_I8_X
, SIMD_OP_SSE_CVTTSD2SI64
},
948 {SN_ConvertToUInt32
},
949 {SN_ConvertToUInt64
},
950 {SN_ConvertToVector128Double
},
951 {SN_ConvertToVector128Int32
},
952 {SN_ConvertToVector128Int32WithTruncation
},
953 {SN_ConvertToVector128Single
},
954 {SN_Divide
, OP_XBINOP
, OP_FDIV
},
955 {SN_DivideScalar
, OP_SSE2_DIVSD
},
958 {SN_LoadAlignedVector128
},
959 {SN_LoadFence
, OP_XOP
, SIMD_OP_SSE_LFENCE
},
960 {SN_LoadHigh
, OP_SSE2_MOVHPD_LOAD
},
961 {SN_LoadLow
, OP_SSE2_MOVLPD_LOAD
},
962 {SN_LoadScalarVector128
},
964 {SN_MaskMove
, OP_SSE2_MASKMOVDQU
},
966 {SN_MaxScalar
, OP_XOP_X_X_X
, SIMD_OP_SSE_MAXSD
},
967 {SN_MemoryFence
, OP_XOP
, SIMD_OP_SSE_MFENCE
},
969 {SN_MinScalar
, OP_XOP_X_X_X
, SIMD_OP_SSE_MINSD
},
970 {SN_MoveMask
, OP_SSE_MOVMSK
},
973 {SN_MultiplyAddAdjacent
, OP_XOP_X_X_X
, SIMD_OP_SSE_PMADDWD
},
975 {SN_MultiplyLow
, OP_PMULW
},
976 {SN_MultiplyScalar
, OP_SSE2_MULSD
},
978 {SN_PackSignedSaturate
},
979 {SN_PackUnsignedSaturate
},
980 {SN_ShiftLeftLogical
},
981 {SN_ShiftLeftLogical128BitLane
},
982 {SN_ShiftRightArithmetic
},
983 {SN_ShiftRightLogical
},
984 {SN_ShiftRightLogical128BitLane
},
988 {SN_Sqrt
, OP_XOP_X_X
, SIMD_OP_SSE_SQRTPD
},
990 {SN_Store
, OP_SSE_STORE
, 1 /* alignment */},
991 {SN_StoreAligned
, OP_SSE_STORE
, 16 /* alignment */},
992 {SN_StoreAlignedNonTemporal
, OP_SSE_MOVNTPS
, 16 /* alignment */},
993 {SN_StoreHigh
, OP_SSE2_MOVHPD_STORE
},
994 {SN_StoreLow
, OP_SSE2_MOVLPD_STORE
},
995 {SN_StoreNonTemporal
, OP_SSE_MOVNTPS
, 1 /* alignment */},
996 {SN_StoreScalar
, OP_SSE_STORES
},
998 {SN_SubtractSaturate
, OP_SSE2_SUBS
},
999 {SN_SubtractScalar
, OP_SSE2_SUBSD
},
1000 {SN_SumAbsoluteDifferences
, OP_XOP_X_X_X
, SIMD_OP_SSE_PSADBW
},
1001 {SN_UnpackHigh
, OP_SSE_UNPACKHI
},
1002 {SN_UnpackLow
, OP_SSE_UNPACKLO
},
1003 {SN_Xor
, OP_SSE_XOR
},
1004 {SN_get_IsSupported
}
1007 static SimdIntrinsic sse3_methods
[] = {
1010 {SN_HorizontalSubtract
},
1011 {SN_LoadAndDuplicateToVector128
, OP_SSE3_MOVDDUP_MEM
},
1012 {SN_LoadDquVector128
, OP_XOP_X_I
, SIMD_OP_SSE_LDDQU
},
1013 {SN_MoveAndDuplicate
, OP_SSE3_MOVDDUP
},
1014 {SN_MoveHighAndDuplicate
, OP_SSE3_MOVSHDUP
},
1015 {SN_MoveLowAndDuplicate
, OP_SSE3_MOVSLDUP
},
1016 {SN_get_IsSupported
}
1019 static SimdIntrinsic ssse3_methods
[] = {
1020 {SN_Abs
, OP_SSSE3_ABS
},
1023 {SN_HorizontalAddSaturate
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHADDSW
},
1024 {SN_HorizontalSubtract
},
1025 {SN_HorizontalSubtractSaturate
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHSUBSW
},
1026 {SN_MultiplyAddAdjacent
, OP_XOP_X_X_X
, SIMD_OP_SSE_PMADDUBSW
},
1027 {SN_MultiplyHighRoundScale
, OP_XOP_X_X_X
, SIMD_OP_SSE_PMULHRSW
},
1028 {SN_Shuffle
, OP_SSSE3_SHUFFLE
},
1030 {SN_get_IsSupported
}
1033 static SimdIntrinsic sse41_methods
[] = {
1036 {SN_Ceiling
, OP_SSE41_ROUNDP
, 10 /*round mode*/},
1037 {SN_CeilingScalar
, OP_SSE41_ROUNDS
, 10 /*round mode*/},
1038 {SN_CompareEqual
, OP_XCOMPARE
, CMP_EQ
},
1039 {SN_ConvertToVector128Int16
, OP_SSE_CVTII
, MONO_TYPE_I2
},
1040 {SN_ConvertToVector128Int32
, OP_SSE_CVTII
, MONO_TYPE_I4
},
1041 {SN_ConvertToVector128Int64
, OP_SSE_CVTII
, MONO_TYPE_I8
},
1044 {SN_Floor
, OP_SSE41_ROUNDP
, 9 /*round mode*/},
1045 {SN_FloorScalar
, OP_SSE41_ROUNDS
, 9 /*round mode*/},
1047 {SN_LoadAlignedVector128NonTemporal
, OP_SSE41_LOADANT
},
1048 {SN_Max
, OP_XBINOP
, OP_IMAX
},
1049 {SN_Min
, OP_XBINOP
, OP_IMIN
},
1050 {SN_MinHorizontal
, OP_XOP_X_X
, SIMD_OP_SSE_PHMINPOSUW
},
1051 {SN_MultipleSumAbsoluteDifferences
},
1052 {SN_Multiply
, OP_SSE41_MUL
},
1053 {SN_MultiplyLow
, OP_SSE41_MULLO
},
1054 {SN_PackUnsignedSaturate
, OP_XOP_X_X_X
, SIMD_OP_SSE_PACKUSDW
},
1055 {SN_RoundCurrentDirection
, OP_SSE41_ROUNDP
, 4 /*round mode*/},
1056 {SN_RoundCurrentDirectionScalar
, OP_SSE41_ROUNDS
, 4 /*round mode*/},
1057 {SN_RoundToNearestInteger
, OP_SSE41_ROUNDP
, 8 /*round mode*/},
1058 {SN_RoundToNearestIntegerScalar
, OP_SSE41_ROUNDS
, 8 /*round mode*/},
1059 {SN_RoundToNegativeInfinity
, OP_SSE41_ROUNDP
, 9 /*round mode*/},
1060 {SN_RoundToNegativeInfinityScalar
, OP_SSE41_ROUNDS
, 9 /*round mode*/},
1061 {SN_RoundToPositiveInfinity
, OP_SSE41_ROUNDP
, 10 /*round mode*/},
1062 {SN_RoundToPositiveInfinityScalar
, OP_SSE41_ROUNDS
, 10 /*round mode*/},
1063 {SN_RoundToZero
, OP_SSE41_ROUNDP
, 11 /*round mode*/},
1064 {SN_RoundToZeroScalar
, OP_SSE41_ROUNDS
, 11 /*round mode*/},
1065 {SN_TestC
, OP_XOP_I4_X_X
, SIMD_OP_SSE_TESTC
},
1066 {SN_TestNotZAndNotC
, OP_XOP_I4_X_X
, SIMD_OP_SSE_TESTNZ
},
1067 {SN_TestZ
, OP_XOP_I4_X_X
, SIMD_OP_SSE_TESTZ
},
1068 {SN_get_IsSupported
}
1071 static SimdIntrinsic sse42_methods
[] = {
1072 {SN_CompareGreaterThan
, OP_XCOMPARE
, CMP_GT
},
1074 {SN_get_IsSupported
}
1077 static SimdIntrinsic pclmulqdq_methods
[] = {
1078 {SN_CarrylessMultiply
},
1079 {SN_get_IsSupported
}
1082 static SimdIntrinsic aes_methods
[] = {
1083 {SN_Decrypt
, OP_XOP_X_X_X
, SIMD_OP_AES_DEC
},
1084 {SN_DecryptLast
, OP_XOP_X_X_X
, SIMD_OP_AES_DECLAST
},
1085 {SN_Encrypt
, OP_XOP_X_X_X
, SIMD_OP_AES_ENC
},
1086 {SN_EncryptLast
, OP_XOP_X_X_X
, SIMD_OP_AES_ENCLAST
},
1087 {SN_InverseMixColumns
, OP_XOP_X_X
, SIMD_OP_AES_IMC
},
1089 {SN_get_IsSupported
}
1092 static SimdIntrinsic popcnt_methods
[] = {
1094 {SN_get_IsSupported
}
1097 static SimdIntrinsic lzcnt_methods
[] = {
1098 {SN_LeadingZeroCount
},
1099 {SN_get_IsSupported
}
1102 static SimdIntrinsic bmi1_methods
[] = {
1104 {SN_BitFieldExtract
},
1105 {SN_ExtractLowestSetBit
},
1106 {SN_GetMaskUpToLowestSetBit
},
1107 {SN_ResetLowestSetBit
},
1108 {SN_TrailingZeroCount
},
1109 {SN_get_IsSupported
}
1112 static SimdIntrinsic bmi2_methods
[] = {
1113 {SN_MultiplyNoFlags
},
1114 {SN_ParallelBitDeposit
},
1115 {SN_ParallelBitExtract
},
1117 {SN_get_IsSupported
}
1120 static SimdIntrinsic x86base_methods
[] = {
1121 {SN_BitScanForward
},
1122 {SN_BitScanReverse
},
1123 {SN_get_IsSupported
}
1127 emit_x86_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
1130 gboolean supported
, is_64bit
;
1131 MonoClass
*klass
= cmethod
->klass
;
1132 MonoTypeEnum arg0_type
= fsig
->param_count
> 0 ? get_underlying_type (fsig
->params
[0]) : MONO_TYPE_VOID
;
1133 SimdIntrinsic
*info
;
1135 if (is_hw_intrinsics_class (klass
, "Sse", &is_64bit
)) {
1136 if (!COMPILE_LLVM (cfg
))
1138 info
= lookup_intrins_info (sse_methods
, sizeof (sse_methods
), cmethod
);
1143 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSE
) != 0;
1147 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
1150 case SN_get_IsSupported
:
1151 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1152 ins
->type
= STACK_I4
;
1155 if (args
[2]->opcode
== OP_ICONST
)
1156 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE_SHUFFLE
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1157 // FIXME: handle non-constant mask (generate a switch)
1158 return emit_invalid_operation (cfg
, "mask in Sse.Shuffle must be constant");
1159 case SN_ConvertScalarToVector128Single
: {
1161 switch (fsig
->params
[1]->type
) {
1162 case MONO_TYPE_I4
: op
= OP_SSE_CVTSI2SS
; break;
1163 case MONO_TYPE_I8
: op
= OP_SSE_CVTSI2SS64
; break;
1164 default: g_assert_not_reached (); break;
1166 return emit_simd_ins_for_sig (cfg
, klass
, op
, 0, 0, fsig
, args
);
1168 case SN_ReciprocalScalar
:
1169 case SN_ReciprocalSqrtScalar
:
1170 case SN_SqrtScalar
: {
1173 case SN_ReciprocalScalar
: op
= OP_SSE_RCPSS
; break;
1174 case SN_ReciprocalSqrtScalar
: op
= OP_SSE_RSQRTSS
; break;
1175 case SN_SqrtScalar
: op
= OP_SSE_SQRTSS
; break;
1177 if (fsig
->param_count
== 1)
1178 return emit_simd_ins (cfg
, klass
, op
, args
[0]->dreg
, args
[0]->dreg
);
1179 else if (fsig
->param_count
== 2)
1180 return emit_simd_ins (cfg
, klass
, op
, args
[0]->dreg
, args
[1]->dreg
);
1182 g_assert_not_reached ();
1185 case SN_LoadScalarVector128
:
1192 if (is_hw_intrinsics_class (klass
, "Sse2", &is_64bit
)) {
1193 if (!COMPILE_LLVM (cfg
))
1195 info
= lookup_intrins_info (sse2_methods
, sizeof (sse2_methods
), cmethod
);
1200 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSE2
) != 0;
1204 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
1207 case SN_get_IsSupported
: {
1208 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1209 ins
->type
= STACK_I4
;
1213 return emit_simd_ins_for_sig (cfg
, klass
, OP_XBINOP
, arg0_type
== MONO_TYPE_R8
? OP_FSUB
: OP_ISUB
, arg0_type
, fsig
, args
);
1215 return emit_simd_ins_for_sig (cfg
, klass
, OP_XBINOP
, arg0_type
== MONO_TYPE_R8
? OP_FADD
: OP_IADD
, arg0_type
, fsig
, args
);
1217 if (arg0_type
== MONO_TYPE_U1
)
1218 return emit_simd_ins_for_sig (cfg
, klass
, OP_PAVGB_UN
, -1, arg0_type
, fsig
, args
);
1219 else if (arg0_type
== MONO_TYPE_U2
)
1220 return emit_simd_ins_for_sig (cfg
, klass
, OP_PAVGW_UN
, -1, arg0_type
, fsig
, args
);
1223 case SN_CompareNotEqual
:
1224 return emit_simd_ins_for_sig (cfg
, klass
, arg0_type
== MONO_TYPE_R8
? OP_XCOMPARE_FP
: OP_XCOMPARE
, CMP_NE
, arg0_type
, fsig
, args
);
1225 case SN_CompareEqual
:
1226 return emit_simd_ins_for_sig (cfg
, klass
, arg0_type
== MONO_TYPE_R8
? OP_XCOMPARE_FP
: OP_XCOMPARE
, CMP_EQ
, arg0_type
, fsig
, args
);
1227 case SN_CompareGreaterThan
:
1228 return emit_simd_ins_for_sig (cfg
, klass
, arg0_type
== MONO_TYPE_R8
? OP_XCOMPARE_FP
: OP_XCOMPARE
, CMP_GT
, arg0_type
, fsig
, args
);
1229 case SN_CompareLessThan
:
1230 return emit_simd_ins_for_sig (cfg
, klass
, arg0_type
== MONO_TYPE_R8
? OP_XCOMPARE_FP
: OP_XCOMPARE
, CMP_LT
, arg0_type
, fsig
, args
);
1231 case SN_ConvertToInt32
:
1232 if (arg0_type
== MONO_TYPE_R8
)
1233 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_I4_X
, SIMD_OP_SSE_CVTSD2SI
, arg0_type
, fsig
, args
);
1234 else if (arg0_type
== MONO_TYPE_I4
)
1235 return emit_simd_ins_for_sig (cfg
, klass
, OP_EXTRACT_I4
, 0, arg0_type
, fsig
, args
);
1238 case SN_ConvertToInt64
:
1239 if (arg0_type
== MONO_TYPE_R8
)
1240 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_I8_X
, SIMD_OP_SSE_CVTSD2SI64
, arg0_type
, fsig
, args
);
1241 else if (arg0_type
== MONO_TYPE_I8
)
1242 return emit_simd_ins_for_sig (cfg
, klass
, OP_EXTRACT_I8
, 0 /*element index*/, arg0_type
, fsig
, args
);
1244 g_assert_not_reached ();
1246 case SN_ConvertScalarToVector128Double
: {
1247 int op
= OP_SSE2_CVTSS2SD
;
1248 switch (fsig
->params
[1]->type
) {
1249 case MONO_TYPE_I4
: op
= OP_SSE2_CVTSI2SD
; break;
1250 case MONO_TYPE_I8
: op
= OP_SSE2_CVTSI2SD64
; break;
1252 return emit_simd_ins_for_sig (cfg
, klass
, op
, 0, 0, fsig
, args
);
1254 case SN_ConvertScalarToVector128Int32
:
1255 case SN_ConvertScalarToVector128Int64
:
1256 case SN_ConvertScalarToVector128UInt32
:
1257 case SN_ConvertScalarToVector128UInt64
:
1258 return emit_simd_ins_for_sig (cfg
, klass
, OP_CREATE_SCALAR
, -1, arg0_type
, fsig
, args
);
1259 case SN_ConvertToUInt32
:
1260 return emit_simd_ins_for_sig (cfg
, klass
, OP_EXTRACT_I4
, 0 /*element index*/, arg0_type
, fsig
, args
);
1261 case SN_ConvertToUInt64
:
1262 return emit_simd_ins_for_sig (cfg
, klass
, OP_EXTRACT_I8
, 0 /*element index*/, arg0_type
, fsig
, args
);
1263 case SN_ConvertToVector128Double
:
1264 if (arg0_type
== MONO_TYPE_R4
)
1265 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTPS2PD
, 0, arg0_type
, fsig
, args
);
1266 else if (arg0_type
== MONO_TYPE_I4
)
1267 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTDQ2PD
, 0, arg0_type
, fsig
, args
);
1270 case SN_ConvertToVector128Int32
:
1271 if (arg0_type
== MONO_TYPE_R4
)
1272 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTPS2DQ
, 0, arg0_type
, fsig
, args
);
1273 else if (arg0_type
== MONO_TYPE_R8
)
1274 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTPD2DQ
, 0, arg0_type
, fsig
, args
);
1277 case SN_ConvertToVector128Int32WithTruncation
:
1278 if (arg0_type
== MONO_TYPE_R4
)
1279 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTTPS2DQ
, 0, arg0_type
, fsig
, args
);
1280 else if (arg0_type
== MONO_TYPE_R8
)
1281 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTTPD2DQ
, 0, arg0_type
, fsig
, args
);
1284 case SN_ConvertToVector128Single
:
1285 if (arg0_type
== MONO_TYPE_I4
)
1286 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTDQ2PS
, 0, arg0_type
, fsig
, args
);
1287 else if (arg0_type
== MONO_TYPE_R8
)
1288 return emit_simd_ins_for_sig (cfg
, klass
, OP_CVTPD2PS
, 0, arg0_type
, fsig
, args
);
1291 case SN_LoadAlignedVector128
:
1292 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE_LOADU
, 16 /*alignment*/, arg0_type
, fsig
, args
);
1293 case SN_LoadVector128
:
1294 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE_LOADU
, 1 /*alignment*/, arg0_type
, fsig
, args
);
1296 return emit_simd_ins_for_sig (cfg
, klass
, fsig
->param_count
== 2 ? OP_SSE_MOVS2
: OP_SSE_MOVS
, -1, arg0_type
, fsig
, args
);
1298 switch (arg0_type
) {
1300 return emit_simd_ins_for_sig (cfg
, klass
, OP_PMAXB_UN
, 0, arg0_type
, fsig
, args
);
1302 return emit_simd_ins_for_sig (cfg
, klass
, OP_PMAXW
, 0, arg0_type
, fsig
, args
);
1303 case MONO_TYPE_R8
: return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_MAXPD
, arg0_type
, fsig
, args
);
1305 g_assert_not_reached ();
1310 switch (arg0_type
) {
1312 return emit_simd_ins_for_sig (cfg
, klass
, OP_PMINB_UN
, 0, arg0_type
, fsig
, args
);
1314 return emit_simd_ins_for_sig (cfg
, klass
, OP_PMINW
, 0, arg0_type
, fsig
, args
);
1315 case MONO_TYPE_R8
: return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_MINPD
, arg0_type
, fsig
, args
);
1317 g_assert_not_reached ();
1322 if (arg0_type
== MONO_TYPE_U4
)
1323 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PMULUDQ
, 0, arg0_type
, fsig
, args
);
1324 else if (arg0_type
== MONO_TYPE_R8
)
1325 return emit_simd_ins_for_sig (cfg
, klass
, OP_MULPD
, 0, arg0_type
, fsig
, args
);
1327 g_assert_not_reached ();
1328 case SN_MultiplyHigh
:
1329 if (arg0_type
== MONO_TYPE_I2
)
1330 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PMULHW
, arg0_type
, fsig
, args
);
1331 else if (arg0_type
== MONO_TYPE_U2
)
1332 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PMULHUW
, arg0_type
, fsig
, args
);
1334 g_assert_not_reached ();
1335 case SN_PackSignedSaturate
:
1336 if (arg0_type
== MONO_TYPE_I2
)
1337 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PACKSSWB
, arg0_type
, fsig
, args
);
1338 else if (arg0_type
== MONO_TYPE_I4
)
1339 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PACKSSDW
, arg0_type
, fsig
, args
);
1341 g_assert_not_reached ();
1342 case SN_PackUnsignedSaturate
:
1343 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PACKUS
, -1, arg0_type
, fsig
, args
);
1345 g_assert (arg0_type
== MONO_TYPE_U2
);
1346 return emit_simd_ins_for_sig (cfg
, klass
, OP_XEXTRACT_I32
, arg0_type
, 0, fsig
, args
);
1348 g_assert (arg0_type
== MONO_TYPE_I2
|| arg0_type
== MONO_TYPE_U2
);
1349 return emit_simd_ins_for_sig (cfg
, klass
, OP_XINSERT_I2
, 0, arg0_type
, fsig
, args
);
1350 case SN_ShiftRightLogical
: {
1351 gboolean is_imm
= fsig
->params
[1]->type
== MONO_TYPE_U1
;
1352 SimdOp op
= (SimdOp
)0;
1353 switch (arg0_type
) {
1356 op
= is_imm
? SIMD_OP_SSE_PSRLW_IMM
: SIMD_OP_SSE_PSRLW
;
1360 op
= is_imm
? SIMD_OP_SSE_PSRLD_IMM
: SIMD_OP_SSE_PSRLD
;
1364 op
= is_imm
? SIMD_OP_SSE_PSRLQ_IMM
: SIMD_OP_SSE_PSRLQ
;
1366 default: g_assert_not_reached (); break;
1368 return emit_simd_ins_for_sig (cfg
, klass
, is_imm
? OP_XOP_X_X_I4
: OP_XOP_X_X_X
, op
, arg0_type
, fsig
, args
);
1370 case SN_ShiftRightArithmetic
: {
1371 gboolean is_imm
= fsig
->params
[1]->type
== MONO_TYPE_U1
;
1372 SimdOp op
= (SimdOp
)0;
1373 switch (arg0_type
) {
1376 op
= is_imm
? SIMD_OP_SSE_PSRAW_IMM
: SIMD_OP_SSE_PSRAW
;
1380 op
= is_imm
? SIMD_OP_SSE_PSRAD_IMM
: SIMD_OP_SSE_PSRAD
;
1382 default: g_assert_not_reached (); break;
1384 return emit_simd_ins_for_sig (cfg
, klass
, is_imm
? OP_XOP_X_X_I4
: OP_XOP_X_X_X
, op
, arg0_type
, fsig
, args
);
1386 case SN_ShiftLeftLogical
: {
1387 gboolean is_imm
= fsig
->params
[1]->type
== MONO_TYPE_U1
;
1388 SimdOp op
= (SimdOp
)0;
1389 switch (arg0_type
) {
1392 op
= is_imm
? SIMD_OP_SSE_PSLLW_IMM
: SIMD_OP_SSE_PSLLW
;
1396 op
= is_imm
? SIMD_OP_SSE_PSLLD_IMM
: SIMD_OP_SSE_PSLLD
;
1400 op
= is_imm
? SIMD_OP_SSE_PSLLQ_IMM
: SIMD_OP_SSE_PSLLQ
;
1402 default: g_assert_not_reached (); break;
1404 return emit_simd_ins_for_sig (cfg
, klass
, is_imm
? OP_XOP_X_X_I4
: OP_XOP_X_X_X
, op
, arg0_type
, fsig
, args
);
1406 case SN_ShiftLeftLogical128BitLane
:
1407 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PSLLDQ
, 0, arg0_type
, fsig
, args
);
1408 case SN_ShiftRightLogical128BitLane
:
1409 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PSRLDQ
, 0, arg0_type
, fsig
, args
);
1411 if (fsig
->param_count
== 2) {
1412 g_assert (arg0_type
== MONO_TYPE_I4
|| arg0_type
== MONO_TYPE_U4
);
1413 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PSHUFD
, 0, arg0_type
, fsig
, args
);
1414 } else if (fsig
->param_count
== 3) {
1415 g_assert (arg0_type
== MONO_TYPE_R8
);
1416 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_SHUFPD
, 0, arg0_type
, fsig
, args
);
1418 g_assert_not_reached ();
1422 case SN_ShuffleHigh
:
1423 g_assert (fsig
->param_count
== 2);
1424 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PSHUFHW
, 0, arg0_type
, fsig
, args
);
1426 g_assert (fsig
->param_count
== 2);
1427 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE2_PSHUFLW
, 0, arg0_type
, fsig
, args
);
1428 case SN_SqrtScalar
: {
1429 if (fsig
->param_count
== 1)
1430 return emit_simd_ins (cfg
, klass
, OP_SSE2_SQRTSD
, args
[0]->dreg
, args
[0]->dreg
);
1431 else if (fsig
->param_count
== 2)
1432 return emit_simd_ins (cfg
, klass
, OP_SSE2_SQRTSD
, args
[0]->dreg
, args
[1]->dreg
);
1434 g_assert_not_reached ();
1438 case SN_LoadScalarVector128
: {
1440 switch (arg0_type
) {
1442 case MONO_TYPE_U4
: op
= OP_SSE2_MOVD
; break;
1444 case MONO_TYPE_U8
: op
= OP_SSE2_MOVQ
; break;
1445 case MONO_TYPE_R8
: op
= OP_SSE2_MOVUPD
; break;
1446 default: g_assert_not_reached(); break;
1448 return emit_simd_ins_for_sig (cfg
, klass
, op
, 0, 0, fsig
, args
);
1455 if (is_hw_intrinsics_class (klass
, "Sse3", &is_64bit
)) {
1456 if (!COMPILE_LLVM (cfg
))
1458 info
= lookup_intrins_info (sse3_methods
, sizeof (sse3_methods
), cmethod
);
1465 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
1467 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSE3
);
1470 case SN_get_IsSupported
:
1471 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1472 ins
->type
= STACK_I4
;
1474 case SN_AddSubtract
:
1475 if (arg0_type
== MONO_TYPE_R4
)
1476 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_ADDSUBPS
, arg0_type
, fsig
, args
);
1477 else if (arg0_type
== MONO_TYPE_R8
)
1478 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_ADDSUBPD
, arg0_type
, fsig
, args
);
1480 g_assert_not_reached ();
1482 case SN_HorizontalAdd
:
1483 if (arg0_type
== MONO_TYPE_R4
)
1484 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_HADDPS
, arg0_type
, fsig
, args
);
1485 else if (arg0_type
== MONO_TYPE_R8
)
1486 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_HADDPD
, arg0_type
, fsig
, args
);
1488 g_assert_not_reached ();
1490 case SN_HorizontalSubtract
:
1491 if (arg0_type
== MONO_TYPE_R4
)
1492 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_HSUBPS
, arg0_type
, fsig
, args
);
1493 else if (arg0_type
== MONO_TYPE_R8
)
1494 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_HSUBPD
, arg0_type
, fsig
, args
);
1496 g_assert_not_reached ();
1499 g_assert_not_reached ();
1504 if (is_hw_intrinsics_class (klass
, "Ssse3", &is_64bit
)) {
1505 if (!COMPILE_LLVM (cfg
))
1507 info
= lookup_intrins_info (ssse3_methods
, sizeof (ssse3_methods
), cmethod
);
1514 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
1516 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSSE3
) != 0;
1519 case SN_get_IsSupported
:
1520 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1521 ins
->type
= STACK_I4
;
1524 if (args
[2]->opcode
== OP_ICONST
)
1525 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSSE3_ALIGNR
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1526 return emit_invalid_operation (cfg
, "mask in Ssse3.AlignRight must be constant");
1527 case SN_HorizontalAdd
:
1528 if (arg0_type
== MONO_TYPE_I2
)
1529 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHADDW
, arg0_type
, fsig
, args
);
1530 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHADDD
, arg0_type
, fsig
, args
);
1531 case SN_HorizontalSubtract
:
1532 if (arg0_type
== MONO_TYPE_I2
)
1533 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHSUBW
, arg0_type
, fsig
, args
);
1534 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PHSUBD
, arg0_type
, fsig
, args
);
1536 if (arg0_type
== MONO_TYPE_I1
)
1537 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PSIGNB
, arg0_type
, fsig
, args
);
1538 if (arg0_type
== MONO_TYPE_I2
)
1539 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PSIGNW
, arg0_type
, fsig
, args
);
1540 return emit_simd_ins_for_sig (cfg
, klass
, OP_XOP_X_X_X
, SIMD_OP_SSE_PSIGND
, arg0_type
, fsig
, args
);
1542 g_assert_not_reached ();
1547 if (is_hw_intrinsics_class (klass
, "Sse41", &is_64bit
)) {
1548 if (!COMPILE_LLVM (cfg
))
1550 info
= lookup_intrins_info (sse41_methods
, sizeof (sse41_methods
), cmethod
);
1557 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
1559 supported
= COMPILE_LLVM (cfg
) && (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSE41
) != 0;
1562 case SN_get_IsSupported
:
1563 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1564 ins
->type
= STACK_I4
;
1567 if (args
[2]->opcode
== OP_ICONST
&& arg0_type
== MONO_TYPE_R4
)
1568 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_DPPS_IMM
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1569 else if (args
[2]->opcode
== OP_ICONST
&& arg0_type
== MONO_TYPE_R8
)
1570 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_DPPD_IMM
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1571 // FIXME: handle non-constant control byte (generate a switch)
1572 return emit_invalid_operation (cfg
, "control byte in Sse41.DotProduct must be constant");
1573 case SN_MultipleSumAbsoluteDifferences
:
1574 if (args
[2]->opcode
== OP_ICONST
)
1575 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_MPSADBW_IMM
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1576 // FIXME: handle non-constant control byte (generate a switch)
1577 return emit_invalid_operation (cfg
, "control byte in Sse41.MultipleSumAbsoluteDifferences must be constant");
1579 if (args
[2]->opcode
== OP_ICONST
)
1580 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_BLEND_IMM
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1581 // FIXME: handle non-constant control byte (generate a switch)
1582 return emit_invalid_operation (cfg
, "control byte in Sse41.Blend must be constant");
1583 case SN_BlendVariable
:
1584 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_BLENDV
, -1, arg0_type
, fsig
, args
);
1587 switch (arg0_type
) {
1590 case MONO_TYPE_I4
: op
= OP_XEXTRACT_I32
; break;
1592 case MONO_TYPE_U8
: op
= OP_XEXTRACT_I64
; break;
1593 case MONO_TYPE_R4
: op
= OP_XEXTRACT_R4
; break;
1594 default: g_assert_not_reached(); break;
1596 return emit_simd_ins_for_sig (cfg
, klass
, op
, arg0_type
, 0, fsig
, args
);
1599 if (args
[2]->opcode
== OP_ICONST
)
1600 return emit_simd_ins_for_sig (cfg
, klass
, OP_SSE41_INSERT
, -1, arg0_type
, fsig
, args
);
1601 // FIXME: handle non-constant index (generate a switch)
1602 return emit_invalid_operation (cfg
, "index in Sse41.Insert must be constant");
1604 g_assert_not_reached ();
1609 if (is_hw_intrinsics_class (klass
, "Sse42", &is_64bit
)) {
1610 if (!COMPILE_LLVM (cfg
))
1612 info
= lookup_intrins_info (sse42_methods
, sizeof (sse42_methods
), cmethod
);
1619 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
1621 supported
= COMPILE_LLVM (cfg
) && (mini_get_cpu_features (cfg
) & MONO_CPU_X86_SSE42
) != 0;
1624 case SN_get_IsSupported
:
1625 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1626 ins
->type
= STACK_I4
;
1629 MonoTypeEnum arg1_type
= get_underlying_type (fsig
->params
[1]);
1630 return emit_simd_ins_for_sig (cfg
, klass
,
1631 arg1_type
== MONO_TYPE_U8
? OP_SSE42_CRC64
: OP_SSE42_CRC32
,
1632 arg1_type
, arg0_type
, fsig
, args
);
1635 g_assert_not_reached ();
1640 if (is_hw_intrinsics_class (klass
, "Pclmulqdq", &is_64bit
)) {
1641 if (!COMPILE_LLVM (cfg
))
1643 info
= lookup_intrins_info (pclmulqdq_methods
, sizeof (pclmulqdq_methods
), cmethod
);
1650 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
1652 supported
= COMPILE_LLVM (cfg
) && (mini_get_cpu_features (cfg
) & MONO_CPU_X86_PCLMUL
) != 0;
1655 case SN_CarrylessMultiply
: {
1656 if (args
[2]->opcode
== OP_ICONST
)
1657 return emit_simd_ins_for_sig (cfg
, klass
, OP_PCLMULQDQ_IMM
, args
[2]->inst_c0
, arg0_type
, fsig
, args
);
1658 // FIXME: handle non-constant control byte (generate a switch)
1659 return emit_invalid_operation (cfg
, "index in Pclmulqdq.CarrylessMultiply must be constant");
1661 case SN_get_IsSupported
:
1662 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1663 ins
->type
= STACK_I4
;
1666 g_assert_not_reached ();
1671 if (is_hw_intrinsics_class (klass
, "Aes", &is_64bit
)) {
1672 if (!COMPILE_LLVM (cfg
))
1674 info
= lookup_intrins_info (aes_methods
, sizeof (aes_methods
), cmethod
);
1681 return emit_simd_ins_for_sig (cfg
, klass
, info
->op
, info
->instc0
, arg0_type
, fsig
, args
);
1683 supported
= COMPILE_LLVM (cfg
) && (mini_get_cpu_features (cfg
) & MONO_CPU_X86_AES
) != 0;
1686 case SN_get_IsSupported
:
1687 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1688 ins
->type
= STACK_I4
;
1690 case SN_KeygenAssist
: {
1691 if (args
[1]->opcode
== OP_ICONST
)
1692 return emit_simd_ins_for_sig (cfg
, klass
, OP_AES_KEYGEN_IMM
, args
[1]->inst_c0
, arg0_type
, fsig
, args
);
1693 // FIXME: handle non-constant control byte (generate a switch)
1694 return emit_invalid_operation (cfg
, "control byte in Aes.KeygenAssist must be constant");
1697 g_assert_not_reached ();
1702 if (is_hw_intrinsics_class (klass
, "Popcnt", &is_64bit
)) {
1703 info
= lookup_intrins_info (popcnt_methods
, sizeof (popcnt_methods
), cmethod
);
1708 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_POPCNT
) != 0;
1711 case SN_get_IsSupported
:
1712 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1713 ins
->type
= STACK_I4
;
1718 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_POPCNT64
: OP_POPCNT32
);
1719 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1720 ins
->sreg1
= args
[0]->dreg
;
1721 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1722 MONO_ADD_INS (cfg
->cbb
, ins
);
1728 if (is_hw_intrinsics_class (klass
, "Lzcnt", &is_64bit
)) {
1729 info
= lookup_intrins_info (lzcnt_methods
, sizeof (lzcnt_methods
), cmethod
);
1734 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_LZCNT
) != 0;
1737 case SN_get_IsSupported
:
1738 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1739 ins
->type
= STACK_I4
;
1741 case SN_LeadingZeroCount
:
1744 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_LZCNT64
: OP_LZCNT32
);
1745 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1746 ins
->sreg1
= args
[0]->dreg
;
1747 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1748 MONO_ADD_INS (cfg
->cbb
, ins
);
1754 if (is_hw_intrinsics_class (klass
, "Bmi1", &is_64bit
)) {
1755 if (!COMPILE_LLVM (cfg
))
1757 info
= lookup_intrins_info (bmi1_methods
, sizeof (bmi1_methods
), cmethod
);
1762 g_assert (id
!= -1);
1763 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_BMI1
) != 0;
1766 case SN_get_IsSupported
:
1767 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1768 ins
->type
= STACK_I4
;
1772 // LLVM replaces it with `andn`
1773 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1774 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1775 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LXOR_IMM
: OP_IXOR_IMM
, tmp_reg
, args
[0]->dreg
, -1);
1776 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, tmp_reg
, args
[1]->dreg
);
1779 case SN_BitFieldExtract
: {
1780 if (fsig
->param_count
== 2) {
1781 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_BEXTR64
: OP_BEXTR32
);
1782 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1783 ins
->sreg1
= args
[0]->dreg
;
1784 ins
->sreg2
= args
[1]->dreg
;
1785 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1786 MONO_ADD_INS (cfg
->cbb
, ins
);
1790 case SN_GetMaskUpToLowestSetBit
: {
1792 // LLVM replaces it with `blsmsk`
1793 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1794 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1795 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LSUB_IMM
: OP_ISUB_IMM
, tmp_reg
, args
[0]->dreg
, 1);
1796 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LXOR
: OP_IXOR
, result_reg
, args
[0]->dreg
, tmp_reg
);
1799 case SN_ResetLowestSetBit
: {
1801 // LLVM replaces it with `blsr`
1802 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1803 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1804 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LSUB_IMM
: OP_ISUB_IMM
, tmp_reg
, args
[0]->dreg
, 1);
1805 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, args
[0]->dreg
, tmp_reg
);
1808 case SN_ExtractLowestSetBit
: {
1810 // LLVM replaces it with `blsi`
1811 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1812 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1813 int zero_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1814 MONO_EMIT_NEW_ICONST (cfg
, zero_reg
, 0);
1815 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LSUB
: OP_ISUB
, tmp_reg
, zero_reg
, args
[0]->dreg
);
1816 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, args
[0]->dreg
, tmp_reg
);
1819 case SN_TrailingZeroCount
:
1820 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_CTTZ64
: OP_CTTZ32
);
1821 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1822 ins
->sreg1
= args
[0]->dreg
;
1823 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1824 MONO_ADD_INS (cfg
->cbb
, ins
);
1827 g_assert_not_reached ();
1830 if (is_hw_intrinsics_class (klass
, "Bmi2", &is_64bit
)) {
1831 if (!COMPILE_LLVM (cfg
))
1833 info
= lookup_intrins_info (bmi2_methods
, sizeof (bmi2_methods
), cmethod
);
1838 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_BMI2
) != 0;
1841 case SN_get_IsSupported
:
1842 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
1843 ins
->type
= STACK_I4
;
1845 case SN_MultiplyNoFlags
:
1846 if (fsig
->param_count
== 2) {
1847 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_MULX_H64
: OP_MULX_H32
);
1848 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1849 ins
->sreg1
= args
[0]->dreg
;
1850 ins
->sreg2
= args
[1]->dreg
;
1851 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1852 MONO_ADD_INS (cfg
->cbb
, ins
);
1853 } else if (fsig
->param_count
== 3) {
1854 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_MULX_HL64
: OP_MULX_HL32
);
1855 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1856 ins
->sreg1
= args
[0]->dreg
;
1857 ins
->sreg2
= args
[1]->dreg
;
1858 ins
->sreg3
= args
[2]->dreg
;
1859 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1860 MONO_ADD_INS (cfg
->cbb
, ins
);
1862 g_assert_not_reached ();
1865 case SN_ZeroHighBits
:
1866 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_BZHI64
: OP_BZHI32
);
1867 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1868 ins
->sreg1
= args
[0]->dreg
;
1869 ins
->sreg2
= args
[1]->dreg
;
1870 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1871 MONO_ADD_INS (cfg
->cbb
, ins
);
1873 case SN_ParallelBitExtract
:
1874 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_PEXT64
: OP_PEXT32
);
1875 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1876 ins
->sreg1
= args
[0]->dreg
;
1877 ins
->sreg2
= args
[1]->dreg
;
1878 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1879 MONO_ADD_INS (cfg
->cbb
, ins
);
1881 case SN_ParallelBitDeposit
:
1882 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_PDEP64
: OP_PDEP32
);
1883 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1884 ins
->sreg1
= args
[0]->dreg
;
1885 ins
->sreg2
= args
[1]->dreg
;
1886 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1887 MONO_ADD_INS (cfg
->cbb
, ins
);
1890 g_assert_not_reached ();
1894 if (is_hw_intrinsics_class (klass
, "X86Base", &is_64bit
)) {
1895 if (!COMPILE_LLVM (cfg
))
1898 info
= lookup_intrins_info (x86base_methods
, sizeof (x86base_methods
), cmethod
);
1904 case SN_get_IsSupported
:
1905 EMIT_NEW_ICONST (cfg
, ins
, 1);
1906 ins
->type
= STACK_I4
;
1908 case SN_BitScanForward
:
1909 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_X86_BSF64
: OP_X86_BSF32
);
1910 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1911 ins
->sreg1
= args
[0]->dreg
;
1912 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1913 MONO_ADD_INS (cfg
->cbb
, ins
);
1915 case SN_BitScanReverse
:
1916 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_X86_BSR64
: OP_X86_BSR32
);
1917 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
1918 ins
->sreg1
= args
[0]->dreg
;
1919 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
1920 MONO_ADD_INS (cfg
->cbb
, ins
);
1923 g_assert_not_reached ();
1930 static guint16 vector_128_methods
[] = {
1942 SN_CreateScalarUnsafe
,
1945 static guint16 vector_128_t_methods
[] = {
1951 emit_vector128 (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
1956 if (!COMPILE_LLVM (cfg
))
1959 klass
= cmethod
->klass
;
1960 id
= lookup_intrins (vector_128_methods
, sizeof (vector_128_methods
), cmethod
);
1964 if (!strcmp (m_class_get_name (cfg
->method
->klass
), "Vector256"))
1965 return NULL
; // TODO: Fix Vector256.WithUpper/WithLower
1967 MonoTypeEnum arg0_type
= fsig
->param_count
> 0 ? get_underlying_type (fsig
->params
[0]) : MONO_TYPE_VOID
;
1980 return emit_simd_ins (cfg
, klass
, OP_XCAST
, args
[0]->dreg
, -1);
1982 MonoType
*etype
= get_vector_t_elem_type (fsig
->ret
);
1983 if (fsig
->param_count
== 1 && mono_metadata_type_equal (fsig
->params
[0], etype
)) {
1984 return emit_simd_ins (cfg
, klass
, type_to_expand_op (etype
), args
[0]->dreg
, -1);
1986 MonoInst
*ins
, *load
;
1988 // FIXME: Optimize this
1989 MONO_INST_NEW (cfg
, ins
, OP_LOCALLOC_IMM
);
1990 ins
->dreg
= alloc_preg (cfg
);
1992 MONO_ADD_INS (cfg
->cbb
, ins
);
1994 int esize
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
1995 int store_opcode
= mono_type_to_store_membase (cfg
, etype
);
1996 for (int i
= 0; i
< fsig
->param_count
; ++i
)
1997 MONO_EMIT_NEW_STORE_MEMBASE (cfg
, store_opcode
, ins
->dreg
, i
* esize
, args
[i
]->dreg
);
1999 load
= emit_simd_ins (cfg
, klass
, OP_SSE_LOADU
, ins
->dreg
, -1);
2001 load
->inst_c1
= get_underlying_type (etype
);
2005 case SN_CreateScalarUnsafe
:
2006 return emit_simd_ins_for_sig (cfg
, klass
, OP_CREATE_SCALAR_UNSAFE
, -1, arg0_type
, fsig
, args
);
2015 emit_vector128_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
2018 MonoType
*type
, *etype
;
2022 id
= lookup_intrins (vector_128_t_methods
, sizeof (vector_128_t_methods
), cmethod
);
2026 klass
= cmethod
->klass
;
2027 type
= m_class_get_byval_arg (klass
);
2028 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
2029 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
2033 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
2036 if (cfg
->verbose_level
> 1) {
2037 char *name
= mono_method_full_name (cmethod
, TRUE
);
2038 printf (" SIMD intrinsic %s\n", name
);
2044 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
2046 EMIT_NEW_ICONST (cfg
, ins
, len
);
2049 return emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
2058 static guint16 vector_256_t_methods
[] = {
2063 emit_vector256_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
2066 MonoType
*type
, *etype
;
2070 id
= lookup_intrins (vector_256_t_methods
, sizeof (vector_256_t_methods
), cmethod
);
2074 klass
= cmethod
->klass
;
2075 type
= m_class_get_byval_arg (klass
);
2076 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
2077 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
2081 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
2084 if (cfg
->verbose_level
> 1) {
2085 char *name
= mono_method_full_name (cmethod
, TRUE
);
2086 printf (" SIMD intrinsic %s\n", name
);
2092 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
2094 EMIT_NEW_ICONST (cfg
, ins
, len
);
2103 #endif // !TARGET_ARM64
2106 mono_emit_simd_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
2108 const char *class_name
;
2109 const char *class_ns
;
2110 MonoImage
*image
= m_class_get_image (cmethod
->klass
);
2112 if (image
!= mono_get_corlib ())
2115 class_ns
= m_class_get_name_space (cmethod
->klass
);
2116 class_name
= m_class_get_name (cmethod
->klass
);
2118 // If cmethod->klass is nested, the namespace is on the enclosing class.
2119 if (m_class_get_nested_in (cmethod
->klass
))
2120 class_ns
= m_class_get_name_space (m_class_get_nested_in (cmethod
->klass
));
2123 if (!strcmp (class_ns
, "System.Runtime.Intrinsics.Arm")) {
2124 MonoInst
*ins
= emit_arm64_intrinsics (cfg
, cmethod
, fsig
, args
);
2127 #endif // TARGET_ARM64
2129 #ifdef TARGET_AMD64 // TODO: test and enable for x86 too
2130 if (!strcmp (class_ns
, "System.Runtime.Intrinsics.X86")) {
2131 MonoInst
*ins
= emit_x86_intrinsics (cfg
, cmethod
, fsig
, args
);
2135 // FIXME: implement Vector64<T>, Vector128<T> and Vector<T> for Arm64
2137 if (!strcmp (class_ns
, "System.Runtime.Intrinsics")) {
2138 if (!strcmp (class_name
, "Vector128`1"))
2139 return emit_vector128_t (cfg
, cmethod
, fsig
, args
);
2140 if (!strcmp (class_name
, "Vector128"))
2141 return emit_vector128 (cfg
, cmethod
, fsig
, args
);
2142 if (!strcmp (class_name
, "Vector256`1"))
2143 return emit_vector256_t (cfg
, cmethod
, fsig
, args
);
2146 if (!strcmp (class_ns
, "System.Numerics")) {
2147 if (!strcmp (class_name
, "Vector"))
2148 return emit_sys_numerics_vector (cfg
, cmethod
, fsig
, args
);
2149 if (!strcmp (class_name
, "Vector`1"))
2150 return emit_sys_numerics_vector_t (cfg
, cmethod
, fsig
, args
);
2152 #endif // TARGET_AMD64
2158 mono_simd_decompose_intrinsic (MonoCompile
*cfg
, MonoBasicBlock
*bb
, MonoInst
*ins
)
2163 mono_simd_simplify_indirection (MonoCompile
*cfg
)
2167 #endif /* DISABLE_JIT */
2168 #endif /* MONO_ARCH_SIMD_INTRINSICS */
2170 #if defined(ENABLE_NETCORE) && defined(TARGET_AMD64)
/*
 * Icall backing X86Base.CpuIdEx: executes CPUID with the given leaf
 * (function_id) and subleaf (subfunction_id), storing EAX..EDX into abcd[0..3].
 * When cross-compiling there is no host CPUID to query, so abcd is left
 * untouched (the #ifndef compiles the call away).
 */
void
ves_icall_System_Runtime_Intrinsics_X86_X86Base___cpuidex (int abcd [4], int function_id, int subfunction_id)
{
#ifndef MONO_CROSS_COMPILE
	mono_hwcap_x86_call_cpuidex (function_id, subfunction_id,
		&abcd [0], &abcd [1], &abcd [2], &abcd [3]);
#endif
}
2181 MONO_EMPTY_SOURCE_FILE (simd_intrinsics_netcore
);