2 * SIMD Intrinsics support for netcore
6 #include <mono/utils/mono-compiler.h>
8 #if defined(DISABLE_JIT)
11 mono_simd_intrinsics_init (void)
18 * Only LLVM is supported as a backend.
22 #include "mini-runtime.h"
25 #include "mini-llvm.h"
27 #include "mono/utils/bsearch.h"
28 #include <mono/metadata/abi-details.h>
29 #include <mono/metadata/reflection-internals.h>
31 #if defined (MONO_ARCH_SIMD_INTRINSICS) && defined(ENABLE_NETCORE)
/*
 * String table for the intrinsic method names. simd-methods-netcore.h is
 * included three times with different METHOD/METHOD2 definitions:
 *   1. as struct members: one char array per name, sized with sizeof so the
 *      terminating NUL is included, named after __LINE__ via MSGSTRFIELD;
 *   2. as the string initializers (#name, or the explicit str for METHOD2);
 *   3. as SN_<name> constants equal to the byte offset of the matching
 *      member inside struct msgstr_t.
 * NOTE(review): the lines that close the struct, open the enum and #undef
 * METHOD/METHOD2 between the passes are not visible in this listing.
 */
33 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
34 #define MSGSTRFIELD1(line) str##line
35 static const struct msgstr_t
{
36 #define METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
37 #define METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
38 #include "simd-methods-netcore.h"
42 #define METHOD(name) #name,
43 #define METHOD2(str,name) str,
44 #include "simd-methods-netcore.h"
50 #define METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
51 #define METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
52 #include "simd-methods-netcore.h"
/* Turn an SN_* byte offset back into a pointer to the corresponding name. */
54 #define method_name(idx) ((const char*)&method_names + (idx))
/*
 * SIMD register size. It is divided by the element value size further down
 * to obtain the element count of a vector, and it is published through
 * mono_simd_register_size by mono_simd_intrinsics_init.
 */
56 static int register_size
;
/*
 * mono_simd_intrinsics_init:
 *
 *   Tests the MONO_CPU_X86_AVX feature bit and then copies register_size into
 * mono_simd_register_size.
 * NOTE(review): the assignments to register_size, including the statement
 * controlled by the AVX test, are not visible in this listing.
 * NOTE(review): mini_get_cpu_features is called without arguments here but
 * with cfg in emit_x86_intrinsics; confirm this branch is actually compiled.
 */
59 mono_simd_intrinsics_init (void)
63 if ((mini_get_cpu_features () & MONO_CPU_X86_AVX
) != 0)
66 /* Tell the class init code the size of the System.Numerics.Register type */
67 mono_simd_register_size
= register_size
;
/*
 * mono_emit_simd_field_load:
 *
 *   Takes the compile context, a class field and an address instruction.
 * NOTE(review): only the signature is visible in this listing; the return
 * type and the body cannot be documented from here.
 */
71 mono_emit_simd_field_load (MonoCompile
*cfg
, MonoClassField
*field
, MonoInst
*addr
)
/*
 * Comparison callback handed to mono_binary_search by lookup_intrins.
 * KEY is a C string; VALUE points at a guint16 offset which method_name ()
 * turns into a name. The result is that of strcmp (key, name).
 */
77 simd_intrinsic_compare_by_name (const void *key
, const void *value
)
79 return strcmp ((const char*)key
, method_name (*(guint16
*)value
));
/*
 * lookup_intrins:
 *
 *   Binary-search INTRINSICS for the entry whose name equals cmethod->name.
 * SIZE is the size of the table in bytes: it is divided by sizeof (guint16)
 * to get the element count, so callers pass sizeof (table).
 * NOTE(review): what is done with RESULT (and the return value) is not
 * visible in this listing.
 */
83 lookup_intrins (guint16
*intrinsics
, int size
, MonoMethod
*cmethod
)
85 const guint16
*result
= (const guint16
*)mono_binary_search (cmethod
->name
, intrinsics
, size
/ sizeof (guint16
), sizeof (guint16
), &simd_intrinsic_compare_by_name
);
/*
 * Ordering sanity check over adjacent table entries. Only the first character
 * of each name is compared; an inversion prints both names and asserts.
 * NOTE(review): the loop bound is computed in size_t, so an empty table would
 * make "count - 1" wrap around. Any preprocessor guard around this loop is
 * not visible here.
 */
88 for (int i
= 0; i
< (size
/ sizeof (guint16
)) - 1; ++i
) {
89 if (method_name (intrinsics
[i
])[0] > method_name (intrinsics
[i
+ 1])[0]) {
90 printf ("%s %s\n",method_name (intrinsics
[i
]), method_name (intrinsics
[i
+ 1]));
91 g_assert_not_reached ();
/*
 * type_to_expand_op:
 *
 *   Dispatches on type->type. Its result is used as the opcode passed to
 * emit_simd_ins by emit_sys_numerics_vector_t.
 * NOTE(review): the case arms are not visible in this listing; the visible
 * g_assert_not_reached () is presumably the unhandled-type path.
 */
103 type_to_expand_op (MonoType
*type
)
105 switch (type
->type
) {
123 g_assert_not_reached ();
/*
 * load_simd_vreg_class: the source shapes are tested in this order:
 *   - OP_XMOVE,
 *   - OP_LDADDR (the dreg of the instruction stored in inst_p0 is used),
 *   - any opcode whose destination spec is 'x',
 *   - a STACK_PTR / STACK_MP value, which is loaded with OP_LOADX_MEMBASE.
 * Anything else is reported with g_warning, printed and asserted on.
 * NOTE(review): the return statements and any use of KLASS or INDIRECT are
 * not visible in this listing.
 */
128 * Return a simd vreg for the simd value represented by SRC.
129 * SRC is the 'this' argument to methods.
130 * Set INDIRECT to TRUE if the value was loaded from memory.
133 load_simd_vreg_class (MonoCompile
*cfg
, MonoClass
*klass
, MonoInst
*src
, gboolean
*indirect
)
135 const char *spec
= INS_INFO (src
->opcode
);
139 if (src
->opcode
== OP_XMOVE
) {
141 } else if (src
->opcode
== OP_LDADDR
) {
142 int res
= ((MonoInst
*)src
->inst_p0
)->dreg
;
144 } else if (spec
[MONO_INST_DEST
] == 'x') {
146 } else if (src
->type
== STACK_PTR
|| src
->type
== STACK_MP
) {
/*
 * Load the vector through the pointer held in src->dreg.
 * NOTE(review): the result is typed STACK_VTYPE but its dreg comes from
 * alloc_ireg, whereas emit_simd_ins uses alloc_xreg for 'x' destinations.
 * Confirm this is intended.
 */
151 MONO_INST_NEW (cfg
, ins
, OP_LOADX_MEMBASE
);
153 ins
->sreg1
= src
->dreg
;
154 ins
->type
= STACK_VTYPE
;
155 ins
->dreg
= alloc_ireg (cfg
);
156 MONO_ADD_INS (cfg
->cbb
, ins
);
/* No known way to obtain a simd vreg from SRC: diagnose and abort. */
159 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src
->type
);
160 mono_print_ins (src
);
161 g_assert_not_reached ();
/*
 * load_simd_vreg:
 *
 *   Convenience wrapper around load_simd_vreg_class which uses the class of
 * CMETHOD as the vector class. All other arguments are forwarded unchanged.
 */
165 load_simd_vreg (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
*src
, gboolean
*indirect
)
167 return load_simd_vreg_class (cfg
, cmethod
->klass
, src
, indirect
);
/*
 * emit_simd_ins:
 *
 *   Creates an instruction for OPCODE and appends it to the current basic
 * block (cfg->cbb). The destination register is chosen from the opcode's
 * destination spec:
 *   'x' -> alloc_xreg, stack type STACK_VTYPE
 *   'i' -> alloc_ireg, stack type STACK_I4
 * Any other destination spec hits g_assert_not_reached ().
 * Callers in this file pass -1 for an unused source register.
 * NOTE(review): the lines that store KLASS, SREG1 and SREG2 into the
 * instruction and the return statement are not visible in this listing.
 */
170 /* Create and emit a SIMD instruction, dreg is auto-allocated */
172 emit_simd_ins (MonoCompile
*cfg
, MonoClass
*klass
, int opcode
, int sreg1
, int sreg2
)
174 const char *spec
= INS_INFO (opcode
);
177 MONO_INST_NEW (cfg
, ins
, opcode
);
178 if (spec
[MONO_INST_DEST
] == 'x') {
179 ins
->dreg
= alloc_xreg (cfg
);
180 ins
->type
= STACK_VTYPE
;
181 } else if (spec
[MONO_INST_DEST
] == 'i') {
182 ins
->dreg
= alloc_ireg (cfg
);
183 ins
->type
= STACK_I4
;
185 g_assert_not_reached ();
190 MONO_ADD_INS (cfg
->cbb
, ins
);
/*
 * emit_xcompare:
 *
 *   Emits a vector compare of ARG1 and ARG2. OP_XCOMPARE_FP is used when
 * ETYPE is R4 or R8, OP_XCOMPARE otherwise. The comparison kind is stored in
 * inst_c0 and defaults to CMP_EQ; the relational operators in
 * emit_sys_numerics_vector_t overwrite it on the returned instruction.
 * The element type is stored in inst_c1.
 * NOTE(review): the declaration of INS and the return statement are not
 * visible in this listing.
 */
195 emit_xcompare (MonoCompile
*cfg
, MonoClass
*klass
, MonoType
*etype
, MonoInst
*arg1
, MonoInst
*arg2
)
198 gboolean is_fp
= etype
->type
== MONO_TYPE_R4
|| etype
->type
== MONO_TYPE_R8
;
200 ins
= emit_simd_ins (cfg
, klass
, is_fp
? OP_XCOMPARE_FP
: OP_XCOMPARE
, arg1
->dreg
, arg2
->dreg
);
201 ins
->inst_c0
= CMP_EQ
;
202 ins
->inst_c1
= etype
->type
;
/*
 * get_vector_t_elem_type:
 *
 *   Extracts the first generic type argument of VECTOR_TYPE. Asserts that the
 * type is a generic instance and that its class is named "Vector`1".
 * NOTE(review): the local declarations and the return statement are not
 * visible in this listing.
 */
207 get_vector_t_elem_type (MonoType
*vector_type
)
212 g_assert (vector_type
->type
== MONO_TYPE_GENERICINST
);
213 klass
= mono_class_from_mono_type_internal (vector_type
);
214 g_assert (!strcmp (m_class_get_name (klass
), "Vector`1"));
215 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
/*
 * SN_* ids handled by emit_sys_numerics_vector. The table is binary-searched
 * by name in lookup_intrins, so entries must stay ordered by method name.
 * NOTE(review): only one entry is visible in this listing.
 */
219 static guint16 vector_methods
[] = {
228 SN_get_IsHardwareAccelerated
,
/*
 * emit_sys_numerics_vector:
 *
 *   Intrinsics for the methods listed in vector_methods. mono_emit_simd_intrinsics
 * dispatches here for the class named "Vector" in System.Numerics.
 * NOTE(review): the switch header, several local declarations and the
 * fall-back return are not visible in this listing.
 */
232 emit_sys_numerics_vector (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
235 gboolean supported
= FALSE
;
/* Map the method name to its SN_* id. */
239 id
= lookup_intrins (vector_methods
, sizeof (vector_methods
), cmethod
);
243 //printf ("%s\n", mono_method_full_name (cmethod, 1));
245 #ifdef MONO_ARCH_SIMD_INTRINSICS
/* Trace the intrinsic being expanded when the JIT is verbose. */
/* NOTE(review): release of NAME is not visible here; confirm it is freed. */
249 if (cfg
->verbose_level
> 1) {
250 char *name
= mono_method_full_name (cmethod
, TRUE
);
251 printf (" SIMD intrinsic %s\n", name
);
/*
 * Emits the constant 1 or 0 depending on SUPPORTED. SUPPORTED starts out
 * FALSE; any later assignment is not visible in this listing.
 */
256 case SN_get_IsHardwareAccelerated
:
257 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
258 ins
->type
= STACK_I4
;
/* Element type of the argument must be R4; lowered to OP_CVTPS2DQ. */
260 case SN_ConvertToInt32
:
261 etype
= get_vector_t_elem_type (fsig
->params
[0]);
262 g_assert (etype
->type
== MONO_TYPE_R4
);
263 return emit_simd_ins (cfg
, mono_class_from_mono_type_internal (fsig
->ret
), OP_CVTPS2DQ
, args
[0]->dreg
, -1);
/*
 * Element type of the argument must be I4 or U4. U4 takes a separate path
 * whose body is not visible here; otherwise lowered to OP_CVTDQ2PS.
 */
264 case SN_ConvertToSingle
:
265 etype
= get_vector_t_elem_type (fsig
->params
[0]);
266 g_assert (etype
->type
== MONO_TYPE_I4
|| etype
->type
== MONO_TYPE_U4
);
268 if (etype
->type
== MONO_TYPE_U4
)
270 return emit_simd_ins (cfg
, mono_class_from_mono_type_internal (fsig
->ret
), OP_CVTDQ2PS
, args
[0]->dreg
, -1);
/* Remaining conversions share one arm; its body is not visible here. */
271 case SN_ConvertToDouble
:
272 case SN_ConvertToInt64
:
273 case SN_ConvertToUInt32
:
274 case SN_ConvertToUInt64
:
/*
 * SN_* ids handled by emit_sys_numerics_vector_t. The table is binary-searched
 * by name in lookup_intrins, so entries must stay ordered by method name.
 * NOTE(review): only one entry is visible in this listing.
 */
286 static guint16 vector_t_methods
[] = {
291 SN_GreaterThanOrEqual
,
/*
 * emit_sys_numerics_vector_t:
 *
 *   Intrinsics for the methods listed in vector_t_methods. mono_emit_simd_intrinsics
 * dispatches here for the class named "Vector`1" in System.Numerics.
 * KLASS/TYPE describe the vector class of CMETHOD and ETYPE is its first
 * generic argument. args [0] is passed to load_simd_vreg wherever the vector
 * itself is needed.
 * NOTE(review): the switch header, many case labels, most break/return lines
 * and several local declarations are not visible in this listing. Comments
 * below describe only the statements that are visible.
 */
313 emit_sys_numerics_vector_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
316 MonoType
*type
, *etype
;
319 gboolean is_unsigned
;
321 id
= lookup_intrins (vector_t_methods
, sizeof (vector_t_methods
), cmethod
);
325 klass
= cmethod
->klass
;
326 type
= m_class_get_byval_arg (klass
);
327 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
/* Element value size, and from it the number of elements per vector. */
328 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
330 len
= register_size
/ size
;
/*
 * Element-type filter: non-primitive types, char and bool are singled out.
 * The action taken for them is not visible in this listing.
 */
332 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
335 if (cfg
->verbose_level
> 1) {
336 char *name
= mono_method_full_name (cmethod
, TRUE
);
337 printf (" SIMD intrinsic %s\n", name
);
/* Signature guard (no parameters, I4 result), then LEN is emitted as a constant. */
343 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
345 EMIT_NEW_ICONST (cfg
, ins
, len
);
/* Parameterless method returning the vector type: an all-zero vector. */
348 g_assert (fsig
->param_count
== 0 && mono_metadata_type_equal (fsig
->ret
, type
));
349 return emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
350 case SN_get_AllOnes
: {
351 /* Compare a zero vector with itself */
352 ins
= emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
353 return emit_xcompare (cfg
, klass
, etype
, ins
, ins
);
/*
 * Element access. The index in args [1] is range-checked against LEN with an
 * unsigned compare; IndexOutOfRangeException is raised when index >= len.
 */
356 if (!COMPILE_LLVM (cfg
))
358 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, args
[1]->dreg
, len
);
359 MONO_EMIT_NEW_COND_EXC (cfg
, GE_UN
, "IndexOutOfRangeException");
/*
 * Pick the extract opcode and the class of the destination register from the
 * element type. The case labels selecting each arm are not visible here.
 */
362 gboolean is64
= FALSE
;
363 switch (etype
->type
) {
366 opcode
= OP_XEXTRACT_I64
;
368 dreg
= alloc_lreg (cfg
);
371 opcode
= OP_XEXTRACT_R8
;
372 dreg
= alloc_freg (cfg
);
375 g_assert (cfg
->r4fp
);
376 opcode
= OP_XEXTRACT_R4
;
377 dreg
= alloc_freg (cfg
);
380 opcode
= OP_XEXTRACT_I32
;
381 dreg
= alloc_ireg (cfg
);
/* sreg1 is the vector, sreg2 the index, inst_c0 carries the element type. */
384 MONO_INST_NEW (cfg
, ins
, opcode
);
386 ins
->sreg1
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
387 ins
->sreg2
= args
[1]->dreg
;
388 ins
->inst_c0
= etype
->type
;
389 mini_type_to_eval_stack_type (cfg
, etype
, ins
);
390 MONO_ADD_INS (cfg
->cbb
, ins
);
/*
 * Single parameter of the element type: the scalar in args [1] is expanded
 * with the opcode chosen by type_to_expand_op. DREG is the simd vreg of
 * args [0]; how it is attached to INS is not visible in this listing.
 */
394 if (fsig
->param_count
== 1 && mono_metadata_type_equal (fsig
->params
[0], etype
)) {
395 int dreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
397 int opcode
= type_to_expand_op (etype
);
398 ins
= emit_simd_ins (cfg
, klass
, opcode
, args
[1]->dreg
, -1);
/*
 * Array constructor forms. args [1] is the array and args [2] the optional
 * start index; a constant 0 is used when no index is passed.
 */
402 if ((fsig
->param_count
== 1 || fsig
->param_count
== 2) && (fsig
->params
[0]->type
== MONO_TYPE_SZARRAY
)) {
403 MonoInst
*array_ins
= args
[1];
405 MonoInst
*ldelema_ins
;
409 if (args
[0]->opcode
!= OP_LDADDR
)
412 /* .ctor (T[]) or .ctor (T[], index) */
414 if (fsig
->param_count
== 2) {
415 index_ins
= args
[2];
417 EMIT_NEW_ICONST (cfg
, index_ins
, 0);
420 /* Emit index check for the end (index + len - 1 < array length) */
421 end_index_reg
= alloc_ireg (cfg
);
422 EMIT_NEW_BIALU_IMM (cfg
, ins
, OP_IADD_IMM
, end_index_reg
, index_ins
->dreg
, len
- 1);
423 MONO_EMIT_BOUNDS_CHECK (cfg
, array_ins
->dreg
, MonoArray
, max_length
, end_index_reg
);
/*
 * The loaded vector goes straight into the dreg of the variable whose address
 * was taken for args [0].
 * NOTE(review): the last ldelema argument is TRUE here and FALSE in the copy
 * path below; presumably a bounds-check flag. Confirm.
 */
425 /* Load the array slice into the simd reg */
426 ldelema_ins
= mini_emit_ldelema_1_ins (cfg
, mono_class_from_mono_type_internal (etype
), array_ins
, index_ins
, TRUE
);
427 g_assert (args
[0]->opcode
== OP_LDADDR
);
428 var
= (MonoInst
*)args
[0]->inst_p0
;
429 EMIT_NEW_LOAD_MEMBASE (cfg
, ins
, OP_LOADX_MEMBASE
, var
->dreg
, ldelema_ins
->dreg
, 0);
430 ins
->klass
= cmethod
->klass
;
/*
 * Copy-to-array forms. args [1] is the destination array and args [2] the
 * optional start index; a constant 0 is used when no index is passed.
 */
435 if ((fsig
->param_count
== 1 || fsig
->param_count
== 2) && (fsig
->params
[0]->type
== MONO_TYPE_SZARRAY
)) {
436 MonoInst
*array_ins
= args
[1];
438 MonoInst
*ldelema_ins
;
439 int val_vreg
, end_index_reg
;
441 val_vreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
443 /* CopyTo (T[]) or CopyTo (T[], index) */
445 if (fsig
->param_count
== 2) {
446 index_ins
= args
[2];
448 EMIT_NEW_ICONST (cfg
, index_ins
, 0);
/*
 * Two checks: the start index against the array length
 * (ArgumentOutOfRangeException), then (array length - index) against LEN,
 * raising ArgumentException when fewer than LEN elements remain.
 */
451 /* CopyTo () does complicated argument checks */
452 mini_emit_bounds_check_offset (cfg
, array_ins
->dreg
, MONO_STRUCT_OFFSET (MonoArray
, max_length
), index_ins
->dreg
, "ArgumentOutOfRangeException");
453 end_index_reg
= alloc_ireg (cfg
);
454 int len_reg
= alloc_ireg (cfg
);
455 MONO_EMIT_NEW_LOAD_MEMBASE_OP_FLAGS (cfg
, OP_LOADI4_MEMBASE
, len_reg
, array_ins
->dreg
, MONO_STRUCT_OFFSET (MonoArray
, max_length
), MONO_INST_INVARIANT_LOAD
);
456 EMIT_NEW_BIALU (cfg
, ins
, OP_ISUB
, end_index_reg
, len_reg
, index_ins
->dreg
);
457 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, end_index_reg
, len
);
458 MONO_EMIT_NEW_COND_EXC (cfg
, LT
, "ArgumentException");
/* NOTE(review): despite the comment below, this path stores the vector into the array. */
460 /* Load the array slice into the simd reg */
461 ldelema_ins
= mini_emit_ldelema_1_ins (cfg
, mono_class_from_mono_type_internal (etype
), array_ins
, index_ins
, FALSE
);
462 EMIT_NEW_STORE_MEMBASE (cfg
, ins
, OP_STOREX_MEMBASE
, ldelema_ins
->dreg
, 0, val_vreg
);
463 ins
->klass
= cmethod
->klass
;
/*
 * Equality with two shapes:
 *   - one vector parameter, boolean result: whole-vector OP_XEQUAL between
 *     the vector loaded from args [0] and args [1];
 *   - two vector parameters, vector result: element-wise emit_xcompare.
 */
468 if (fsig
->param_count
== 1 && fsig
->ret
->type
== MONO_TYPE_BOOLEAN
&& mono_metadata_type_equal (fsig
->params
[0], type
)) {
469 int sreg1
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
471 return emit_simd_ins (cfg
, klass
, OP_XEQUAL
, sreg1
, args
[1]->dreg
);
472 } else if (fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
)) {
473 /* Per element equality */
474 return emit_xcompare (cfg
, klass
, etype
, args
[0], args
[1]);
/*
 * Boolean-returning operator on two vectors. OP_XEQUAL produces the equality
 * result; for op_Inequality that result is compared with 0 and OP_CEQ writes
 * the negated value into a fresh integer register.
 */
478 case SN_op_Inequality
:
479 g_assert (fsig
->param_count
== 2 && fsig
->ret
->type
== MONO_TYPE_BOOLEAN
&&
480 mono_metadata_type_equal (fsig
->params
[0], type
) &&
481 mono_metadata_type_equal (fsig
->params
[1], type
));
482 ins
= emit_simd_ins (cfg
, klass
, OP_XEQUAL
, args
[0]->dreg
, args
[1]->dreg
);
483 if (id
== SN_op_Inequality
) {
484 int sreg
= ins
->dreg
;
485 int dreg
= alloc_ireg (cfg
);
486 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, sreg
, 0);
487 EMIT_NEW_UNALU (cfg
, ins
, OP_CEQ
, dreg
, -1);
/*
 * Element-wise relational operators. emit_xcompare emits the compare with
 * CMP_EQ, and inst_c0 is then replaced by the wanted relation, using the
 * _UN variant when the element type is U1/U2/U4/U8.
 */
491 case SN_GreaterThanOrEqual
:
493 case SN_LessThanOrEqual
:
494 g_assert (fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
));
495 is_unsigned
= etype
->type
== MONO_TYPE_U1
|| etype
->type
== MONO_TYPE_U2
|| etype
->type
== MONO_TYPE_U4
|| etype
->type
== MONO_TYPE_U8
;
496 ins
= emit_xcompare (cfg
, klass
, etype
, args
[0], args
[1]);
499 ins
->inst_c0
= is_unsigned
? CMP_GT_UN
: CMP_GT
;
501 case SN_GreaterThanOrEqual
:
502 ins
->inst_c0
= is_unsigned
? CMP_GE_UN
: CMP_GE
;
505 ins
->inst_c0
= is_unsigned
? CMP_LT_UN
: CMP_LT
;
507 case SN_LessThanOrEqual
:
508 ins
->inst_c0
= is_unsigned
? CMP_LE_UN
: CMP_LE
;
511 g_assert_not_reached ();
/* Vector cast of args [0]; the case label for this arm is not visible here. */
515 return emit_simd_ins (cfg
, klass
, OP_XCAST
, args
[0]->dreg
, -1);
/*
 * Element-wise binary operators. One OP_XBINOP is emitted; inst_c1 carries
 * the element type and inst_c0 the scalar opcode to apply per element. The
 * floating point opcodes are used for R4/R8 element types and the integer
 * opcodes otherwise.
 * NOTE(review): several case labels of both inner switches are not visible.
 */
517 case SN_op_Subtraction
:
520 case SN_op_BitwiseAnd
:
521 case SN_op_BitwiseOr
:
522 case SN_op_ExclusiveOr
:
525 if (!(fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
)))
527 ins
= emit_simd_ins (cfg
, klass
, OP_XBINOP
, args
[0]->dreg
, args
[1]->dreg
);
528 ins
->inst_c1
= etype
->type
;
530 if (etype
->type
== MONO_TYPE_R4
|| etype
->type
== MONO_TYPE_R8
) {
533 ins
->inst_c0
= OP_FADD
;
535 case SN_op_Subtraction
:
536 ins
->inst_c0
= OP_FSUB
;
539 ins
->inst_c0
= OP_FMUL
;
542 ins
->inst_c0
= OP_FDIV
;
545 ins
->inst_c0
= OP_FMAX
;
548 ins
->inst_c0
= OP_FMIN
;
557 ins
->inst_c0
= OP_IADD
;
559 case SN_op_Subtraction
:
560 ins
->inst_c0
= OP_ISUB
;
564 ins->inst_c0 = OP_IDIV;
567 ins->inst_c0 = OP_IMUL;
570 case SN_op_BitwiseAnd
:
571 ins
->inst_c0
= OP_IAND
;
573 case SN_op_BitwiseOr
:
574 ins
->inst_c0
= OP_IOR
;
576 case SN_op_ExclusiveOr
:
577 ins
->inst_c0
= OP_IXOR
;
580 ins
->inst_c0
= OP_IMAX
;
583 ins
->inst_c0
= OP_IMIN
;
/*
 * Per-class SN_* tables used by emit_x86_intrinsics (Popcnt, Lzcnt, Bmi1,
 * Bmi2). Each is binary-searched by name in lookup_intrins, so entries must
 * stay ordered by method name.
 * NOTE(review): only some of the entries are visible in this listing.
 */
600 static guint16 popcnt_methods
[] = {
605 static guint16 lzcnt_methods
[] = {
610 static guint16 bmi1_methods
[] = {
613 SN_ExtractLowestSetBit
,
614 SN_GetMaskUpToLowestSetBit
,
615 SN_ResetLowestSetBit
,
616 SN_TrailingZeroCount
,
620 static guint16 bmi2_methods
[] = {
622 SN_ParallelBitDeposit
,
623 SN_ParallelBitExtract
,
/*
 * emit_x86_intrinsics:
 *
 *   Intrinsics for the Popcnt, Lzcnt, Bmi1 and Bmi2 classes and for the class
 * named X64 nested in each of them. mono_emit_simd_intrinsics dispatches here
 * for the System.Runtime.Intrinsics.X86 namespace. For every class:
 *   - SUPPORTED is the matching bit of mini_get_cpu_features (cfg) and is
 *     what get_IsSupported emits as a 0/1 constant;
 *   - IS_64BIT is TRUE for the nested X64 class and selects the 64 bit
 *     opcode, an lreg destination and STACK_I8 instead of the 32 bit opcode,
 *     an ireg destination and STACK_I4.
 * NOTE(review): switch headers, several case labels, the checks on ID and
 * SUPPORTED, and all return statements are not visible in this listing.
 * NOTE(review): class_ns is assigned but no use of it is visible here.
 */
629 emit_x86_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
631 const char *class_name
;
632 const char *class_ns
;
635 gboolean supported
, is_64bit
;
636 MonoClass
*klass
= cmethod
->klass
;
638 class_ns
= m_class_get_name_space (klass
);
639 class_name
= m_class_get_name (klass
);
/* Popcnt and Popcnt.X64 */
640 if (!strcmp (class_name
, "Popcnt") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Popcnt"))) {
641 id
= lookup_intrins (popcnt_methods
, sizeof (popcnt_methods
), cmethod
);
645 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_POPCNT
) != 0;
646 is_64bit
= !strcmp (class_name
, "X64");
649 case SN_get_IsSupported
:
650 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
651 ins
->type
= STACK_I4
;
/* Population count of args [0]. */
656 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_POPCNT64
: OP_POPCNT32
);
657 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
658 ins
->sreg1
= args
[0]->dreg
;
659 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
660 MONO_ADD_INS (cfg
->cbb
, ins
);
/* Lzcnt and Lzcnt.X64 */
666 if (!strcmp (class_name
, "Lzcnt") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Lzcnt"))) {
667 id
= lookup_intrins (lzcnt_methods
, sizeof (lzcnt_methods
), cmethod
);
671 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_LZCNT
) != 0;
672 is_64bit
= !strcmp (class_name
, "X64");
675 case SN_get_IsSupported
:
676 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
677 ins
->type
= STACK_I4
;
679 case SN_LeadingZeroCount
:
682 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_LZCNT64
: OP_LZCNT32
);
683 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
684 ins
->sreg1
= args
[0]->dreg
;
685 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
686 MONO_ADD_INS (cfg
->cbb
, ins
);
/*
 * Bmi1 and Bmi1.X64. The COMPILE_LLVM test singles out the non-LLVM case;
 * the action taken for it is not visible in this listing.
 */
692 if (!strcmp (class_name
, "Bmi1") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Bmi1"))) {
693 if (!COMPILE_LLVM (cfg
))
695 id
= lookup_intrins (bmi1_methods
, sizeof (bmi1_methods
), cmethod
);
698 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_BMI1
) != 0;
699 is_64bit
= !strcmp (class_name
, "X64");
702 case SN_get_IsSupported
:
703 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
704 ins
->type
= STACK_I4
;
/* result = (args [0] ^ -1) & args [1], i.e. ~a & b */
708 // LLVM replaces it with `andn`
709 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
710 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
711 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LXOR_IMM
: OP_IXOR_IMM
, tmp_reg
, args
[0]->dreg
, -1);
712 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, tmp_reg
, args
[1]->dreg
);
/* Only the two-parameter overload is handled by the visible code. */
715 case SN_BitFieldExtract
: {
716 if (fsig
->param_count
== 2) {
717 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_BEXTR64
: OP_BEXTR32
);
718 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
719 ins
->sreg1
= args
[0]->dreg
;
720 ins
->sreg2
= args
[1]->dreg
;
721 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
722 MONO_ADD_INS (cfg
->cbb
, ins
);
/* result = a ^ (a - 1) */
726 case SN_GetMaskUpToLowestSetBit
: {
728 // LLVM replaces it with `blsmsk`
729 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
730 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
731 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LSUB_IMM
: OP_ISUB_IMM
, tmp_reg
, args
[0]->dreg
, 1);
732 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LXOR
: OP_IXOR
, result_reg
, args
[0]->dreg
, tmp_reg
);
/* result = a & (a - 1) */
735 case SN_ResetLowestSetBit
: {
737 // LLVM replaces it with `blsr`
738 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
739 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
740 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LSUB_IMM
: OP_ISUB_IMM
, tmp_reg
, args
[0]->dreg
, 1);
741 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, args
[0]->dreg
, tmp_reg
);
/* result = a & (0 - a) */
744 case SN_ExtractLowestSetBit
: {
746 // LLVM replaces it with `blsi`
747 int tmp_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
748 int result_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
749 int zero_reg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
750 MONO_EMIT_NEW_ICONST (cfg
, zero_reg
, 0);
751 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LSUB
: OP_ISUB
, tmp_reg
, zero_reg
, args
[0]->dreg
);
752 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, args
[0]->dreg
, tmp_reg
);
755 case SN_TrailingZeroCount
:
756 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_CTTZ64
: OP_CTTZ32
);
757 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
758 ins
->sreg1
= args
[0]->dreg
;
759 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
760 MONO_ADD_INS (cfg
->cbb
, ins
);
763 g_assert_not_reached ();
/*
 * Bmi2 and Bmi2.X64. The COMPILE_LLVM test singles out the non-LLVM case;
 * the action taken for it is not visible in this listing.
 */
766 if (!strcmp (class_name
, "Bmi2") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Bmi2"))) {
767 if (!COMPILE_LLVM (cfg
))
769 id
= lookup_intrins (bmi2_methods
, sizeof (bmi2_methods
), cmethod
);
771 supported
= (mini_get_cpu_features (cfg
) & MONO_CPU_X86_BMI2
) != 0;
772 is_64bit
= !strcmp (class_name
, "X64");
775 case SN_get_IsSupported
:
776 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
777 ins
->type
= STACK_I4
;
/*
 * Two overloads: two parameters use the OP_MULX_H* opcodes; three parameters
 * use OP_MULX_HL* with args [2] passed as sreg3. Any other parameter count
 * asserts.
 */
779 case SN_MultiplyNoFlags
:
780 if (fsig
->param_count
== 2) {
781 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_MULX_H64
: OP_MULX_H32
);
782 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
783 ins
->sreg1
= args
[0]->dreg
;
784 ins
->sreg2
= args
[1]->dreg
;
785 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
786 MONO_ADD_INS (cfg
->cbb
, ins
);
787 } else if (fsig
->param_count
== 3) {
788 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_MULX_HL64
: OP_MULX_HL32
);
789 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
790 ins
->sreg1
= args
[0]->dreg
;
791 ins
->sreg2
= args
[1]->dreg
;
792 ins
->sreg3
= args
[2]->dreg
;
793 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
794 MONO_ADD_INS (cfg
->cbb
, ins
);
796 g_assert_not_reached ();
799 case SN_ZeroHighBits
:
800 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_BZHI64
: OP_BZHI32
);
801 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
802 ins
->sreg1
= args
[0]->dreg
;
803 ins
->sreg2
= args
[1]->dreg
;
804 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
805 MONO_ADD_INS (cfg
->cbb
, ins
);
807 case SN_ParallelBitExtract
:
808 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_PEXT64
: OP_PEXT32
);
809 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
810 ins
->sreg1
= args
[0]->dreg
;
811 ins
->sreg2
= args
[1]->dreg
;
812 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
813 MONO_ADD_INS (cfg
->cbb
, ins
);
815 case SN_ParallelBitDeposit
:
816 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_PDEP64
: OP_PDEP32
);
817 ins
->dreg
= is_64bit
? alloc_lreg (cfg
) : alloc_ireg (cfg
);
818 ins
->sreg1
= args
[0]->dreg
;
819 ins
->sreg2
= args
[1]->dreg
;
820 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
821 MONO_ADD_INS (cfg
->cbb
, ins
);
824 g_assert_not_reached ();
/*
 * SN_* ids handled by emit_vector128_t; searched by name via lookup_intrins.
 * NOTE(review): no entries are visible in this listing.
 */
832 static guint16 vector_128_t_methods
[] = {
/*
 * emit_vector128_t:
 *
 *   Intrinsics for the class named "Vector128`1"; mono_emit_simd_intrinsics
 * dispatches here for the System.Runtime.Intrinsics namespace. ETYPE is the
 * first generic argument of the class and SIZE is its value size.
 * NOTE(review): the computation of LEN, the switch header, the case labels
 * and the return statements are not visible in this listing.
 */
837 emit_vector128_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
840 MonoType
*type
, *etype
;
844 id
= lookup_intrins (vector_128_t_methods
, sizeof (vector_128_t_methods
), cmethod
);
848 klass
= cmethod
->klass
;
849 type
= m_class_get_byval_arg (klass
);
850 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
851 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
/*
 * Element-type filter: non-primitive types, char and bool are singled out.
 * The action taken for them is not visible in this listing.
 */
855 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
858 if (cfg
->verbose_level
> 1) {
859 char *name
= mono_method_full_name (cmethod
, TRUE
);
860 printf (" SIMD intrinsic %s\n", name
);
/* Signature guard (no parameters, I4 result), then LEN is emitted as a constant. */
866 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
868 EMIT_NEW_ICONST (cfg
, ins
, len
);
/*
 * SN_* ids handled by emit_vector256_t; searched by name via lookup_intrins.
 * NOTE(review): no entries are visible in this listing.
 */
877 static guint16 vector_256_t_methods
[] = {
/*
 * emit_vector256_t:
 *
 *   Intrinsics for the class named "Vector256`1"; mono_emit_simd_intrinsics
 * dispatches here for the System.Runtime.Intrinsics namespace. The visible
 * statements mirror emit_vector128_t, with vector_256_t_methods as the table.
 * NOTE(review): the computation of LEN, the switch header, the case labels
 * and the return statements are not visible in this listing.
 */
882 emit_vector256_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
885 MonoType
*type
, *etype
;
889 id
= lookup_intrins (vector_256_t_methods
, sizeof (vector_256_t_methods
), cmethod
);
893 klass
= cmethod
->klass
;
894 type
= m_class_get_byval_arg (klass
);
895 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
896 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
/*
 * Element-type filter: non-primitive types, char and bool are singled out.
 * The action taken for them is not visible in this listing.
 */
900 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
903 if (cfg
->verbose_level
> 1) {
904 char *name
= mono_method_full_name (cmethod
, TRUE
);
905 printf (" SIMD intrinsic %s\n", name
);
/* Signature guard (no parameters, I4 result), then LEN is emitted as a constant. */
911 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
913 EMIT_NEW_ICONST (cfg
, ins
, len
);
/*
 * mono_emit_simd_intrinsics:
 *
 *   Top-level dispatcher. It looks at the image, namespace and name of the
 * class of CMETHOD and forwards to the matching emitter:
 *   System.Runtime.Intrinsics.X86              -> emit_x86_intrinsics (TARGET_AMD64 only)
 *   System.Runtime.Intrinsics / "Vector128`1"  -> emit_vector128_t
 *   System.Runtime.Intrinsics / "Vector256`1"  -> emit_vector256_t
 *   System.Numerics / "Vector"                 -> emit_sys_numerics_vector
 *   System.Numerics / "Vector`1"               -> emit_sys_numerics_vector_t
 * NOTE(review): the action taken when the image is not corlib and the
 * fall-back return are not visible in this listing.
 */
923 mono_emit_simd_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
925 const char *class_name
;
926 const char *class_ns
;
927 MonoImage
*image
= m_class_get_image (cmethod
->klass
);
929 if (image
!= mono_get_corlib ())
932 class_ns
= m_class_get_name_space (cmethod
->klass
);
933 class_name
= m_class_get_name (cmethod
->klass
);
/*
 * For a nested class the namespace is taken from the enclosing class.
 * NOTE(review): the statement below is a comma expression. Only class_ns is
 * assigned; the trailing "class_name" and "cmethod->klass->nested_in"
 * operands are evaluated and discarded, so they have no effect.
 */
935 if (cmethod
->klass
->nested_in
)
936 class_ns
= m_class_get_name_space (cmethod
->klass
->nested_in
), class_name
, cmethod
->klass
->nested_in
;
938 #ifdef TARGET_AMD64 // TODO: test and enable for x86 too
939 if (!strcmp (class_ns
, "System.Runtime.Intrinsics.X86")) {
940 return emit_x86_intrinsics (cfg
,cmethod
, fsig
, args
);
944 if (!strcmp (class_ns
, "System.Runtime.Intrinsics")) {
945 if (!strcmp (class_name
, "Vector128`1"))
946 return emit_vector128_t (cfg
, cmethod
, fsig
, args
);
947 if (!strcmp (class_name
, "Vector256`1"))
948 return emit_vector256_t (cfg
, cmethod
, fsig
, args
);
951 if (!strcmp (class_ns
, "System.Numerics")) {
952 if (!strcmp (class_name
, "Vector"))
953 return emit_sys_numerics_vector (cfg
, cmethod
, fsig
, args
);
954 if (!strcmp (class_name
, "Vector`1"))
955 return emit_sys_numerics_vector_t (cfg
, cmethod
, fsig
, args
);
/*
 * mono_simd_decompose_intrinsic:
 *
 *   Takes the compile context, a basic block and an instruction.
 * NOTE(review): only the signature is visible in this listing; the return
 * type and the body cannot be documented from here.
 */
962 mono_simd_decompose_intrinsic (MonoCompile
*cfg
, MonoBasicBlock
*bb
, MonoInst
*ins
)
/*
 * mono_simd_simplify_indirection:
 *
 *   Takes the compile context.
 * NOTE(review): only the signature is visible in this listing; the return
 * type and the body cannot be documented from here.
 */
967 mono_simd_simplify_indirection (MonoCompile
*cfg
)
973 MONO_EMPTY_SOURCE_FILE (simd_intrinsics_netcore
);
977 #endif /* DISABLE_JIT */