2 * SIMD Intrinsics support for netcore
6 #include <mono/utils/mono-compiler.h>
8 #if defined(DISABLE_JIT)
11 mono_simd_intrinsics_init (void)
18 * Only LLVM is supported as a backend.
26 #include "mono/utils/bsearch.h"
27 #include <mono/metadata/abi-details.h>
28 #include <mono/metadata/reflection-internals.h>
30 #if defined (MONO_ARCH_SIMD_INTRINSICS) && defined(ENABLE_NETCORE)
32 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
33 #define MSGSTRFIELD1(line) str##line
34 static const struct msgstr_t
{
35 #define METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
36 #define METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
37 #include "simd-methods-netcore.h"
41 #define METHOD(name) #name,
42 #define METHOD2(str,name) str,
43 #include "simd-methods-netcore.h"
49 #define METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
50 #define METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
51 #include "simd-methods-netcore.h"
53 #define method_name(idx) ((const char*)&method_names + (idx))
55 static int register_size
;
57 static MonoCPUFeatures
58 get_cpu_features (void)
61 return mono_llvm_get_cpu_features ();
62 #elif defined(TARGET_AMD64)
63 return mono_arch_get_cpu_features ();
65 return (MonoCPUFeatures
)0;
70 mono_simd_intrinsics_init (void)
74 if ((get_cpu_features () & MONO_CPU_X86_AVX
) != 0)
77 /* Tell the class init code the size of the System.Numerics.Register type */
78 mono_simd_register_size
= register_size
;
82 mono_emit_simd_field_load (MonoCompile
*cfg
, MonoClassField
*field
, MonoInst
*addr
)
88 simd_intrinsic_compare_by_name (const void *key
, const void *value
)
90 return strcmp ((const char*)key
, method_name (*(guint16
*)value
));
94 lookup_intrins (guint16
*intrinsics
, int size
, MonoMethod
*cmethod
)
96 const guint16
*result
= (const guint16
*)mono_binary_search (cmethod
->name
, intrinsics
, size
/ sizeof (guint16
), sizeof (guint16
), &simd_intrinsic_compare_by_name
);
98 for (int i
= 0; i
< (size
/ sizeof (guint16
)) - 1; ++i
) {
99 if (method_name (intrinsics
[i
])[0] > method_name (intrinsics
[i
+ 1])[0]) {
100 printf ("%s %s\n",method_name (intrinsics
[i
]), method_name (intrinsics
[i
+ 1]));
101 g_assert_not_reached ();
111 static guint16 vector_methods
[] = {
112 SN_get_IsHardwareAccelerated
116 emit_sys_numerics_vector (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
119 gboolean supported
= FALSE
;
122 id
= lookup_intrins (vector_methods
, sizeof (vector_methods
), cmethod
);
126 //printf ("%s\n", mono_method_full_name (cmethod, 1));
128 #ifdef MONO_ARCH_SIMD_INTRINSICS
133 case SN_get_IsHardwareAccelerated
:
134 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
135 ins
->type
= STACK_I4
;
145 type_to_expand_op (MonoType
*type
)
147 switch (type
->type
) {
165 g_assert_not_reached ();
170 * Return a simd vreg for the simd value represented by SRC.
171 * SRC is the 'this' argument to methods.
172 * Set INDIRECT to TRUE if the value was loaded from memory.
175 load_simd_vreg_class (MonoCompile
*cfg
, MonoClass
*klass
, MonoInst
*src
, gboolean
*indirect
)
177 const char *spec
= INS_INFO (src
->opcode
);
181 if (src
->opcode
== OP_XMOVE
) {
183 } else if (src
->opcode
== OP_LDADDR
) {
184 int res
= ((MonoInst
*)src
->inst_p0
)->dreg
;
186 } else if (spec
[MONO_INST_DEST
] == 'x') {
188 } else if (src
->type
== STACK_PTR
|| src
->type
== STACK_MP
) {
193 MONO_INST_NEW (cfg
, ins
, OP_LOADX_MEMBASE
);
195 ins
->sreg1
= src
->dreg
;
196 ins
->type
= STACK_VTYPE
;
197 ins
->dreg
= alloc_ireg (cfg
);
198 MONO_ADD_INS (cfg
->cbb
, ins
);
201 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src
->type
);
202 mono_print_ins (src
);
203 g_assert_not_reached ();
207 load_simd_vreg (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
*src
, gboolean
*indirect
)
209 return load_simd_vreg_class (cfg
, cmethod
->klass
, src
, indirect
);
212 /* Create and emit a SIMD instruction, dreg is auto-allocated */
214 emit_simd_ins (MonoCompile
*cfg
, MonoClass
*klass
, int opcode
, int sreg1
, int sreg2
)
216 const char *spec
= INS_INFO (opcode
);
219 MONO_INST_NEW (cfg
, ins
, opcode
);
220 if (spec
[MONO_INST_DEST
] == 'x') {
221 ins
->dreg
= alloc_xreg (cfg
);
222 ins
->type
= STACK_VTYPE
;
223 } else if (spec
[MONO_INST_DEST
] == 'i') {
224 ins
->dreg
= alloc_ireg (cfg
);
225 ins
->type
= STACK_I4
;
227 g_assert_not_reached ();
232 MONO_ADD_INS (cfg
->cbb
, ins
);
237 emit_xcompare (MonoCompile
*cfg
, MonoClass
*klass
, MonoType
*etype
, MonoInst
*arg1
, MonoInst
*arg2
)
240 gboolean is_fp
= etype
->type
== MONO_TYPE_R4
|| etype
->type
== MONO_TYPE_R8
;
242 ins
= emit_simd_ins (cfg
, klass
, is_fp
? OP_XCOMPARE_FP
: OP_XCOMPARE
, arg1
->dreg
, arg2
->dreg
);
243 ins
->inst_c0
= CMP_EQ
;
244 ins
->inst_c1
= etype
->type
;
248 static guint16 vector_t_methods
[] = {
253 SN_GreaterThanOrEqual
,
273 emit_sys_numerics_vector_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
276 MonoType
*type
, *etype
;
279 gboolean is_unsigned
;
281 id
= lookup_intrins (vector_t_methods
, sizeof (vector_t_methods
), cmethod
);
285 klass
= cmethod
->klass
;
286 type
= m_class_get_byval_arg (klass
);
287 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
288 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
290 len
= register_size
/ size
;
292 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
295 if (cfg
->verbose_level
> 1) {
296 char *name
= mono_method_full_name (cmethod
, TRUE
);
297 printf (" SIMD intrinsic %s\n", name
);
303 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
305 EMIT_NEW_ICONST (cfg
, ins
, len
);
308 g_assert (fsig
->param_count
== 0 && mono_metadata_type_equal (fsig
->ret
, type
));
309 return emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
310 case SN_get_AllOnes
: {
311 /* Compare a zero vector with itself */
312 ins
= emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
313 return emit_xcompare (cfg
, klass
, etype
, ins
, ins
);
316 if (!COMPILE_LLVM (cfg
))
318 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, args
[1]->dreg
, len
);
319 MONO_EMIT_NEW_COND_EXC (cfg
, GE_UN
, "IndexOutOfRangeException");
322 gboolean is64
= FALSE
;
323 switch (etype
->type
) {
326 opcode
= OP_XEXTRACT_I64
;
328 dreg
= alloc_lreg (cfg
);
331 opcode
= OP_XEXTRACT_R8
;
332 dreg
= alloc_freg (cfg
);
335 g_assert (cfg
->r4fp
);
336 opcode
= OP_XEXTRACT_R4
;
337 dreg
= alloc_freg (cfg
);
340 opcode
= OP_XEXTRACT_I32
;
341 dreg
= alloc_ireg (cfg
);
344 MONO_INST_NEW (cfg
, ins
, opcode
);
346 ins
->sreg1
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
347 ins
->sreg2
= args
[1]->dreg
;
348 ins
->inst_c0
= etype
->type
;
349 mini_type_to_eval_stack_type (cfg
, etype
, ins
);
350 MONO_ADD_INS (cfg
->cbb
, ins
);
353 if (fsig
->param_count
== 1 && mono_metadata_type_equal (fsig
->params
[0], etype
)) {
354 int dreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
356 int opcode
= type_to_expand_op (etype
);
357 ins
= emit_simd_ins (cfg
, klass
, opcode
, args
[1]->dreg
, -1);
361 if ((fsig
->param_count
== 1 || fsig
->param_count
== 2) && (fsig
->params
[0]->type
== MONO_TYPE_SZARRAY
)) {
362 MonoInst
*array_ins
= args
[1];
364 MonoInst
*ldelema_ins
;
368 if (args
[0]->opcode
!= OP_LDADDR
)
371 /* .ctor (T[]) or .ctor (T[], index) */
373 if (fsig
->param_count
== 2) {
374 index_ins
= args
[2];
376 EMIT_NEW_ICONST (cfg
, index_ins
, 0);
379 /* Emit index check for the end (index + len - 1 < array length) */
380 end_index_reg
= alloc_ireg (cfg
);
381 EMIT_NEW_BIALU_IMM (cfg
, ins
, OP_IADD_IMM
, end_index_reg
, index_ins
->dreg
, len
- 1);
382 MONO_EMIT_BOUNDS_CHECK (cfg
, array_ins
->dreg
, MonoArray
, max_length
, end_index_reg
);
384 /* Load the array slice into the simd reg */
385 ldelema_ins
= mini_emit_ldelema_1_ins (cfg
, mono_class_from_mono_type_internal (etype
), array_ins
, index_ins
, TRUE
);
386 g_assert (args
[0]->opcode
== OP_LDADDR
);
387 var
= (MonoInst
*)args
[0]->inst_p0
;
388 EMIT_NEW_LOAD_MEMBASE (cfg
, ins
, OP_LOADX_MEMBASE
, var
->dreg
, ldelema_ins
->dreg
, 0);
389 ins
->klass
= cmethod
->klass
;
394 if ((fsig
->param_count
== 1 || fsig
->param_count
== 2) && (fsig
->params
[0]->type
== MONO_TYPE_SZARRAY
)) {
395 MonoInst
*array_ins
= args
[1];
397 MonoInst
*ldelema_ins
;
398 int val_vreg
, end_index_reg
;
400 val_vreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
402 /* CopyTo (T[]) or CopyTo (T[], index) */
404 if (fsig
->param_count
== 2) {
405 index_ins
= args
[2];
407 EMIT_NEW_ICONST (cfg
, index_ins
, 0);
410 /* CopyTo () does complicated argument checks */
411 mini_emit_bounds_check_offset (cfg
, array_ins
->dreg
, MONO_STRUCT_OFFSET (MonoArray
, max_length
), index_ins
->dreg
, "ArgumentOutOfRangeException");
412 end_index_reg
= alloc_ireg (cfg
);
413 int len_reg
= alloc_ireg (cfg
);
414 MONO_EMIT_NEW_LOAD_MEMBASE_OP_FLAGS (cfg
, OP_LOADI4_MEMBASE
, len_reg
, array_ins
->dreg
, MONO_STRUCT_OFFSET (MonoArray
, max_length
), MONO_INST_INVARIANT_LOAD
);
415 EMIT_NEW_BIALU (cfg
, ins
, OP_ISUB
, end_index_reg
, len_reg
, index_ins
->dreg
);
416 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, end_index_reg
, len
);
417 MONO_EMIT_NEW_COND_EXC (cfg
, LT
, "ArgumentException");
419 /* Store the simd reg into the array slice */
420 ldelema_ins
= mini_emit_ldelema_1_ins (cfg
, mono_class_from_mono_type_internal (etype
), array_ins
, index_ins
, FALSE
);
421 EMIT_NEW_STORE_MEMBASE (cfg
, ins
, OP_STOREX_MEMBASE
, ldelema_ins
->dreg
, 0, val_vreg
);
422 ins
->klass
= cmethod
->klass
;
427 if (fsig
->param_count
== 1 && fsig
->ret
->type
== MONO_TYPE_BOOLEAN
&& mono_metadata_type_equal (fsig
->params
[0], type
)) {
428 int sreg1
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
430 return emit_simd_ins (cfg
, klass
, OP_XEQUAL
, sreg1
, args
[1]->dreg
);
431 } else if (fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
)) {
432 /* Per element equality */
433 return emit_xcompare (cfg
, klass
, etype
, args
[0], args
[1]);
437 case SN_op_Inequality
:
438 g_assert (fsig
->param_count
== 2 && fsig
->ret
->type
== MONO_TYPE_BOOLEAN
&&
439 mono_metadata_type_equal (fsig
->params
[0], type
) &&
440 mono_metadata_type_equal (fsig
->params
[1], type
));
441 ins
= emit_simd_ins (cfg
, klass
, OP_XEQUAL
, args
[0]->dreg
, args
[1]->dreg
);
442 if (id
== SN_op_Inequality
) {
443 int sreg
= ins
->dreg
;
444 int dreg
= alloc_ireg (cfg
);
445 MONO_EMIT_NEW_BIALU_IMM (cfg
, OP_COMPARE_IMM
, -1, sreg
, 0);
446 EMIT_NEW_UNALU (cfg
, ins
, OP_CEQ
, dreg
, -1);
450 case SN_GreaterThanOrEqual
:
452 case SN_LessThanOrEqual
:
453 g_assert (fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
));
454 is_unsigned
= etype
->type
== MONO_TYPE_U1
|| etype
->type
== MONO_TYPE_U2
|| etype
->type
== MONO_TYPE_U4
|| etype
->type
== MONO_TYPE_U8
;
455 ins
= emit_xcompare (cfg
, klass
, etype
, args
[0], args
[1]);
458 ins
->inst_c0
= is_unsigned
? CMP_GT_UN
: CMP_GT
;
460 case SN_GreaterThanOrEqual
:
461 ins
->inst_c0
= is_unsigned
? CMP_GE_UN
: CMP_GE
;
464 ins
->inst_c0
= is_unsigned
? CMP_LT_UN
: CMP_LT
;
466 case SN_LessThanOrEqual
:
467 ins
->inst_c0
= is_unsigned
? CMP_LE_UN
: CMP_LE
;
470 g_assert_not_reached ();
474 return emit_simd_ins (cfg
, klass
, OP_XCAST
, args
[0]->dreg
, -1);
476 case SN_op_Subtraction
:
479 case SN_op_BitwiseAnd
:
480 case SN_op_BitwiseOr
:
481 case SN_op_ExclusiveOr
:
482 if (!(fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
)))
484 ins
= emit_simd_ins (cfg
, klass
, OP_XBINOP
, args
[0]->dreg
, args
[1]->dreg
);
485 ins
->inst_c1
= etype
->type
;
486 if (etype
->type
== MONO_TYPE_R4
|| etype
->type
== MONO_TYPE_R8
) {
489 ins
->inst_c0
= OP_FADD
;
491 case SN_op_Subtraction
:
492 ins
->inst_c0
= OP_FSUB
;
495 ins
->inst_c0
= OP_FMUL
;
498 ins
->inst_c0
= OP_FDIV
;
507 ins
->inst_c0
= OP_IADD
;
509 case SN_op_Subtraction
:
510 ins
->inst_c0
= OP_ISUB
;
514 ins->inst_c0 = OP_IDIV;
517 ins->inst_c0 = OP_IMUL;
520 case SN_op_BitwiseAnd
:
521 ins
->inst_c0
= OP_IAND
;
523 case SN_op_BitwiseOr
:
524 ins
->inst_c0
= OP_IOR
;
526 case SN_op_ExclusiveOr
:
527 ins
->inst_c0
= OP_IXOR
;
544 static guint16 popcnt_methods
[] = {
549 static guint16 lzcnt_methods
[] = {
554 static guint16 bmi1_methods
[] = {
557 SN_ExtractLowestSetBit
,
558 SN_GetMaskUpToLowestSetBit
,
559 SN_ResetLowestSetBit
,
560 SN_TrailingZeroCount
,
564 static guint16 bmi2_methods
[] = {
565 //SN_MultiplyNoFlags,
566 SN_ParallelBitDeposit
,
567 SN_ParallelBitExtract
,
573 emit_x86_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
575 const char *class_name
;
576 const char *class_ns
;
579 gboolean supported
, is_64bit
;
580 MonoClass
*klass
= cmethod
->klass
;
582 class_ns
= m_class_get_name_space (klass
);
583 class_name
= m_class_get_name (klass
);
584 if (!strcmp (class_name
, "Popcnt") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Popcnt"))) {
585 id
= lookup_intrins (popcnt_methods
, sizeof (popcnt_methods
), cmethod
);
589 supported
= (get_cpu_features () & MONO_CPU_X86_POPCNT
) != 0;
590 is_64bit
= !strcmp (class_name
, "X64");
593 case SN_get_IsSupported
:
594 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
595 ins
->type
= STACK_I4
;
600 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_POPCNT64
: OP_POPCNT32
);
601 ins
->dreg
= alloc_ireg (cfg
);
602 ins
->sreg1
= args
[0]->dreg
;
603 MONO_ADD_INS (cfg
->cbb
, ins
);
609 if (!strcmp (class_name
, "Lzcnt") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Lzcnt"))) {
610 id
= lookup_intrins (lzcnt_methods
, sizeof (lzcnt_methods
), cmethod
);
614 supported
= (get_cpu_features () & MONO_CPU_X86_LZCNT
) != 0;
615 is_64bit
= !strcmp (class_name
, "X64");
618 case SN_get_IsSupported
:
619 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
620 ins
->type
= STACK_I4
;
622 case SN_LeadingZeroCount
:
625 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_LZCNT64
: OP_LZCNT32
);
626 ins
->dreg
= alloc_ireg (cfg
);
627 ins
->sreg1
= args
[0]->dreg
;
628 MONO_ADD_INS (cfg
->cbb
, ins
);
634 if (!strcmp (class_name
, "Bmi1") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Bmi1"))) {
635 if (!COMPILE_LLVM (cfg
))
637 id
= lookup_intrins (bmi1_methods
, sizeof (bmi1_methods
), cmethod
);
640 supported
= (get_cpu_features () & MONO_CPU_X86_BMI1
) != 0;
641 is_64bit
= !strcmp (class_name
, "X64");
644 case SN_get_IsSupported
:
645 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
646 ins
->type
= STACK_I4
;
650 // LLVM replaces it with `andn`
651 int tmp_reg
= alloc_preg (cfg
);
652 int result_reg
= alloc_preg (cfg
);
653 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LXOR_IMM
: OP_IXOR_IMM
, tmp_reg
, args
[0]->dreg
, -1);
654 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, tmp_reg
, args
[1]->dreg
);
657 case SN_BitFieldExtract
: {
658 if (fsig
->param_count
== 2) {
659 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_BEXTR64
: OP_BEXTR32
);
660 ins
->dreg
= alloc_ireg (cfg
);
661 ins
->sreg1
= args
[0]->dreg
;
662 ins
->sreg2
= args
[1]->dreg
;
663 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
664 MONO_ADD_INS (cfg
->cbb
, ins
);
668 case SN_GetMaskUpToLowestSetBit
: {
670 // LLVM replaces it with `blsmsk`
671 int tmp_reg
= alloc_preg (cfg
);
672 int result_reg
= alloc_preg (cfg
);
673 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LSUB_IMM
: OP_ISUB_IMM
, tmp_reg
, args
[0]->dreg
, 1);
674 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LXOR
: OP_IXOR
, result_reg
, args
[0]->dreg
, tmp_reg
);
677 case SN_ResetLowestSetBit
: {
679 // LLVM replaces it with `blsr`
680 int tmp_reg
= alloc_preg (cfg
);
681 int result_reg
= alloc_preg (cfg
);
682 EMIT_NEW_BIALU_IMM (cfg
, ins
, is_64bit
? OP_LSUB_IMM
: OP_ISUB_IMM
, tmp_reg
, args
[0]->dreg
, 1);
683 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, args
[0]->dreg
, tmp_reg
);
686 case SN_ExtractLowestSetBit
: {
688 // LLVM replaces it with `blsi`
689 int tmp_reg
= alloc_preg (cfg
);
690 int result_reg
= alloc_preg (cfg
);
691 int zero_reg
= alloc_preg (cfg
);
692 MONO_EMIT_NEW_ICONST (cfg
, zero_reg
, 0);
693 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LSUB
: OP_ISUB
, tmp_reg
, zero_reg
, args
[0]->dreg
);
694 EMIT_NEW_BIALU (cfg
, ins
, is_64bit
? OP_LAND
: OP_IAND
, result_reg
, args
[0]->dreg
, tmp_reg
);
697 case SN_TrailingZeroCount
:
698 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_CTTZ64
: OP_CTTZ32
);
699 ins
->dreg
= alloc_ireg (cfg
);
700 ins
->sreg1
= args
[0]->dreg
;
701 ins
->type
= STACK_I4
;
702 MONO_ADD_INS (cfg
->cbb
, ins
);
705 g_assert_not_reached ();
708 if (!strcmp (class_name
, "Bmi2") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Bmi2"))) {
709 // We only support the subset used by corelib. Remove this check once MultiplyNoFlags is implemented.
710 if (m_class_get_image (cfg
->method
->klass
) != mono_get_corlib ())
712 if (!COMPILE_LLVM (cfg
))
714 id
= lookup_intrins (bmi2_methods
, sizeof (bmi2_methods
), cmethod
);
716 supported
= (get_cpu_features () & MONO_CPU_X86_BMI2
) != 0;
717 is_64bit
= !strcmp (class_name
, "X64");
720 case SN_get_IsSupported
:
721 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
722 ins
->type
= STACK_I4
;
724 //case SN_MultiplyNoFlags:
725 //// TODO: implement using _mulx_u32/u64:
726 //// ulong MultiplyNoFlags(ulong left, ulong right)
727 //// ulong MultiplyNoFlags(ulong left, ulong right, ulong* low) => MultiplyNoFlags(left, right, low);
728 //// uint MultiplyNoFlags(uint left, uint right)
729 //// uint MultiplyNoFlags(uint left, uint right, uint* low)
731 //case SN_ZeroHighBits:
732 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_BZHI64
: OP_BZHI32
);
733 ins
->dreg
= alloc_ireg (cfg
);
734 ins
->sreg1
= args
[0]->dreg
;
735 ins
->sreg2
= args
[1]->dreg
;
736 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
737 MONO_ADD_INS (cfg
->cbb
, ins
);
739 case SN_ParallelBitExtract
:
740 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_PEXT64
: OP_PEXT32
);
741 ins
->dreg
= alloc_ireg (cfg
);
742 ins
->sreg1
= args
[0]->dreg
;
743 ins
->sreg2
= args
[1]->dreg
;
744 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
745 MONO_ADD_INS (cfg
->cbb
, ins
);
747 case SN_ParallelBitDeposit
:
748 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_PDEP64
: OP_PDEP32
);
749 ins
->dreg
= alloc_ireg (cfg
);
750 ins
->sreg1
= args
[0]->dreg
;
751 ins
->sreg2
= args
[1]->dreg
;
752 ins
->type
= is_64bit
? STACK_I8
: STACK_I4
;
753 MONO_ADD_INS (cfg
->cbb
, ins
);
756 g_assert_not_reached ();
764 static guint16 vector_128_t_methods
[] = {
769 emit_vector128_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
772 MonoType
*type
, *etype
;
776 id
= lookup_intrins (vector_128_t_methods
, sizeof (vector_128_t_methods
), cmethod
);
780 klass
= cmethod
->klass
;
781 type
= m_class_get_byval_arg (klass
);
782 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
783 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
787 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
790 if (cfg
->verbose_level
> 1) {
791 char *name
= mono_method_full_name (cmethod
, TRUE
);
792 printf (" SIMD intrinsic %s\n", name
);
798 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
800 EMIT_NEW_ICONST (cfg
, ins
, len
);
809 static guint16 vector_256_t_methods
[] = {
814 emit_vector256_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
817 MonoType
*type
, *etype
;
821 id
= lookup_intrins (vector_256_t_methods
, sizeof (vector_256_t_methods
), cmethod
);
825 klass
= cmethod
->klass
;
826 type
= m_class_get_byval_arg (klass
);
827 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
828 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
832 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
835 if (cfg
->verbose_level
> 1) {
836 char *name
= mono_method_full_name (cmethod
, TRUE
);
837 printf (" SIMD intrinsic %s\n", name
);
843 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
845 EMIT_NEW_ICONST (cfg
, ins
, len
);
855 mono_emit_simd_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
857 const char *class_name
;
858 const char *class_ns
;
859 MonoImage
*image
= m_class_get_image (cmethod
->klass
);
861 if (image
!= mono_get_corlib ())
864 if (cfg
->compile_aot
)
867 class_ns
= m_class_get_name_space (cmethod
->klass
);
868 class_name
= m_class_get_name (cmethod
->klass
);
869 if (!strcmp (class_ns
, "System.Numerics") && !strcmp (class_name
, "Vector")) {
870 MonoInst
*ins
= emit_sys_numerics_vector (cfg
, cmethod
, fsig
, args
);
872 //printf ("M: %s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod));
876 if (!strcmp (class_ns
, "System.Numerics") && !strcmp (class_name
, "Vector`1")) {
877 MonoInst
*ins
= emit_sys_numerics_vector_t (cfg
, cmethod
, fsig
, args
);
879 //printf ("M: %s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod));
883 if (!strcmp (class_ns
, "System.Runtime.Intrinsics")) {
884 if (!strcmp (class_name
, "Vector128`1"))
885 return emit_vector128_t (cfg
,cmethod
, fsig
, args
);
886 if (!strcmp (class_name
, "Vector256`1"))
887 return emit_vector256_t (cfg
,cmethod
, fsig
, args
);
890 if (cmethod
->klass
->nested_in
)
891 class_ns
= m_class_get_name_space (cmethod
->klass
->nested_in
), class_name
, cmethod
->klass
->nested_in
;
892 if (!strcmp (class_ns
, "System.Runtime.Intrinsics.X86"))
893 return emit_x86_intrinsics (cfg
,cmethod
, fsig
, args
);
900 mono_simd_decompose_intrinsic (MonoCompile
*cfg
, MonoBasicBlock
*bb
, MonoInst
*ins
)
905 mono_simd_simplify_indirection (MonoCompile
*cfg
)
911 MONO_EMPTY_SOURCE_FILE (simd_intrinsics_netcore
);
915 #endif /* DISABLE_JIT */