2 * SIMD Intrinsics support for netcore
6 #include <mono/utils/mono-compiler.h>
8 #if defined(DISABLE_JIT)
11 mono_simd_intrinsics_init (void)
18 * Only LLVM is supported as a backend.
26 #include "mono/utils/bsearch.h"
27 #include <mono/metadata/abi-details.h>
28 #include <mono/metadata/reflection-internals.h>
30 #if defined (MONO_ARCH_SIMD_INTRINSICS) && defined(ENABLE_NETCORE)
32 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
33 #define MSGSTRFIELD1(line) str##line
34 static const struct msgstr_t
{
35 #define METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
36 #define METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
37 #include "simd-methods-netcore.h"
41 #define METHOD(name) #name,
42 #define METHOD2(str,name) str,
43 #include "simd-methods-netcore.h"
49 #define METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
50 #define METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
51 #include "simd-methods-netcore.h"
53 #define method_name(idx) ((const char*)&method_names + (idx))
55 static int register_size
;
57 static MonoCPUFeatures
58 get_cpu_features (void)
61 return mono_llvm_get_cpu_features ();
63 return (MonoCPUFeatures
)0;
68 mono_simd_intrinsics_init (void)
72 if ((get_cpu_features () & MONO_CPU_X86_AVX
) != 0)
75 /* Tell the class init code the size of the System.Numerics.Register type */
76 mono_simd_register_size
= register_size
;
80 mono_emit_simd_field_load (MonoCompile
*cfg
, MonoClassField
*field
, MonoInst
*addr
)
86 simd_intrinsic_compare_by_name (const void *key
, const void *value
)
88 return strcmp ((const char*)key
, method_name (*(guint16
*)value
));
92 lookup_intrins (guint16
*intrinsics
, int size
, MonoMethod
*cmethod
)
94 const guint16
*result
= (const guint16
*)mono_binary_search (cmethod
->name
, intrinsics
, size
/ sizeof (guint16
), sizeof (guint16
), &simd_intrinsic_compare_by_name
);
96 for (int i
= 0; i
< (size
/ sizeof (guint16
)) - 1; ++i
) {
97 if (method_name (intrinsics
[i
])[0] > method_name (intrinsics
[i
+ 1])[0]) {
98 printf ("%s %s\n",method_name (intrinsics
[i
]), method_name (intrinsics
[i
+ 1]));
99 g_assert_not_reached ();
109 static guint16 vector_methods
[] = {
110 SN_get_IsHardwareAccelerated
114 emit_sys_numerics_vector (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
117 gboolean supported
= FALSE
;
120 id
= lookup_intrins (vector_methods
, sizeof (vector_methods
), cmethod
);
124 //printf ("%s\n", mono_method_full_name (cmethod, 1));
126 #ifdef MONO_ARCH_SIMD_INTRINSICS
131 case SN_get_IsHardwareAccelerated
:
132 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
133 ins
->type
= STACK_I4
;
143 type_to_expand_op (MonoType
*type
)
145 switch (type
->type
) {
163 g_assert_not_reached ();
168 * Return a simd vreg for the simd value represented by SRC.
169 * SRC is the 'this' argument to methods.
170 * Set INDIRECT to TRUE if the value was loaded from memory.
173 load_simd_vreg_class (MonoCompile
*cfg
, MonoClass
*klass
, MonoInst
*src
, gboolean
*indirect
)
175 const char *spec
= INS_INFO (src
->opcode
);
179 if (src
->opcode
== OP_XMOVE
) {
181 } else if (src
->opcode
== OP_LDADDR
) {
182 int res
= ((MonoInst
*)src
->inst_p0
)->dreg
;
184 } else if (spec
[MONO_INST_DEST
] == 'x') {
186 } else if (src
->type
== STACK_PTR
|| src
->type
== STACK_MP
) {
191 MONO_INST_NEW (cfg
, ins
, OP_LOADX_MEMBASE
);
193 ins
->sreg1
= src
->dreg
;
194 ins
->type
= STACK_VTYPE
;
195 ins
->dreg
= alloc_ireg (cfg
);
196 MONO_ADD_INS (cfg
->cbb
, ins
);
199 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src
->type
);
200 mono_print_ins (src
);
201 g_assert_not_reached ();
205 load_simd_vreg (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoInst
*src
, gboolean
*indirect
)
207 return load_simd_vreg_class (cfg
, cmethod
->klass
, src
, indirect
);
210 /* Create and emit a SIMD instruction, dreg is auto-allocated */
212 emit_simd_ins (MonoCompile
*cfg
, MonoClass
*klass
, int opcode
, int sreg1
, int sreg2
)
214 const char *spec
= INS_INFO (opcode
);
217 MONO_INST_NEW (cfg
, ins
, opcode
);
218 if (spec
[MONO_INST_DEST
] == 'x') {
219 ins
->dreg
= alloc_xreg (cfg
);
220 ins
->type
= STACK_VTYPE
;
221 } else if (spec
[MONO_INST_DEST
] == 'i') {
222 ins
->dreg
= alloc_ireg (cfg
);
223 ins
->type
= STACK_I4
;
225 g_assert_not_reached ();
230 MONO_ADD_INS (cfg
->cbb
, ins
);
235 emit_xcompare (MonoCompile
*cfg
, MonoClass
*klass
, MonoType
*etype
, MonoInst
*arg1
, MonoInst
*arg2
)
238 gboolean is_fp
= etype
->type
== MONO_TYPE_R4
|| etype
->type
== MONO_TYPE_R8
;
240 ins
= emit_simd_ins (cfg
, klass
, is_fp
? OP_XCOMPARE_FP
: OP_XCOMPARE
, arg1
->dreg
, arg2
->dreg
);
241 ins
->inst_c0
= CMP_EQ
;
242 ins
->inst_c1
= etype
->type
;
246 static guint16 vector_t_methods
[] = {
251 SN_GreaterThanOrEqual
,
271 emit_sys_numerics_vector_t (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
274 MonoType
*type
, *etype
;
276 int size
, len
, id
, index
;
277 gboolean is_unsigned
;
279 id
= lookup_intrins (vector_t_methods
, sizeof (vector_t_methods
), cmethod
);
283 klass
= cmethod
->klass
;
284 type
= m_class_get_byval_arg (klass
);
285 etype
= mono_class_get_context (klass
)->class_inst
->type_argv
[0];
286 size
= mono_class_value_size (mono_class_from_mono_type_internal (etype
), NULL
);
288 len
= register_size
/ size
;
290 if (!MONO_TYPE_IS_PRIMITIVE (etype
) || etype
->type
== MONO_TYPE_CHAR
|| etype
->type
== MONO_TYPE_BOOLEAN
)
293 if (cfg
->verbose_level
> 1) {
294 char *name
= mono_method_full_name (cmethod
, TRUE
);
295 printf (" SIMD intrinsic %s\n", name
);
301 if (!(fsig
->param_count
== 0 && fsig
->ret
->type
== MONO_TYPE_I4
))
303 EMIT_NEW_ICONST (cfg
, ins
, len
);
306 g_assert (fsig
->param_count
== 0 && mono_metadata_type_equal (fsig
->ret
, type
));
307 return emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
308 case SN_get_AllOnes
: {
309 /* Compare a zero vector with itself */
310 ins
= emit_simd_ins (cfg
, klass
, OP_XZERO
, -1, -1);
311 return emit_xcompare (cfg
, klass
, etype
, ins
, ins
);
314 if (args
[1]->opcode
!= OP_ICONST
)
316 index
= args
[1]->inst_c0
;
317 if (index
< 0 || index
>= len
)
321 if (fsig
->param_count
== 1 && mono_metadata_type_equal (fsig
->params
[0], etype
)) {
322 int dreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
324 int opcode
= type_to_expand_op (etype
);
325 ins
= emit_simd_ins (cfg
, klass
, opcode
, args
[1]->dreg
, -1);
329 if ((fsig
->param_count
== 1 || fsig
->param_count
== 2) && (fsig
->params
[0]->type
== MONO_TYPE_SZARRAY
)) {
330 MonoInst
*array_ins
= args
[1];
332 MonoInst
*ldelema_ins
;
336 if (args
[0]->opcode
!= OP_LDADDR
)
339 /* .ctor (T[]) or .ctor (T[], index) */
341 if (fsig
->param_count
== 2) {
342 index_ins
= args
[2];
344 EMIT_NEW_ICONST (cfg
, index_ins
, 0);
347 /* Emit index check for the end (index + len - 1 < array length) */
348 end_index_reg
= alloc_ireg (cfg
);
349 EMIT_NEW_BIALU_IMM (cfg
, ins
, OP_IADD_IMM
, end_index_reg
, index_ins
->dreg
, len
- 1);
350 MONO_EMIT_BOUNDS_CHECK (cfg
, array_ins
->dreg
, MonoArray
, max_length
, end_index_reg
);
352 /* Load the array slice into the simd reg */
353 ldelema_ins
= mini_emit_ldelema_1_ins (cfg
, mono_class_from_mono_type_internal (etype
), array_ins
, index_ins
, TRUE
);
354 g_assert (args
[0]->opcode
== OP_LDADDR
);
355 var
= (MonoInst
*)args
[0]->inst_p0
;
356 EMIT_NEW_LOAD_MEMBASE (cfg
, ins
, OP_LOADX_MEMBASE
, var
->dreg
, ldelema_ins
->dreg
, 0);
357 ins
->klass
= cmethod
->klass
;
362 if ((fsig
->param_count
== 1 || fsig
->param_count
== 2) && (fsig
->params
[0]->type
== MONO_TYPE_SZARRAY
)) {
363 MonoInst
*array_ins
= args
[1];
365 MonoInst
*ldelema_ins
;
366 int val_vreg
, end_index_reg
;
368 val_vreg
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
370 /* CopyTo (T[]) or CopyTo (T[], index) */
372 if (fsig
->param_count
== 2) {
373 index_ins
= args
[2];
375 EMIT_NEW_ICONST (cfg
, index_ins
, 0);
378 /* Emit index check for the end (index + len - 1 < array length) */
379 end_index_reg
= alloc_ireg (cfg
);
380 EMIT_NEW_BIALU_IMM (cfg
, ins
, OP_IADD_IMM
, end_index_reg
, index_ins
->dreg
, len
- 1);
381 MONO_EMIT_BOUNDS_CHECK (cfg
, array_ins
->dreg
, MonoArray
, max_length
, end_index_reg
);
383 /* Store the simd reg into the array slice */
384 ldelema_ins
= mini_emit_ldelema_1_ins (cfg
, mono_class_from_mono_type_internal (etype
), array_ins
, index_ins
, TRUE
);
385 EMIT_NEW_STORE_MEMBASE (cfg
, ins
, OP_STOREX_MEMBASE
, ldelema_ins
->dreg
, 0, val_vreg
);
386 ins
->klass
= cmethod
->klass
;
391 if (fsig
->param_count
== 1 && fsig
->ret
->type
== MONO_TYPE_BOOLEAN
&& mono_metadata_type_equal (fsig
->params
[0], type
)) {
392 int sreg1
= load_simd_vreg (cfg
, cmethod
, args
[0], NULL
);
394 return emit_simd_ins (cfg
, klass
, OP_XEQUAL
, sreg1
, args
[1]->dreg
);
395 } else if (fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
)) {
396 /* Per element equality */
397 return emit_xcompare (cfg
, klass
, etype
, args
[0], args
[1]);
401 case SN_op_Inequality
:
402 g_assert (fsig
->param_count
== 2 && fsig
->ret
->type
== MONO_TYPE_BOOLEAN
&&
403 mono_metadata_type_equal (fsig
->params
[0], type
) &&
404 mono_metadata_type_equal (fsig
->params
[1], type
));
405 ins
= emit_simd_ins (cfg
, klass
, OP_XEQUAL
, args
[0]->dreg
, args
[1]->dreg
);
406 if (id
== SN_op_Inequality
) {
407 int sreg
= ins
->dreg
;
408 int dreg
= alloc_ireg (cfg
);
409 EMIT_NEW_UNALU (cfg
, ins
, OP_INOT
, dreg
, sreg
);
413 case SN_GreaterThanOrEqual
:
415 case SN_LessThanOrEqual
:
416 g_assert (fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
));
417 is_unsigned
= etype
->type
== MONO_TYPE_U1
|| etype
->type
== MONO_TYPE_U2
|| etype
->type
== MONO_TYPE_U4
|| etype
->type
== MONO_TYPE_U8
;
418 ins
= emit_xcompare (cfg
, klass
, etype
, args
[0], args
[1]);
421 ins
->inst_c0
= is_unsigned
? CMP_GT_UN
: CMP_GT
;
423 case SN_GreaterThanOrEqual
:
424 ins
->inst_c0
= is_unsigned
? CMP_GE_UN
: CMP_GE
;
427 ins
->inst_c0
= is_unsigned
? CMP_LT_UN
: CMP_LT
;
429 case SN_LessThanOrEqual
:
430 ins
->inst_c0
= is_unsigned
? CMP_LE_UN
: CMP_LE
;
433 g_assert_not_reached ();
437 return emit_simd_ins (cfg
, klass
, OP_XCAST
, args
[0]->dreg
, -1);
439 case SN_op_Subtraction
:
442 case SN_op_BitwiseAnd
:
443 case SN_op_BitwiseOr
:
444 case SN_op_ExclusiveOr
:
445 if (!(fsig
->param_count
== 2 && mono_metadata_type_equal (fsig
->ret
, type
) && mono_metadata_type_equal (fsig
->params
[0], type
) && mono_metadata_type_equal (fsig
->params
[1], type
)))
447 ins
= emit_simd_ins (cfg
, klass
, OP_XBINOP
, args
[0]->dreg
, args
[1]->dreg
);
448 if (etype
->type
== MONO_TYPE_R4
|| etype
->type
== MONO_TYPE_R8
) {
451 ins
->inst_c0
= OP_FADD
;
453 case SN_op_Subtraction
:
454 ins
->inst_c0
= OP_FSUB
;
457 ins
->inst_c0
= OP_FMUL
;
460 ins
->inst_c0
= OP_FDIV
;
469 ins
->inst_c0
= OP_IADD
;
471 case SN_op_Subtraction
:
472 ins
->inst_c0
= OP_ISUB
;
476 ins->inst_c0 = OP_IDIV;
479 ins->inst_c0 = OP_IMUL;
482 case SN_op_BitwiseAnd
:
483 ins
->inst_c0
= OP_IAND
;
485 case SN_op_BitwiseOr
:
486 ins
->inst_c0
= OP_IOR
;
488 case SN_op_ExclusiveOr
:
489 ins
->inst_c0
= OP_IXOR
;
506 static guint16 popcnt_methods
[] = {
511 static guint16 lzcnt_methods
[] = {
516 static guint16 bmi1_methods
[] = {
517 SN_TrailingZeroCount
,
521 static guint16 bmi2_methods
[] = {
522 SN_ParallelBitDeposit
,
523 SN_ParallelBitExtract
,
528 emit_x86_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
530 const char *class_name
;
531 const char *class_ns
;
534 gboolean supported
, is_64bit
;
535 MonoClass
*klass
= cmethod
->klass
;
537 class_ns
= m_class_get_name_space (klass
);
538 class_name
= m_class_get_name (klass
);
539 if (!strcmp (class_name
, "Popcnt") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Popcnt"))) {
540 id
= lookup_intrins (popcnt_methods
, sizeof (popcnt_methods
), cmethod
);
544 supported
= (get_cpu_features () & MONO_CPU_X86_POPCNT
) != 0;
545 is_64bit
= !strcmp (class_name
, "X64");
548 case SN_get_IsSupported
:
549 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
550 ins
->type
= STACK_I4
;
555 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_POPCNT64
: OP_POPCNT32
);
556 ins
->dreg
= alloc_ireg (cfg
);
557 ins
->sreg1
= args
[0]->dreg
;
558 MONO_ADD_INS (cfg
->cbb
, ins
);
564 if (!strcmp (class_name
, "Lzcnt") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Lzcnt"))) {
565 id
= lookup_intrins (lzcnt_methods
, sizeof (lzcnt_methods
), cmethod
);
569 supported
= (get_cpu_features () & MONO_CPU_X86_LZCNT
) != 0;
570 is_64bit
= !strcmp (class_name
, "X64");
573 case SN_get_IsSupported
:
574 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
575 ins
->type
= STACK_I4
;
577 case SN_LeadingZeroCount
:
580 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_LZCNT64
: OP_LZCNT32
);
581 ins
->dreg
= alloc_ireg (cfg
);
582 ins
->sreg1
= args
[0]->dreg
;
583 MONO_ADD_INS (cfg
->cbb
, ins
);
589 if (!strcmp (class_name
, "Bmi1") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Bmi1"))) {
590 // We only support the subset used by corelib
591 if (m_class_get_image (cfg
->method
->klass
) != mono_get_corlib ())
593 id
= lookup_intrins (bmi1_methods
, sizeof (bmi1_methods
), cmethod
);
595 supported
= (get_cpu_features () & MONO_CPU_X86_BMI1
) != 0;
596 is_64bit
= !strcmp (class_name
, "X64");
599 case SN_get_IsSupported
:
600 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
601 ins
->type
= STACK_I4
;
603 case SN_TrailingZeroCount
:
604 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_CTTZ64
: OP_CTTZ32
);
605 ins
->dreg
= alloc_ireg (cfg
);
606 ins
->sreg1
= args
[0]->dreg
;
607 ins
->type
= STACK_I4
;
608 MONO_ADD_INS (cfg
->cbb
, ins
);
611 g_assert_not_reached ();
614 if (!strcmp (class_name
, "Bmi2") || (!strcmp (class_name
, "X64") && cmethod
->klass
->nested_in
&& !strcmp (m_class_get_name (cmethod
->klass
->nested_in
), "Bmi2"))) {
615 // We only support the subset used by corelib
616 if (m_class_get_image (cfg
->method
->klass
) != mono_get_corlib ())
618 id
= lookup_intrins (bmi2_methods
, sizeof (bmi2_methods
), cmethod
);
620 supported
= (get_cpu_features () & MONO_CPU_X86_BMI2
) != 0;
621 is_64bit
= !strcmp (class_name
, "X64");
624 case SN_get_IsSupported
:
625 EMIT_NEW_ICONST (cfg
, ins
, supported
? 1 : 0);
626 ins
->type
= STACK_I4
;
628 case SN_ParallelBitExtract
:
629 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_PEXT64
: OP_PEXT32
);
630 ins
->dreg
= alloc_ireg (cfg
);
631 ins
->sreg1
= args
[0]->dreg
;
632 ins
->sreg2
= args
[1]->dreg
;
633 ins
->type
= STACK_I4
;
634 MONO_ADD_INS (cfg
->cbb
, ins
);
636 case SN_ParallelBitDeposit
:
637 MONO_INST_NEW (cfg
, ins
, is_64bit
? OP_PDEP64
: OP_PDEP32
);
638 ins
->dreg
= alloc_ireg (cfg
);
639 ins
->sreg1
= args
[0]->dreg
;
640 ins
->sreg2
= args
[1]->dreg
;
641 ins
->type
= STACK_I4
;
642 MONO_ADD_INS (cfg
->cbb
, ins
);
645 g_assert_not_reached ();
647 //printf ("%s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod));
655 mono_emit_simd_intrinsics (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
657 const char *class_name
;
658 const char *class_ns
;
659 MonoImage
*image
= m_class_get_image (cmethod
->klass
);
661 if (image
!= mono_get_corlib ())
663 if (!COMPILE_LLVM (cfg
))
666 if (cfg
->compile_aot
)
669 class_ns
= m_class_get_name_space (cmethod
->klass
);
670 class_name
= m_class_get_name (cmethod
->klass
);
671 if (!strcmp (class_ns
, "System.Numerics") && !strcmp (class_name
, "Vector")) {
672 MonoInst
*ins
= emit_sys_numerics_vector (cfg
, cmethod
, fsig
, args
);
674 //printf ("M: %s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod));
678 if (!strcmp (class_ns
, "System.Numerics") && !strcmp (class_name
, "Vector`1")) {
679 MonoInst
*ins
= emit_sys_numerics_vector_t (cfg
, cmethod
, fsig
, args
);
681 //printf ("M: %s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod));
686 if (cmethod
->klass
->nested_in
)
687 class_ns
= m_class_get_name_space (cmethod
->klass
->nested_in
), class_name
, cmethod
->klass
->nested_in
;
688 if (!strcmp (class_ns
, "System.Runtime.Intrinsics.X86"))
689 return emit_x86_intrinsics (cfg
,cmethod
, fsig
, args
);
696 mono_simd_decompose_intrinsic (MonoCompile
*cfg
, MonoBasicBlock
*bb
, MonoInst
*ins
)
701 mono_simd_simplify_indirection (MonoCompile
*cfg
)
707 MONO_EMPTY_SOURCE_FILE (simd_intrinsics_netcore
);
711 #endif /* DISABLE_JIT */