[netcore] Implement missing Bmi1/Bmi2 intrinsics (#16919)
[mono-project.git] / mono / mini / simd-intrinsics-netcore.c
blobdd9620b18a69321f7625988ca940377bdd234d10
1 /**
2 * SIMD Intrinsics support for netcore
3 */
5 #include <config.h>
6 #include <mono/utils/mono-compiler.h>
8 #if defined(DISABLE_JIT)
/*
 * Variant used when the JIT is disabled (DISABLE_JIT): there is nothing to
 * set up, so initialization is a no-op.
 */
void
mono_simd_intrinsics_init (void)
{
}
15 #else
/*
 * Only LLVM is supported as a backend.
 */
21 #include "mini.h"
22 #include "ir-emit.h"
23 #ifdef ENABLE_LLVM
24 #include "llvm-jit.h"
25 #endif
26 #include "mono/utils/bsearch.h"
27 #include <mono/metadata/abi-details.h>
28 #include <mono/metadata/reflection-internals.h>
30 #if defined (MONO_ARCH_SIMD_INTRINSICS) && defined(ENABLE_NETCORE)
32 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
33 #define MSGSTRFIELD1(line) str##line
34 static const struct msgstr_t {
35 #define METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
36 #define METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
37 #include "simd-methods-netcore.h"
38 #undef METHOD
39 #undef METHOD2
40 } method_names = {
41 #define METHOD(name) #name,
42 #define METHOD2(str,name) str,
43 #include "simd-methods-netcore.h"
44 #undef METHOD
45 #undef METHOD2
48 enum {
49 #define METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
50 #define METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
51 #include "simd-methods-netcore.h"
53 #define method_name(idx) ((const char*)&method_names + (idx))
55 static int register_size;
57 static MonoCPUFeatures
58 get_cpu_features (void)
60 #ifdef ENABLE_LLVM
61 return mono_llvm_get_cpu_features ();
62 #elif defined(TARGET_AMD64)
63 return mono_arch_get_cpu_features ();
64 #else
65 return (MonoCPUFeatures)0;
66 #endif
69 void
70 mono_simd_intrinsics_init (void)
72 register_size = 16;
73 #if FALSE
74 if ((get_cpu_features () & MONO_CPU_X86_AVX) != 0)
75 register_size = 32;
76 #endif
77 /* Tell the class init code the size of the System.Numerics.Register type */
78 mono_simd_register_size = register_size;
81 MonoInst*
82 mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr)
84 return NULL;
87 static int
88 simd_intrinsic_compare_by_name (const void *key, const void *value)
90 return strcmp ((const char*)key, method_name (*(guint16*)value));
93 static int
94 lookup_intrins (guint16 *intrinsics, int size, MonoMethod *cmethod)
96 const guint16 *result = (const guint16 *)mono_binary_search (cmethod->name, intrinsics, size / sizeof (guint16), sizeof (guint16), &simd_intrinsic_compare_by_name);
98 for (int i = 0; i < (size / sizeof (guint16)) - 1; ++i) {
99 if (method_name (intrinsics [i])[0] > method_name (intrinsics [i + 1])[0]) {
100 printf ("%s %s\n",method_name (intrinsics [i]), method_name (intrinsics [i + 1]));
101 g_assert_not_reached ();
105 if (result == NULL)
106 return -1;
107 else
108 return (int)*result;
111 static guint16 vector_methods [] = {
112 SN_get_IsHardwareAccelerated
115 static MonoInst*
116 emit_sys_numerics_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
118 MonoInst *ins;
119 gboolean supported = FALSE;
120 int id;
122 id = lookup_intrins (vector_methods, sizeof (vector_methods), cmethod);
123 if (id == -1)
124 return NULL;
126 //printf ("%s\n", mono_method_full_name (cmethod, 1));
128 #ifdef MONO_ARCH_SIMD_INTRINSICS
129 supported = TRUE;
130 #endif
132 switch (id) {
133 case SN_get_IsHardwareAccelerated:
134 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
135 ins->type = STACK_I4;
136 return ins;
137 default:
138 break;
141 return NULL;
144 static int
145 type_to_expand_op (MonoType *type)
147 switch (type->type) {
148 case MONO_TYPE_I1:
149 case MONO_TYPE_U1:
150 return OP_EXPAND_I1;
151 case MONO_TYPE_I2:
152 case MONO_TYPE_U2:
153 return OP_EXPAND_I2;
154 case MONO_TYPE_I4:
155 case MONO_TYPE_U4:
156 return OP_EXPAND_I4;
157 case MONO_TYPE_I8:
158 case MONO_TYPE_U8:
159 return OP_EXPAND_I8;
160 case MONO_TYPE_R4:
161 return OP_EXPAND_R4;
162 case MONO_TYPE_R8:
163 return OP_EXPAND_R8;
164 default:
165 g_assert_not_reached ();
170 * Return a simd vreg for the simd value represented by SRC.
171 * SRC is the 'this' argument to methods.
172 * Set INDIRECT to TRUE if the value was loaded from memory.
174 static int
175 load_simd_vreg_class (MonoCompile *cfg, MonoClass *klass, MonoInst *src, gboolean *indirect)
177 const char *spec = INS_INFO (src->opcode);
179 if (indirect)
180 *indirect = FALSE;
181 if (src->opcode == OP_XMOVE) {
182 return src->sreg1;
183 } else if (src->opcode == OP_LDADDR) {
184 int res = ((MonoInst*)src->inst_p0)->dreg;
185 return res;
186 } else if (spec [MONO_INST_DEST] == 'x') {
187 return src->dreg;
188 } else if (src->type == STACK_PTR || src->type == STACK_MP) {
189 MonoInst *ins;
190 if (indirect)
191 *indirect = TRUE;
193 MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE);
194 ins->klass = klass;
195 ins->sreg1 = src->dreg;
196 ins->type = STACK_VTYPE;
197 ins->dreg = alloc_ireg (cfg);
198 MONO_ADD_INS (cfg->cbb, ins);
199 return ins->dreg;
201 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src->type);
202 mono_print_ins (src);
203 g_assert_not_reached ();
206 static int
207 load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean *indirect)
209 return load_simd_vreg_class (cfg, cmethod->klass, src, indirect);
212 /* Create and emit a SIMD instruction, dreg is auto-allocated */
213 static MonoInst*
214 emit_simd_ins (MonoCompile *cfg, MonoClass *klass, int opcode, int sreg1, int sreg2)
216 const char *spec = INS_INFO (opcode);
217 MonoInst *ins;
219 MONO_INST_NEW (cfg, ins, opcode);
220 if (spec [MONO_INST_DEST] == 'x') {
221 ins->dreg = alloc_xreg (cfg);
222 ins->type = STACK_VTYPE;
223 } else if (spec [MONO_INST_DEST] == 'i') {
224 ins->dreg = alloc_ireg (cfg);
225 ins->type = STACK_I4;
226 } else {
227 g_assert_not_reached ();
229 ins->sreg1 = sreg1;
230 ins->sreg2 = sreg2;
231 ins->klass = klass;
232 MONO_ADD_INS (cfg->cbb, ins);
233 return ins;
236 static MonoInst*
237 emit_xcompare (MonoCompile *cfg, MonoClass *klass, MonoType *etype, MonoInst *arg1, MonoInst *arg2)
239 MonoInst *ins;
240 gboolean is_fp = etype->type == MONO_TYPE_R4 || etype->type == MONO_TYPE_R8;
242 ins = emit_simd_ins (cfg, klass, is_fp ? OP_XCOMPARE_FP : OP_XCOMPARE, arg1->dreg, arg2->dreg);
243 ins->inst_c0 = CMP_EQ;
244 ins->inst_c1 = etype->type;
245 return ins;
248 static guint16 vector_t_methods [] = {
249 SN_ctor,
250 SN_CopyTo,
251 SN_Equals,
252 SN_GreaterThan,
253 SN_GreaterThanOrEqual,
254 SN_LessThan,
255 SN_LessThanOrEqual,
256 SN_get_AllOnes,
257 SN_get_Count,
258 SN_get_Item,
259 SN_get_Zero,
260 SN_op_Addition,
261 SN_op_BitwiseAnd,
262 SN_op_BitwiseOr,
263 SN_op_Division,
264 SN_op_Equality,
265 SN_op_ExclusiveOr,
266 SN_op_Explicit,
267 SN_op_Inequality,
268 SN_op_Multiply,
269 SN_op_Subtraction
272 static MonoInst*
273 emit_sys_numerics_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
275 MonoInst *ins;
276 MonoType *type, *etype;
277 MonoClass *klass;
278 int size, len, id;
279 gboolean is_unsigned;
281 id = lookup_intrins (vector_t_methods, sizeof (vector_t_methods), cmethod);
282 if (id == -1)
283 return NULL;
285 klass = cmethod->klass;
286 type = m_class_get_byval_arg (klass);
287 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
288 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
289 g_assert (size);
290 len = register_size / size;
292 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
293 return NULL;
295 if (cfg->verbose_level > 1) {
296 char *name = mono_method_full_name (cmethod, TRUE);
297 printf (" SIMD intrinsic %s\n", name);
298 g_free (name);
301 switch (id) {
302 case SN_get_Count:
303 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
304 break;
305 EMIT_NEW_ICONST (cfg, ins, len);
306 return ins;
307 case SN_get_Zero:
308 g_assert (fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type));
309 return emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
310 case SN_get_AllOnes: {
311 /* Compare a zero vector with itself */
312 ins = emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
313 return emit_xcompare (cfg, klass, etype, ins, ins);
315 case SN_get_Item:
316 if (!COMPILE_LLVM (cfg))
317 return NULL;
318 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
319 MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "IndexOutOfRangeException");
320 int opcode = -1;
321 int dreg;
322 gboolean is64 = FALSE;
323 switch (etype->type) {
324 case MONO_TYPE_I8:
325 case MONO_TYPE_U8:
326 opcode = OP_XEXTRACT_I64;
327 is64 = TRUE;
328 dreg = alloc_lreg (cfg);
329 break;
330 case MONO_TYPE_R8:
331 opcode = OP_XEXTRACT_R8;
332 dreg = alloc_freg (cfg);
333 break;
334 case MONO_TYPE_R4:
335 g_assert (cfg->r4fp);
336 opcode = OP_XEXTRACT_R4;
337 dreg = alloc_freg (cfg);
338 break;
339 default:
340 opcode = OP_XEXTRACT_I32;
341 dreg = alloc_ireg (cfg);
342 break;
344 MONO_INST_NEW (cfg, ins, opcode);
345 ins->dreg = dreg;
346 ins->sreg1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
347 ins->sreg2 = args [1]->dreg;
348 ins->inst_c0 = etype->type;
349 mini_type_to_eval_stack_type (cfg, etype, ins);
350 MONO_ADD_INS (cfg->cbb, ins);
351 return ins;
352 case SN_ctor:
353 if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) {
354 int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
356 int opcode = type_to_expand_op (etype);
357 ins = emit_simd_ins (cfg, klass, opcode, args [1]->dreg, -1);
358 ins->dreg = dreg;
359 return ins;
361 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
362 MonoInst *array_ins = args [1];
363 MonoInst *index_ins;
364 MonoInst *ldelema_ins;
365 MonoInst *var;
366 int end_index_reg;
368 if (args [0]->opcode != OP_LDADDR)
369 return NULL;
371 /* .ctor (T[]) or .ctor (T[], index) */
373 if (fsig->param_count == 2) {
374 index_ins = args [2];
375 } else {
376 EMIT_NEW_ICONST (cfg, index_ins, 0);
379 /* Emit index check for the end (index + len - 1 < array length) */
380 end_index_reg = alloc_ireg (cfg);
381 EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1);
382 MONO_EMIT_BOUNDS_CHECK (cfg, array_ins->dreg, MonoArray, max_length, end_index_reg);
384 /* Load the array slice into the simd reg */
385 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, TRUE);
386 g_assert (args [0]->opcode == OP_LDADDR);
387 var = (MonoInst*)args [0]->inst_p0;
388 EMIT_NEW_LOAD_MEMBASE (cfg, ins, OP_LOADX_MEMBASE, var->dreg, ldelema_ins->dreg, 0);
389 ins->klass = cmethod->klass;
390 return args [0];
392 break;
393 case SN_CopyTo:
394 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
395 MonoInst *array_ins = args [1];
396 MonoInst *index_ins;
397 MonoInst *ldelema_ins;
398 int val_vreg, end_index_reg;
400 val_vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
402 /* CopyTo (T[]) or CopyTo (T[], index) */
404 if (fsig->param_count == 2) {
405 index_ins = args [2];
406 } else {
407 EMIT_NEW_ICONST (cfg, index_ins, 0);
410 /* CopyTo () does complicated argument checks */
411 mini_emit_bounds_check_offset (cfg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), index_ins->dreg, "ArgumentOutOfRangeException");
412 end_index_reg = alloc_ireg (cfg);
413 int len_reg = alloc_ireg (cfg);
414 MONO_EMIT_NEW_LOAD_MEMBASE_OP_FLAGS (cfg, OP_LOADI4_MEMBASE, len_reg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), MONO_INST_INVARIANT_LOAD);
415 EMIT_NEW_BIALU (cfg, ins, OP_ISUB, end_index_reg, len_reg, index_ins->dreg);
416 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, end_index_reg, len);
417 MONO_EMIT_NEW_COND_EXC (cfg, LT, "ArgumentException");
419 /* Load the array slice into the simd reg */
420 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, FALSE);
421 EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, ldelema_ins->dreg, 0, val_vreg);
422 ins->klass = cmethod->klass;
423 return ins;
425 break;
426 case SN_Equals:
427 if (fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && mono_metadata_type_equal (fsig->params [0], type)) {
428 int sreg1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
430 return emit_simd_ins (cfg, klass, OP_XEQUAL, sreg1, args [1]->dreg);
431 } else if (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)) {
432 /* Per element equality */
433 return emit_xcompare (cfg, klass, etype, args [0], args [1]);
435 break;
436 case SN_op_Equality:
437 case SN_op_Inequality:
438 g_assert (fsig->param_count == 2 && fsig->ret->type == MONO_TYPE_BOOLEAN &&
439 mono_metadata_type_equal (fsig->params [0], type) &&
440 mono_metadata_type_equal (fsig->params [1], type));
441 ins = emit_simd_ins (cfg, klass, OP_XEQUAL, args [0]->dreg, args [1]->dreg);
442 if (id == SN_op_Inequality) {
443 int sreg = ins->dreg;
444 int dreg = alloc_ireg (cfg);
445 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, sreg, 0);
446 EMIT_NEW_UNALU (cfg, ins, OP_CEQ, dreg, -1);
448 return ins;
449 case SN_GreaterThan:
450 case SN_GreaterThanOrEqual:
451 case SN_LessThan:
452 case SN_LessThanOrEqual:
453 g_assert (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type));
454 is_unsigned = etype->type == MONO_TYPE_U1 || etype->type == MONO_TYPE_U2 || etype->type == MONO_TYPE_U4 || etype->type == MONO_TYPE_U8;
455 ins = emit_xcompare (cfg, klass, etype, args [0], args [1]);
456 switch (id) {
457 case SN_GreaterThan:
458 ins->inst_c0 = is_unsigned ? CMP_GT_UN : CMP_GT;
459 break;
460 case SN_GreaterThanOrEqual:
461 ins->inst_c0 = is_unsigned ? CMP_GE_UN : CMP_GE;
462 break;
463 case SN_LessThan:
464 ins->inst_c0 = is_unsigned ? CMP_LT_UN : CMP_LT;
465 break;
466 case SN_LessThanOrEqual:
467 ins->inst_c0 = is_unsigned ? CMP_LE_UN : CMP_LE;
468 break;
469 default:
470 g_assert_not_reached ();
472 return ins;
473 case SN_op_Explicit:
474 return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1);
475 case SN_op_Addition:
476 case SN_op_Subtraction:
477 case SN_op_Division:
478 case SN_op_Multiply:
479 case SN_op_BitwiseAnd:
480 case SN_op_BitwiseOr:
481 case SN_op_ExclusiveOr:
482 if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)))
483 return NULL;
484 ins = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, args [1]->dreg);
485 ins->inst_c1 = etype->type;
486 if (etype->type == MONO_TYPE_R4 || etype->type == MONO_TYPE_R8) {
487 switch (id) {
488 case SN_op_Addition:
489 ins->inst_c0 = OP_FADD;
490 break;
491 case SN_op_Subtraction:
492 ins->inst_c0 = OP_FSUB;
493 break;
494 case SN_op_Multiply:
495 ins->inst_c0 = OP_FMUL;
496 break;
497 case SN_op_Division:
498 ins->inst_c0 = OP_FDIV;
499 break;
500 default:
501 NULLIFY_INS (ins);
502 return NULL;
504 } else {
505 switch (id) {
506 case SN_op_Addition:
507 ins->inst_c0 = OP_IADD;
508 break;
509 case SN_op_Subtraction:
510 ins->inst_c0 = OP_ISUB;
511 break;
513 case SN_op_Division:
514 ins->inst_c0 = OP_IDIV;
515 break;
516 case SN_op_Multiply:
517 ins->inst_c0 = OP_IMUL;
518 break;
520 case SN_op_BitwiseAnd:
521 ins->inst_c0 = OP_IAND;
522 break;
523 case SN_op_BitwiseOr:
524 ins->inst_c0 = OP_IOR;
525 break;
526 case SN_op_ExclusiveOr:
527 ins->inst_c0 = OP_IXOR;
528 break;
529 default:
530 NULLIFY_INS (ins);
531 return NULL;
534 return ins;
535 default:
536 break;
539 return NULL;
542 #ifdef TARGET_AMD64
544 static guint16 popcnt_methods [] = {
545 SN_PopCount,
546 SN_get_IsSupported
549 static guint16 lzcnt_methods [] = {
550 SN_LeadingZeroCount,
551 SN_get_IsSupported
554 static guint16 bmi1_methods [] = {
555 SN_AndNot,
556 SN_BitFieldExtract,
557 SN_ExtractLowestSetBit,
558 SN_GetMaskUpToLowestSetBit,
559 SN_ResetLowestSetBit,
560 SN_TrailingZeroCount,
561 SN_get_IsSupported,
564 static guint16 bmi2_methods [] = {
565 //SN_MultiplyNoFlags,
566 SN_ParallelBitDeposit,
567 SN_ParallelBitExtract,
568 SN_ZeroHighBits,
569 SN_get_IsSupported,
572 static MonoInst*
573 emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
575 const char *class_name;
576 const char *class_ns;
577 MonoInst *ins;
578 int id;
579 gboolean supported, is_64bit;
580 MonoClass *klass = cmethod->klass;
582 class_ns = m_class_get_name_space (klass);
583 class_name = m_class_get_name (klass);
584 if (!strcmp (class_name, "Popcnt") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Popcnt"))) {
585 id = lookup_intrins (popcnt_methods, sizeof (popcnt_methods), cmethod);
586 if (id == -1)
587 return NULL;
589 supported = (get_cpu_features () & MONO_CPU_X86_POPCNT) != 0;
590 is_64bit = !strcmp (class_name, "X64");
592 switch (id) {
593 case SN_get_IsSupported:
594 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
595 ins->type = STACK_I4;
596 return ins;
597 case SN_PopCount:
598 if (!supported)
599 return NULL;
600 MONO_INST_NEW (cfg, ins, is_64bit ? OP_POPCNT64 : OP_POPCNT32);
601 ins->dreg = alloc_ireg (cfg);
602 ins->sreg1 = args [0]->dreg;
603 MONO_ADD_INS (cfg->cbb, ins);
604 return ins;
605 default:
606 return NULL;
609 if (!strcmp (class_name, "Lzcnt") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Lzcnt"))) {
610 id = lookup_intrins (lzcnt_methods, sizeof (lzcnt_methods), cmethod);
611 if (id == -1)
612 return NULL;
614 supported = (get_cpu_features () & MONO_CPU_X86_LZCNT) != 0;
615 is_64bit = !strcmp (class_name, "X64");
617 switch (id) {
618 case SN_get_IsSupported:
619 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
620 ins->type = STACK_I4;
621 return ins;
622 case SN_LeadingZeroCount:
623 if (!supported)
624 return NULL;
625 MONO_INST_NEW (cfg, ins, is_64bit ? OP_LZCNT64 : OP_LZCNT32);
626 ins->dreg = alloc_ireg (cfg);
627 ins->sreg1 = args [0]->dreg;
628 MONO_ADD_INS (cfg->cbb, ins);
629 return ins;
630 default:
631 return NULL;
634 if (!strcmp (class_name, "Bmi1") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Bmi1"))) {
635 if (!COMPILE_LLVM (cfg))
636 return NULL;
637 id = lookup_intrins (bmi1_methods, sizeof (bmi1_methods), cmethod);
639 g_assert (id != -1);
640 supported = (get_cpu_features () & MONO_CPU_X86_BMI1) != 0;
641 is_64bit = !strcmp (class_name, "X64");
643 switch (id) {
644 case SN_get_IsSupported:
645 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
646 ins->type = STACK_I4;
647 return ins;
648 case SN_AndNot: {
649 // (a ^ -1) & b
650 // LLVM replaces it with `andn`
651 int tmp_reg = alloc_preg (cfg);
652 int result_reg = alloc_preg (cfg);
653 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LXOR_IMM : OP_IXOR_IMM, tmp_reg, args [0]->dreg, -1);
654 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, tmp_reg, args [1]->dreg);
655 return ins;
657 case SN_BitFieldExtract: {
658 if (fsig->param_count == 2) {
659 MONO_INST_NEW (cfg, ins, is_64bit ? OP_BEXTR64 : OP_BEXTR32);
660 ins->dreg = alloc_ireg (cfg);
661 ins->sreg1 = args [0]->dreg;
662 ins->sreg2 = args [1]->dreg;
663 ins->type = is_64bit ? STACK_I8 : STACK_I4;
664 MONO_ADD_INS (cfg->cbb, ins);
665 return ins;
668 case SN_GetMaskUpToLowestSetBit: {
669 // x ^ (x - 1)
670 // LLVM replaces it with `blsmsk`
671 int tmp_reg = alloc_preg (cfg);
672 int result_reg = alloc_preg (cfg);
673 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LSUB_IMM : OP_ISUB_IMM, tmp_reg, args [0]->dreg, 1);
674 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LXOR : OP_IXOR, result_reg, args [0]->dreg, tmp_reg);
675 return ins;
677 case SN_ResetLowestSetBit: {
678 // x & (x - 1)
679 // LLVM replaces it with `blsr`
680 int tmp_reg = alloc_preg (cfg);
681 int result_reg = alloc_preg (cfg);
682 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LSUB_IMM : OP_ISUB_IMM, tmp_reg, args [0]->dreg, 1);
683 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, args [0]->dreg, tmp_reg);
684 return ins;
686 case SN_ExtractLowestSetBit: {
687 // x & (0 - x)
688 // LLVM replaces it with `blsi`
689 int tmp_reg = alloc_preg (cfg);
690 int result_reg = alloc_preg (cfg);
691 int zero_reg = alloc_preg (cfg);
692 MONO_EMIT_NEW_ICONST (cfg, zero_reg, 0);
693 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LSUB : OP_ISUB, tmp_reg, zero_reg, args [0]->dreg);
694 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, args [0]->dreg, tmp_reg);
695 return ins;
697 case SN_TrailingZeroCount:
698 MONO_INST_NEW (cfg, ins, is_64bit ? OP_CTTZ64 : OP_CTTZ32);
699 ins->dreg = alloc_ireg (cfg);
700 ins->sreg1 = args [0]->dreg;
701 ins->type = STACK_I4;
702 MONO_ADD_INS (cfg->cbb, ins);
703 return ins;
704 default:
705 g_assert_not_reached ();
708 if (!strcmp (class_name, "Bmi2") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Bmi2"))) {
709 // We only support the subset used by corelib. Remove this check once MultiplyNoFlags is implemented.
710 if (m_class_get_image (cfg->method->klass) != mono_get_corlib ())
711 return NULL;
712 if (!COMPILE_LLVM (cfg))
713 return NULL;
714 id = lookup_intrins (bmi2_methods, sizeof (bmi2_methods), cmethod);
715 g_assert (id != -1);
716 supported = (get_cpu_features () & MONO_CPU_X86_BMI2) != 0;
717 is_64bit = !strcmp (class_name, "X64");
719 switch (id) {
720 case SN_get_IsSupported:
721 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
722 ins->type = STACK_I4;
723 return ins;
724 //case SN_MultiplyNoFlags:
725 //// TODO: implement using _mulx_u32/u64:
726 //// ulong MultiplyNoFlags(ulong left, ulong right)
727 //// ulong MultiplyNoFlags(ulong left, ulong right, ulong* low) => MultiplyNoFlags(left, right, low);
728 //// uint MultiplyNoFlags(uint left, uint right)
729 //// uint MultiplyNoFlags(uint left, uint right, uint* low)
730 //return NULL;
731 //case SN_ZeroHighBits:
732 MONO_INST_NEW (cfg, ins, is_64bit ? OP_BZHI64 : OP_BZHI32);
733 ins->dreg = alloc_ireg (cfg);
734 ins->sreg1 = args [0]->dreg;
735 ins->sreg2 = args [1]->dreg;
736 ins->type = is_64bit ? STACK_I8 : STACK_I4;
737 MONO_ADD_INS (cfg->cbb, ins);
738 return ins;
739 case SN_ParallelBitExtract:
740 MONO_INST_NEW (cfg, ins, is_64bit ? OP_PEXT64 : OP_PEXT32);
741 ins->dreg = alloc_ireg (cfg);
742 ins->sreg1 = args [0]->dreg;
743 ins->sreg2 = args [1]->dreg;
744 ins->type = is_64bit ? STACK_I8 : STACK_I4;
745 MONO_ADD_INS (cfg->cbb, ins);
746 return ins;
747 case SN_ParallelBitDeposit:
748 MONO_INST_NEW (cfg, ins, is_64bit ? OP_PDEP64 : OP_PDEP32);
749 ins->dreg = alloc_ireg (cfg);
750 ins->sreg1 = args [0]->dreg;
751 ins->sreg2 = args [1]->dreg;
752 ins->type = is_64bit ? STACK_I8 : STACK_I4;
753 MONO_ADD_INS (cfg->cbb, ins);
754 return ins;
755 default:
756 g_assert_not_reached ();
760 return NULL;
762 #endif
764 static guint16 vector_128_t_methods [] = {
765 SN_get_Count,
768 static MonoInst*
769 emit_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
771 MonoInst *ins;
772 MonoType *type, *etype;
773 MonoClass *klass;
774 int size, len, id;
776 id = lookup_intrins (vector_128_t_methods, sizeof (vector_128_t_methods), cmethod);
777 if (id == -1)
778 return NULL;
780 klass = cmethod->klass;
781 type = m_class_get_byval_arg (klass);
782 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
783 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
784 g_assert (size);
785 len = 16 / size;
787 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
788 return NULL;
790 if (cfg->verbose_level > 1) {
791 char *name = mono_method_full_name (cmethod, TRUE);
792 printf (" SIMD intrinsic %s\n", name);
793 g_free (name);
796 switch (id) {
797 case SN_get_Count:
798 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
799 break;
800 EMIT_NEW_ICONST (cfg, ins, len);
801 return ins;
802 default:
803 break;
806 return NULL;
809 static guint16 vector_256_t_methods [] = {
810 SN_get_Count,
813 static MonoInst*
814 emit_vector256_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
816 MonoInst *ins;
817 MonoType *type, *etype;
818 MonoClass *klass;
819 int size, len, id;
821 id = lookup_intrins (vector_256_t_methods, sizeof (vector_256_t_methods), cmethod);
822 if (id == -1)
823 return NULL;
825 klass = cmethod->klass;
826 type = m_class_get_byval_arg (klass);
827 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
828 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
829 g_assert (size);
830 len = 32 / size;
832 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
833 return NULL;
835 if (cfg->verbose_level > 1) {
836 char *name = mono_method_full_name (cmethod, TRUE);
837 printf (" SIMD intrinsic %s\n", name);
838 g_free (name);
841 switch (id) {
842 case SN_get_Count:
843 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
844 break;
845 EMIT_NEW_ICONST (cfg, ins, len);
846 return ins;
847 default:
848 break;
851 return NULL;
854 MonoInst*
855 mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
857 const char *class_name;
858 const char *class_ns;
859 MonoImage *image = m_class_get_image (cmethod->klass);
861 if (image != mono_get_corlib ())
862 return NULL;
863 // FIXME:
864 if (cfg->compile_aot)
865 return NULL;
867 class_ns = m_class_get_name_space (cmethod->klass);
868 class_name = m_class_get_name (cmethod->klass);
869 if (!strcmp (class_ns, "System.Numerics") && !strcmp (class_name, "Vector")) {
870 MonoInst *ins = emit_sys_numerics_vector (cfg, cmethod, fsig, args);
871 if (!ins) {
872 //printf ("M: %s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod));
874 return ins;
876 if (!strcmp (class_ns, "System.Numerics") && !strcmp (class_name, "Vector`1")) {
877 MonoInst *ins = emit_sys_numerics_vector_t (cfg, cmethod, fsig, args);
878 if (!ins) {
879 //printf ("M: %s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod));
881 return ins;
883 if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
884 if (!strcmp (class_name, "Vector128`1"))
885 return emit_vector128_t (cfg ,cmethod, fsig, args);
886 if (!strcmp (class_name, "Vector256`1"))
887 return emit_vector256_t (cfg ,cmethod, fsig, args);
889 #ifdef TARGET_AMD64
890 if (cmethod->klass->nested_in)
891 class_ns = m_class_get_name_space (cmethod->klass->nested_in), class_name, cmethod->klass->nested_in;
892 if (!strcmp (class_ns, "System.Runtime.Intrinsics.X86"))
893 return emit_x86_intrinsics (cfg ,cmethod, fsig, args);
894 #endif
896 return NULL;
899 void
900 mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
904 void
905 mono_simd_simplify_indirection (MonoCompile *cfg)
909 #else
911 MONO_EMPTY_SOURCE_FILE (simd_intrinsics_netcore);
913 #endif
915 #endif /* DISABLE_JIT */