[wasm] Improve virtualenv installation script (#18470)
[mono-project.git] / mono / mini / simd-intrinsics-netcore.c
blob24cbae63e350bc555044f95726589cc4ff78a5f2
/**
 * SIMD Intrinsics support for netcore
 */
5 #include <config.h>
6 #include <mono/utils/mono-compiler.h>
8 #if defined(DISABLE_JIT)
/*
 * DISABLE_JIT variant: there is nothing to initialize, so this is an empty stub.
 */
void
mono_simd_intrinsics_init (void)
{
}
15 #else
/*
 * Only LLVM is supported as a backend.
 */
21 #include "mini.h"
22 #include "mini-runtime.h"
23 #include "ir-emit.h"
24 #ifdef ENABLE_LLVM
25 #include "mini-llvm.h"
26 #endif
27 #include "mono/utils/bsearch.h"
28 #include <mono/metadata/abi-details.h>
29 #include <mono/metadata/reflection-internals.h>
31 #if defined (MONO_ARCH_SIMD_INTRINSICS) && defined(ENABLE_NETCORE)
33 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
34 #define MSGSTRFIELD1(line) str##line
35 static const struct msgstr_t {
36 #define METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
37 #define METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
38 #include "simd-methods-netcore.h"
39 #undef METHOD
40 #undef METHOD2
41 } method_names = {
42 #define METHOD(name) #name,
43 #define METHOD2(str,name) str,
44 #include "simd-methods-netcore.h"
45 #undef METHOD
46 #undef METHOD2
49 enum {
50 #define METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
51 #define METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
52 #include "simd-methods-netcore.h"
54 #define method_name(idx) ((const char*)&method_names + (idx))
56 static int register_size;
58 void
59 mono_simd_intrinsics_init (void)
61 register_size = 16;
62 #if FALSE
63 if ((mini_get_cpu_features () & MONO_CPU_X86_AVX) != 0)
64 register_size = 32;
65 #endif
66 /* Tell the class init code the size of the System.Numerics.Register type */
67 mono_simd_register_size = register_size;
70 MonoInst*
71 mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr)
73 return NULL;
76 static int
77 simd_intrinsic_compare_by_name (const void *key, const void *value)
79 return strcmp ((const char*)key, method_name (*(guint16*)value));
82 static int
83 lookup_intrins (guint16 *intrinsics, int size, MonoMethod *cmethod)
85 const guint16 *result = (const guint16 *)mono_binary_search (cmethod->name, intrinsics, size / sizeof (guint16), sizeof (guint16), &simd_intrinsic_compare_by_name);
87 #if FALSE
88 for (int i = 0; i < (size / sizeof (guint16)) - 1; ++i) {
89 if (method_name (intrinsics [i])[0] > method_name (intrinsics [i + 1])[0]) {
90 printf ("%s %s\n",method_name (intrinsics [i]), method_name (intrinsics [i + 1]));
91 g_assert_not_reached ();
94 #endif
96 if (result == NULL)
97 return -1;
98 else
99 return (int)*result;
102 static int
103 type_to_expand_op (MonoType *type)
105 switch (type->type) {
106 case MONO_TYPE_I1:
107 case MONO_TYPE_U1:
108 return OP_EXPAND_I1;
109 case MONO_TYPE_I2:
110 case MONO_TYPE_U2:
111 return OP_EXPAND_I2;
112 case MONO_TYPE_I4:
113 case MONO_TYPE_U4:
114 return OP_EXPAND_I4;
115 case MONO_TYPE_I8:
116 case MONO_TYPE_U8:
117 return OP_EXPAND_I8;
118 case MONO_TYPE_R4:
119 return OP_EXPAND_R4;
120 case MONO_TYPE_R8:
121 return OP_EXPAND_R8;
122 default:
123 g_assert_not_reached ();
128 * Return a simd vreg for the simd value represented by SRC.
129 * SRC is the 'this' argument to methods.
130 * Set INDIRECT to TRUE if the value was loaded from memory.
132 static int
133 load_simd_vreg_class (MonoCompile *cfg, MonoClass *klass, MonoInst *src, gboolean *indirect)
135 const char *spec = INS_INFO (src->opcode);
137 if (indirect)
138 *indirect = FALSE;
139 if (src->opcode == OP_XMOVE) {
140 return src->sreg1;
141 } else if (src->opcode == OP_LDADDR) {
142 int res = ((MonoInst*)src->inst_p0)->dreg;
143 return res;
144 } else if (spec [MONO_INST_DEST] == 'x') {
145 return src->dreg;
146 } else if (src->type == STACK_PTR || src->type == STACK_MP) {
147 MonoInst *ins;
148 if (indirect)
149 *indirect = TRUE;
151 MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE);
152 ins->klass = klass;
153 ins->sreg1 = src->dreg;
154 ins->type = STACK_VTYPE;
155 ins->dreg = alloc_ireg (cfg);
156 MONO_ADD_INS (cfg->cbb, ins);
157 return ins->dreg;
159 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src->type);
160 mono_print_ins (src);
161 g_assert_not_reached ();
164 static int
165 load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean *indirect)
167 return load_simd_vreg_class (cfg, cmethod->klass, src, indirect);
170 /* Create and emit a SIMD instruction, dreg is auto-allocated */
171 static MonoInst*
172 emit_simd_ins (MonoCompile *cfg, MonoClass *klass, int opcode, int sreg1, int sreg2)
174 const char *spec = INS_INFO (opcode);
175 MonoInst *ins;
177 MONO_INST_NEW (cfg, ins, opcode);
178 if (spec [MONO_INST_DEST] == 'x') {
179 ins->dreg = alloc_xreg (cfg);
180 ins->type = STACK_VTYPE;
181 } else if (spec [MONO_INST_DEST] == 'i') {
182 ins->dreg = alloc_ireg (cfg);
183 ins->type = STACK_I4;
184 } else {
185 g_assert_not_reached ();
187 ins->sreg1 = sreg1;
188 ins->sreg2 = sreg2;
189 ins->klass = klass;
190 MONO_ADD_INS (cfg->cbb, ins);
191 return ins;
194 static MonoInst*
195 emit_xcompare (MonoCompile *cfg, MonoClass *klass, MonoType *etype, MonoInst *arg1, MonoInst *arg2)
197 MonoInst *ins;
198 gboolean is_fp = etype->type == MONO_TYPE_R4 || etype->type == MONO_TYPE_R8;
200 ins = emit_simd_ins (cfg, klass, is_fp ? OP_XCOMPARE_FP : OP_XCOMPARE, arg1->dreg, arg2->dreg);
201 ins->inst_c0 = CMP_EQ;
202 ins->inst_c1 = etype->type;
203 return ins;
206 static MonoType*
207 get_vector_t_elem_type (MonoType *vector_type)
209 MonoClass *klass;
210 MonoType *etype;
212 g_assert (vector_type->type == MONO_TYPE_GENERICINST);
213 klass = mono_class_from_mono_type_internal (vector_type);
214 g_assert (!strcmp (m_class_get_name (klass), "Vector`1"));
215 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
216 return etype;
219 static guint16 vector_methods [] = {
220 SN_ConvertToDouble,
221 SN_ConvertToInt32,
222 SN_ConvertToInt64,
223 SN_ConvertToSingle,
224 SN_ConvertToUInt32,
225 SN_ConvertToUInt64,
226 SN_Narrow,
227 SN_Widen,
228 SN_get_IsHardwareAccelerated,
231 static MonoInst*
232 emit_sys_numerics_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
234 MonoInst *ins;
235 gboolean supported = FALSE;
236 int id;
237 MonoType *etype;
239 id = lookup_intrins (vector_methods, sizeof (vector_methods), cmethod);
240 if (id == -1)
241 return NULL;
243 //printf ("%s\n", mono_method_full_name (cmethod, 1));
245 #ifdef MONO_ARCH_SIMD_INTRINSICS
246 supported = TRUE;
247 #endif
249 if (cfg->verbose_level > 1) {
250 char *name = mono_method_full_name (cmethod, TRUE);
251 printf (" SIMD intrinsic %s\n", name);
252 g_free (name);
255 switch (id) {
256 case SN_get_IsHardwareAccelerated:
257 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
258 ins->type = STACK_I4;
259 return ins;
260 case SN_ConvertToInt32:
261 etype = get_vector_t_elem_type (fsig->params [0]);
262 g_assert (etype->type == MONO_TYPE_R4);
263 return emit_simd_ins (cfg, mono_class_from_mono_type_internal (fsig->ret), OP_CVTPS2DQ, args [0]->dreg, -1);
264 case SN_ConvertToSingle:
265 etype = get_vector_t_elem_type (fsig->params [0]);
266 g_assert (etype->type == MONO_TYPE_I4 || etype->type == MONO_TYPE_U4);
267 // FIXME:
268 if (etype->type == MONO_TYPE_U4)
269 return NULL;
270 return emit_simd_ins (cfg, mono_class_from_mono_type_internal (fsig->ret), OP_CVTDQ2PS, args [0]->dreg, -1);
271 case SN_ConvertToDouble:
272 case SN_ConvertToInt64:
273 case SN_ConvertToUInt32:
274 case SN_ConvertToUInt64:
275 case SN_Narrow:
276 case SN_Widen:
277 // FIXME:
278 break;
279 default:
280 break;
283 return NULL;
286 static guint16 vector_t_methods [] = {
287 SN_ctor,
288 SN_CopyTo,
289 SN_Equals,
290 SN_GreaterThan,
291 SN_GreaterThanOrEqual,
292 SN_LessThan,
293 SN_LessThanOrEqual,
294 SN_Max,
295 SN_Min,
296 SN_get_AllOnes,
297 SN_get_Count,
298 SN_get_Item,
299 SN_get_Zero,
300 SN_op_Addition,
301 SN_op_BitwiseAnd,
302 SN_op_BitwiseOr,
303 SN_op_Division,
304 SN_op_Equality,
305 SN_op_ExclusiveOr,
306 SN_op_Explicit,
307 SN_op_Inequality,
308 SN_op_Multiply,
309 SN_op_Subtraction
312 static MonoInst*
313 emit_sys_numerics_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
315 MonoInst *ins;
316 MonoType *type, *etype;
317 MonoClass *klass;
318 int size, len, id;
319 gboolean is_unsigned;
321 id = lookup_intrins (vector_t_methods, sizeof (vector_t_methods), cmethod);
322 if (id == -1)
323 return NULL;
325 klass = cmethod->klass;
326 type = m_class_get_byval_arg (klass);
327 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
328 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
329 g_assert (size);
330 len = register_size / size;
332 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
333 return NULL;
335 if (cfg->verbose_level > 1) {
336 char *name = mono_method_full_name (cmethod, TRUE);
337 printf (" SIMD intrinsic %s\n", name);
338 g_free (name);
341 switch (id) {
342 case SN_get_Count:
343 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
344 break;
345 EMIT_NEW_ICONST (cfg, ins, len);
346 return ins;
347 case SN_get_Zero:
348 g_assert (fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type));
349 return emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
350 case SN_get_AllOnes: {
351 /* Compare a zero vector with itself */
352 ins = emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
353 return emit_xcompare (cfg, klass, etype, ins, ins);
355 case SN_get_Item: {
356 if (!COMPILE_LLVM (cfg))
357 return NULL;
358 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
359 MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "IndexOutOfRangeException");
360 int opcode = -1;
361 int dreg;
362 gboolean is64 = FALSE;
363 switch (etype->type) {
364 case MONO_TYPE_I8:
365 case MONO_TYPE_U8:
366 opcode = OP_XEXTRACT_I64;
367 is64 = TRUE;
368 dreg = alloc_lreg (cfg);
369 break;
370 case MONO_TYPE_R8:
371 opcode = OP_XEXTRACT_R8;
372 dreg = alloc_freg (cfg);
373 break;
374 case MONO_TYPE_R4:
375 g_assert (cfg->r4fp);
376 opcode = OP_XEXTRACT_R4;
377 dreg = alloc_freg (cfg);
378 break;
379 default:
380 opcode = OP_XEXTRACT_I32;
381 dreg = alloc_ireg (cfg);
382 break;
384 MONO_INST_NEW (cfg, ins, opcode);
385 ins->dreg = dreg;
386 ins->sreg1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
387 ins->sreg2 = args [1]->dreg;
388 ins->inst_c0 = etype->type;
389 mini_type_to_eval_stack_type (cfg, etype, ins);
390 MONO_ADD_INS (cfg->cbb, ins);
391 return ins;
393 case SN_ctor:
394 if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) {
395 int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
397 int opcode = type_to_expand_op (etype);
398 ins = emit_simd_ins (cfg, klass, opcode, args [1]->dreg, -1);
399 ins->dreg = dreg;
400 return ins;
402 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
403 MonoInst *array_ins = args [1];
404 MonoInst *index_ins;
405 MonoInst *ldelema_ins;
406 MonoInst *var;
407 int end_index_reg;
409 if (args [0]->opcode != OP_LDADDR)
410 return NULL;
412 /* .ctor (T[]) or .ctor (T[], index) */
414 if (fsig->param_count == 2) {
415 index_ins = args [2];
416 } else {
417 EMIT_NEW_ICONST (cfg, index_ins, 0);
420 /* Emit index check for the end (index + len - 1 < array length) */
421 end_index_reg = alloc_ireg (cfg);
422 EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1);
423 MONO_EMIT_BOUNDS_CHECK (cfg, array_ins->dreg, MonoArray, max_length, end_index_reg);
425 /* Load the array slice into the simd reg */
426 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, TRUE);
427 g_assert (args [0]->opcode == OP_LDADDR);
428 var = (MonoInst*)args [0]->inst_p0;
429 EMIT_NEW_LOAD_MEMBASE (cfg, ins, OP_LOADX_MEMBASE, var->dreg, ldelema_ins->dreg, 0);
430 ins->klass = cmethod->klass;
431 return args [0];
433 break;
434 case SN_CopyTo:
435 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
436 MonoInst *array_ins = args [1];
437 MonoInst *index_ins;
438 MonoInst *ldelema_ins;
439 int val_vreg, end_index_reg;
441 val_vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
443 /* CopyTo (T[]) or CopyTo (T[], index) */
445 if (fsig->param_count == 2) {
446 index_ins = args [2];
447 } else {
448 EMIT_NEW_ICONST (cfg, index_ins, 0);
451 /* CopyTo () does complicated argument checks */
452 mini_emit_bounds_check_offset (cfg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), index_ins->dreg, "ArgumentOutOfRangeException");
453 end_index_reg = alloc_ireg (cfg);
454 int len_reg = alloc_ireg (cfg);
455 MONO_EMIT_NEW_LOAD_MEMBASE_OP_FLAGS (cfg, OP_LOADI4_MEMBASE, len_reg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), MONO_INST_INVARIANT_LOAD);
456 EMIT_NEW_BIALU (cfg, ins, OP_ISUB, end_index_reg, len_reg, index_ins->dreg);
457 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, end_index_reg, len);
458 MONO_EMIT_NEW_COND_EXC (cfg, LT, "ArgumentException");
460 /* Load the array slice into the simd reg */
461 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, FALSE);
462 EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, ldelema_ins->dreg, 0, val_vreg);
463 ins->klass = cmethod->klass;
464 return ins;
466 break;
467 case SN_Equals:
468 if (fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && mono_metadata_type_equal (fsig->params [0], type)) {
469 int sreg1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
471 return emit_simd_ins (cfg, klass, OP_XEQUAL, sreg1, args [1]->dreg);
472 } else if (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)) {
473 /* Per element equality */
474 return emit_xcompare (cfg, klass, etype, args [0], args [1]);
476 break;
477 case SN_op_Equality:
478 case SN_op_Inequality:
479 g_assert (fsig->param_count == 2 && fsig->ret->type == MONO_TYPE_BOOLEAN &&
480 mono_metadata_type_equal (fsig->params [0], type) &&
481 mono_metadata_type_equal (fsig->params [1], type));
482 ins = emit_simd_ins (cfg, klass, OP_XEQUAL, args [0]->dreg, args [1]->dreg);
483 if (id == SN_op_Inequality) {
484 int sreg = ins->dreg;
485 int dreg = alloc_ireg (cfg);
486 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, sreg, 0);
487 EMIT_NEW_UNALU (cfg, ins, OP_CEQ, dreg, -1);
489 return ins;
490 case SN_GreaterThan:
491 case SN_GreaterThanOrEqual:
492 case SN_LessThan:
493 case SN_LessThanOrEqual:
494 g_assert (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type));
495 is_unsigned = etype->type == MONO_TYPE_U1 || etype->type == MONO_TYPE_U2 || etype->type == MONO_TYPE_U4 || etype->type == MONO_TYPE_U8;
496 ins = emit_xcompare (cfg, klass, etype, args [0], args [1]);
497 switch (id) {
498 case SN_GreaterThan:
499 ins->inst_c0 = is_unsigned ? CMP_GT_UN : CMP_GT;
500 break;
501 case SN_GreaterThanOrEqual:
502 ins->inst_c0 = is_unsigned ? CMP_GE_UN : CMP_GE;
503 break;
504 case SN_LessThan:
505 ins->inst_c0 = is_unsigned ? CMP_LT_UN : CMP_LT;
506 break;
507 case SN_LessThanOrEqual:
508 ins->inst_c0 = is_unsigned ? CMP_LE_UN : CMP_LE;
509 break;
510 default:
511 g_assert_not_reached ();
513 return ins;
514 case SN_op_Explicit:
515 return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1);
516 case SN_op_Addition:
517 case SN_op_Subtraction:
518 case SN_op_Division:
519 case SN_op_Multiply:
520 case SN_op_BitwiseAnd:
521 case SN_op_BitwiseOr:
522 case SN_op_ExclusiveOr:
523 case SN_Max:
524 case SN_Min:
525 if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)))
526 return NULL;
527 ins = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, args [1]->dreg);
528 ins->inst_c1 = etype->type;
530 if (etype->type == MONO_TYPE_R4 || etype->type == MONO_TYPE_R8) {
531 switch (id) {
532 case SN_op_Addition:
533 ins->inst_c0 = OP_FADD;
534 break;
535 case SN_op_Subtraction:
536 ins->inst_c0 = OP_FSUB;
537 break;
538 case SN_op_Multiply:
539 ins->inst_c0 = OP_FMUL;
540 break;
541 case SN_op_Division:
542 ins->inst_c0 = OP_FDIV;
543 break;
544 case SN_Max:
545 ins->inst_c0 = OP_FMAX;
546 break;
547 case SN_Min:
548 ins->inst_c0 = OP_FMIN;
549 break;
550 default:
551 NULLIFY_INS (ins);
552 return NULL;
554 } else {
555 switch (id) {
556 case SN_op_Addition:
557 ins->inst_c0 = OP_IADD;
558 break;
559 case SN_op_Subtraction:
560 ins->inst_c0 = OP_ISUB;
561 break;
563 case SN_op_Division:
564 ins->inst_c0 = OP_IDIV;
565 break;
566 case SN_op_Multiply:
567 ins->inst_c0 = OP_IMUL;
568 break;
570 case SN_op_BitwiseAnd:
571 ins->inst_c0 = OP_IAND;
572 break;
573 case SN_op_BitwiseOr:
574 ins->inst_c0 = OP_IOR;
575 break;
576 case SN_op_ExclusiveOr:
577 ins->inst_c0 = OP_IXOR;
578 break;
579 case SN_Max:
580 ins->inst_c0 = OP_IMAX;
581 break;
582 case SN_Min:
583 ins->inst_c0 = OP_IMIN;
584 break;
585 default:
586 NULLIFY_INS (ins);
587 return NULL;
590 return ins;
591 default:
592 break;
595 return NULL;
598 #ifdef TARGET_AMD64
600 static guint16 popcnt_methods [] = {
601 SN_PopCount,
602 SN_get_IsSupported
605 static guint16 lzcnt_methods [] = {
606 SN_LeadingZeroCount,
607 SN_get_IsSupported
610 static guint16 bmi1_methods [] = {
611 SN_AndNot,
612 SN_BitFieldExtract,
613 SN_ExtractLowestSetBit,
614 SN_GetMaskUpToLowestSetBit,
615 SN_ResetLowestSetBit,
616 SN_TrailingZeroCount,
617 SN_get_IsSupported,
620 static guint16 bmi2_methods [] = {
621 SN_MultiplyNoFlags,
622 SN_ParallelBitDeposit,
623 SN_ParallelBitExtract,
624 SN_ZeroHighBits,
625 SN_get_IsSupported,
628 static MonoInst*
629 emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
631 const char *class_name;
632 const char *class_ns;
633 MonoInst *ins;
634 int id;
635 gboolean supported, is_64bit;
636 MonoClass *klass = cmethod->klass;
638 class_ns = m_class_get_name_space (klass);
639 class_name = m_class_get_name (klass);
640 if (!strcmp (class_name, "Popcnt") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Popcnt"))) {
641 id = lookup_intrins (popcnt_methods, sizeof (popcnt_methods), cmethod);
642 if (id == -1)
643 return NULL;
645 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_POPCNT) != 0;
646 is_64bit = !strcmp (class_name, "X64");
648 switch (id) {
649 case SN_get_IsSupported:
650 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
651 ins->type = STACK_I4;
652 return ins;
653 case SN_PopCount:
654 if (!supported)
655 return NULL;
656 MONO_INST_NEW (cfg, ins, is_64bit ? OP_POPCNT64 : OP_POPCNT32);
657 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
658 ins->sreg1 = args [0]->dreg;
659 ins->type = is_64bit ? STACK_I8 : STACK_I4;
660 MONO_ADD_INS (cfg->cbb, ins);
661 return ins;
662 default:
663 return NULL;
666 if (!strcmp (class_name, "Lzcnt") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Lzcnt"))) {
667 id = lookup_intrins (lzcnt_methods, sizeof (lzcnt_methods), cmethod);
668 if (id == -1)
669 return NULL;
671 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_LZCNT) != 0;
672 is_64bit = !strcmp (class_name, "X64");
674 switch (id) {
675 case SN_get_IsSupported:
676 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
677 ins->type = STACK_I4;
678 return ins;
679 case SN_LeadingZeroCount:
680 if (!supported)
681 return NULL;
682 MONO_INST_NEW (cfg, ins, is_64bit ? OP_LZCNT64 : OP_LZCNT32);
683 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
684 ins->sreg1 = args [0]->dreg;
685 ins->type = is_64bit ? STACK_I8 : STACK_I4;
686 MONO_ADD_INS (cfg->cbb, ins);
687 return ins;
688 default:
689 return NULL;
692 if (!strcmp (class_name, "Bmi1") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Bmi1"))) {
693 if (!COMPILE_LLVM (cfg))
694 return NULL;
695 id = lookup_intrins (bmi1_methods, sizeof (bmi1_methods), cmethod);
697 g_assert (id != -1);
698 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_BMI1) != 0;
699 is_64bit = !strcmp (class_name, "X64");
701 switch (id) {
702 case SN_get_IsSupported:
703 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
704 ins->type = STACK_I4;
705 return ins;
706 case SN_AndNot: {
707 // (a ^ -1) & b
708 // LLVM replaces it with `andn`
709 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
710 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
711 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LXOR_IMM : OP_IXOR_IMM, tmp_reg, args [0]->dreg, -1);
712 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, tmp_reg, args [1]->dreg);
713 return ins;
715 case SN_BitFieldExtract: {
716 if (fsig->param_count == 2) {
717 MONO_INST_NEW (cfg, ins, is_64bit ? OP_BEXTR64 : OP_BEXTR32);
718 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
719 ins->sreg1 = args [0]->dreg;
720 ins->sreg2 = args [1]->dreg;
721 ins->type = is_64bit ? STACK_I8 : STACK_I4;
722 MONO_ADD_INS (cfg->cbb, ins);
723 return ins;
726 case SN_GetMaskUpToLowestSetBit: {
727 // x ^ (x - 1)
728 // LLVM replaces it with `blsmsk`
729 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
730 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
731 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LSUB_IMM : OP_ISUB_IMM, tmp_reg, args [0]->dreg, 1);
732 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LXOR : OP_IXOR, result_reg, args [0]->dreg, tmp_reg);
733 return ins;
735 case SN_ResetLowestSetBit: {
736 // x & (x - 1)
737 // LLVM replaces it with `blsr`
738 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
739 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
740 EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LSUB_IMM : OP_ISUB_IMM, tmp_reg, args [0]->dreg, 1);
741 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, args [0]->dreg, tmp_reg);
742 return ins;
744 case SN_ExtractLowestSetBit: {
745 // x & (0 - x)
746 // LLVM replaces it with `blsi`
747 int tmp_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
748 int result_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
749 int zero_reg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
750 MONO_EMIT_NEW_ICONST (cfg, zero_reg, 0);
751 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LSUB : OP_ISUB, tmp_reg, zero_reg, args [0]->dreg);
752 EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, args [0]->dreg, tmp_reg);
753 return ins;
755 case SN_TrailingZeroCount:
756 MONO_INST_NEW (cfg, ins, is_64bit ? OP_CTTZ64 : OP_CTTZ32);
757 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
758 ins->sreg1 = args [0]->dreg;
759 ins->type = is_64bit ? STACK_I8 : STACK_I4;
760 MONO_ADD_INS (cfg->cbb, ins);
761 return ins;
762 default:
763 g_assert_not_reached ();
766 if (!strcmp (class_name, "Bmi2") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Bmi2"))) {
767 if (!COMPILE_LLVM (cfg))
768 return NULL;
769 id = lookup_intrins (bmi2_methods, sizeof (bmi2_methods), cmethod);
770 g_assert (id != -1);
771 supported = (mini_get_cpu_features (cfg) & MONO_CPU_X86_BMI2) != 0;
772 is_64bit = !strcmp (class_name, "X64");
774 switch (id) {
775 case SN_get_IsSupported:
776 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
777 ins->type = STACK_I4;
778 return ins;
779 case SN_MultiplyNoFlags:
780 if (fsig->param_count == 2) {
781 MONO_INST_NEW (cfg, ins, is_64bit ? OP_MULX_H64 : OP_MULX_H32);
782 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
783 ins->sreg1 = args [0]->dreg;
784 ins->sreg2 = args [1]->dreg;
785 ins->type = is_64bit ? STACK_I8 : STACK_I4;
786 MONO_ADD_INS (cfg->cbb, ins);
787 } else if (fsig->param_count == 3) {
788 MONO_INST_NEW (cfg, ins, is_64bit ? OP_MULX_HL64 : OP_MULX_HL32);
789 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
790 ins->sreg1 = args [0]->dreg;
791 ins->sreg2 = args [1]->dreg;
792 ins->sreg3 = args [2]->dreg;
793 ins->type = is_64bit ? STACK_I8 : STACK_I4;
794 MONO_ADD_INS (cfg->cbb, ins);
795 } else {
796 g_assert_not_reached ();
798 return ins;
799 case SN_ZeroHighBits:
800 MONO_INST_NEW (cfg, ins, is_64bit ? OP_BZHI64 : OP_BZHI32);
801 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
802 ins->sreg1 = args [0]->dreg;
803 ins->sreg2 = args [1]->dreg;
804 ins->type = is_64bit ? STACK_I8 : STACK_I4;
805 MONO_ADD_INS (cfg->cbb, ins);
806 return ins;
807 case SN_ParallelBitExtract:
808 MONO_INST_NEW (cfg, ins, is_64bit ? OP_PEXT64 : OP_PEXT32);
809 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
810 ins->sreg1 = args [0]->dreg;
811 ins->sreg2 = args [1]->dreg;
812 ins->type = is_64bit ? STACK_I8 : STACK_I4;
813 MONO_ADD_INS (cfg->cbb, ins);
814 return ins;
815 case SN_ParallelBitDeposit:
816 MONO_INST_NEW (cfg, ins, is_64bit ? OP_PDEP64 : OP_PDEP32);
817 ins->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
818 ins->sreg1 = args [0]->dreg;
819 ins->sreg2 = args [1]->dreg;
820 ins->type = is_64bit ? STACK_I8 : STACK_I4;
821 MONO_ADD_INS (cfg->cbb, ins);
822 return ins;
823 default:
824 g_assert_not_reached ();
828 return NULL;
830 #endif
832 static guint16 vector_128_t_methods [] = {
833 SN_get_Count,
836 static MonoInst*
837 emit_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
839 MonoInst *ins;
840 MonoType *type, *etype;
841 MonoClass *klass;
842 int size, len, id;
844 id = lookup_intrins (vector_128_t_methods, sizeof (vector_128_t_methods), cmethod);
845 if (id == -1)
846 return NULL;
848 klass = cmethod->klass;
849 type = m_class_get_byval_arg (klass);
850 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
851 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
852 g_assert (size);
853 len = 16 / size;
855 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
856 return NULL;
858 if (cfg->verbose_level > 1) {
859 char *name = mono_method_full_name (cmethod, TRUE);
860 printf (" SIMD intrinsic %s\n", name);
861 g_free (name);
864 switch (id) {
865 case SN_get_Count:
866 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
867 break;
868 EMIT_NEW_ICONST (cfg, ins, len);
869 return ins;
870 default:
871 break;
874 return NULL;
877 static guint16 vector_256_t_methods [] = {
878 SN_get_Count,
881 static MonoInst*
882 emit_vector256_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
884 MonoInst *ins;
885 MonoType *type, *etype;
886 MonoClass *klass;
887 int size, len, id;
889 id = lookup_intrins (vector_256_t_methods, sizeof (vector_256_t_methods), cmethod);
890 if (id == -1)
891 return NULL;
893 klass = cmethod->klass;
894 type = m_class_get_byval_arg (klass);
895 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
896 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
897 g_assert (size);
898 len = 32 / size;
900 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
901 return NULL;
903 if (cfg->verbose_level > 1) {
904 char *name = mono_method_full_name (cmethod, TRUE);
905 printf (" SIMD intrinsic %s\n", name);
906 g_free (name);
909 switch (id) {
910 case SN_get_Count:
911 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
912 break;
913 EMIT_NEW_ICONST (cfg, ins, len);
914 return ins;
915 default:
916 break;
919 return NULL;
922 MonoInst*
923 mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
925 const char *class_name;
926 const char *class_ns;
927 MonoImage *image = m_class_get_image (cmethod->klass);
929 if (image != mono_get_corlib ())
930 return NULL;
932 class_ns = m_class_get_name_space (cmethod->klass);
933 class_name = m_class_get_name (cmethod->klass);
935 if (cmethod->klass->nested_in)
936 class_ns = m_class_get_name_space (cmethod->klass->nested_in), class_name, cmethod->klass->nested_in;
938 #ifdef TARGET_AMD64 // TODO: test and enable for x86 too
939 if (!strcmp (class_ns, "System.Runtime.Intrinsics.X86")) {
940 return emit_x86_intrinsics (cfg ,cmethod, fsig, args);
942 #endif
944 if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
945 if (!strcmp (class_name, "Vector128`1"))
946 return emit_vector128_t (cfg, cmethod, fsig, args);
947 if (!strcmp (class_name, "Vector256`1"))
948 return emit_vector256_t (cfg, cmethod, fsig, args);
951 if (!strcmp (class_ns, "System.Numerics")) {
952 if (!strcmp (class_name, "Vector"))
953 return emit_sys_numerics_vector (cfg, cmethod, fsig, args);
954 if (!strcmp (class_name, "Vector`1"))
955 return emit_sys_numerics_vector_t (cfg, cmethod, fsig, args);
958 return NULL;
961 void
962 mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
966 void
967 mono_simd_simplify_indirection (MonoCompile *cfg)
971 #else
/* SIMD intrinsics are not enabled for this configuration: emit the empty-source-file placeholder instead. */
MONO_EMPTY_SOURCE_FILE (simd_intrinsics_netcore);
975 #endif
977 #endif /* DISABLE_JIT */