[netcore][x64] Implement lowering of new SIMD OPs into SSE opcodes (#16672)
[mono-project.git] / mono / mini / simd-intrinsics-netcore.c
blob9bd6299372a3227208cbbeac751efc9c129adbb8
1 /**
2 * SIMD Intrinsics support for netcore
3 */
5 #include <config.h>
6 #include <mono/utils/mono-compiler.h>
8 #if defined(DISABLE_JIT)
/* Variant built when DISABLE_JIT is defined: there is nothing to set up. */
void
mono_simd_intrinsics_init (void)
{
}
15 #else
/* Only LLVM is supported as a backend. */
21 #include "mini.h"
22 #include "ir-emit.h"
23 #ifdef ENABLE_LLVM
24 #include "llvm-jit.h"
25 #endif
26 #include "mono/utils/bsearch.h"
27 #include <mono/metadata/abi-details.h>
28 #include <mono/metadata/reflection-internals.h>
30 #if defined (MONO_ARCH_SIMD_INTRINSICS) && defined(ENABLE_NETCORE)
32 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
33 #define MSGSTRFIELD1(line) str##line
34 static const struct msgstr_t {
35 #define METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
36 #define METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
37 #include "simd-methods-netcore.h"
38 #undef METHOD
39 #undef METHOD2
40 } method_names = {
41 #define METHOD(name) #name,
42 #define METHOD2(str,name) str,
43 #include "simd-methods-netcore.h"
44 #undef METHOD
45 #undef METHOD2
48 enum {
49 #define METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
50 #define METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
51 #include "simd-methods-netcore.h"
53 #define method_name(idx) ((const char*)&method_names + (idx))
55 static int register_size;
57 static MonoCPUFeatures
58 get_cpu_features (void)
60 #ifdef ENABLE_LLVM
61 return mono_llvm_get_cpu_features ();
62 #elif defined(TARGET_AMD64)
63 return mono_arch_get_cpu_features ();
64 #else
65 return (MonoCPUFeatures)0;
66 #endif
69 void
70 mono_simd_intrinsics_init (void)
72 register_size = 16;
73 #if FALSE
74 if ((get_cpu_features () & MONO_CPU_X86_AVX) != 0)
75 register_size = 32;
76 #endif
77 /* Tell the class init code the size of the System.Numerics.Register type */
78 mono_simd_register_size = register_size;
81 MonoInst*
82 mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr)
84 return NULL;
87 static int
88 simd_intrinsic_compare_by_name (const void *key, const void *value)
90 return strcmp ((const char*)key, method_name (*(guint16*)value));
93 static int
94 lookup_intrins (guint16 *intrinsics, int size, MonoMethod *cmethod)
96 const guint16 *result = (const guint16 *)mono_binary_search (cmethod->name, intrinsics, size / sizeof (guint16), sizeof (guint16), &simd_intrinsic_compare_by_name);
98 for (int i = 0; i < (size / sizeof (guint16)) - 1; ++i) {
99 if (method_name (intrinsics [i])[0] > method_name (intrinsics [i + 1])[0]) {
100 printf ("%s %s\n",method_name (intrinsics [i]), method_name (intrinsics [i + 1]));
101 g_assert_not_reached ();
105 if (result == NULL)
106 return -1;
107 else
108 return (int)*result;
111 static guint16 vector_methods [] = {
112 SN_get_IsHardwareAccelerated
115 static MonoInst*
116 emit_sys_numerics_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
118 MonoInst *ins;
119 gboolean supported = FALSE;
120 int id;
122 id = lookup_intrins (vector_methods, sizeof (vector_methods), cmethod);
123 if (id == -1)
124 return NULL;
126 //printf ("%s\n", mono_method_full_name (cmethod, 1));
128 #ifdef MONO_ARCH_SIMD_INTRINSICS
129 supported = TRUE;
130 #endif
132 switch (id) {
133 case SN_get_IsHardwareAccelerated:
134 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
135 ins->type = STACK_I4;
136 return ins;
137 default:
138 break;
141 return NULL;
144 static int
145 type_to_expand_op (MonoType *type)
147 switch (type->type) {
148 case MONO_TYPE_I1:
149 case MONO_TYPE_U1:
150 return OP_EXPAND_I1;
151 case MONO_TYPE_I2:
152 case MONO_TYPE_U2:
153 return OP_EXPAND_I2;
154 case MONO_TYPE_I4:
155 case MONO_TYPE_U4:
156 return OP_EXPAND_I4;
157 case MONO_TYPE_I8:
158 case MONO_TYPE_U8:
159 return OP_EXPAND_I8;
160 case MONO_TYPE_R4:
161 return OP_EXPAND_R4;
162 case MONO_TYPE_R8:
163 return OP_EXPAND_R8;
164 default:
165 g_assert_not_reached ();
170 * Return a simd vreg for the simd value represented by SRC.
171 * SRC is the 'this' argument to methods.
172 * Set INDIRECT to TRUE if the value was loaded from memory.
174 static int
175 load_simd_vreg_class (MonoCompile *cfg, MonoClass *klass, MonoInst *src, gboolean *indirect)
177 const char *spec = INS_INFO (src->opcode);
179 if (indirect)
180 *indirect = FALSE;
181 if (src->opcode == OP_XMOVE) {
182 return src->sreg1;
183 } else if (src->opcode == OP_LDADDR) {
184 int res = ((MonoInst*)src->inst_p0)->dreg;
185 return res;
186 } else if (spec [MONO_INST_DEST] == 'x') {
187 return src->dreg;
188 } else if (src->type == STACK_PTR || src->type == STACK_MP) {
189 MonoInst *ins;
190 if (indirect)
191 *indirect = TRUE;
193 MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE);
194 ins->klass = klass;
195 ins->sreg1 = src->dreg;
196 ins->type = STACK_VTYPE;
197 ins->dreg = alloc_ireg (cfg);
198 MONO_ADD_INS (cfg->cbb, ins);
199 return ins->dreg;
201 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src->type);
202 mono_print_ins (src);
203 g_assert_not_reached ();
206 static int
207 load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean *indirect)
209 return load_simd_vreg_class (cfg, cmethod->klass, src, indirect);
212 /* Create and emit a SIMD instruction, dreg is auto-allocated */
213 static MonoInst*
214 emit_simd_ins (MonoCompile *cfg, MonoClass *klass, int opcode, int sreg1, int sreg2)
216 const char *spec = INS_INFO (opcode);
217 MonoInst *ins;
219 MONO_INST_NEW (cfg, ins, opcode);
220 if (spec [MONO_INST_DEST] == 'x') {
221 ins->dreg = alloc_xreg (cfg);
222 ins->type = STACK_VTYPE;
223 } else if (spec [MONO_INST_DEST] == 'i') {
224 ins->dreg = alloc_ireg (cfg);
225 ins->type = STACK_I4;
226 } else {
227 g_assert_not_reached ();
229 ins->sreg1 = sreg1;
230 ins->sreg2 = sreg2;
231 ins->klass = klass;
232 MONO_ADD_INS (cfg->cbb, ins);
233 return ins;
236 static MonoInst*
237 emit_xcompare (MonoCompile *cfg, MonoClass *klass, MonoType *etype, MonoInst *arg1, MonoInst *arg2)
239 MonoInst *ins;
240 gboolean is_fp = etype->type == MONO_TYPE_R4 || etype->type == MONO_TYPE_R8;
242 ins = emit_simd_ins (cfg, klass, is_fp ? OP_XCOMPARE_FP : OP_XCOMPARE, arg1->dreg, arg2->dreg);
243 ins->inst_c0 = CMP_EQ;
244 ins->inst_c1 = etype->type;
245 return ins;
248 static guint16 vector_t_methods [] = {
249 SN_ctor,
250 SN_CopyTo,
251 SN_Equals,
252 SN_GreaterThan,
253 SN_GreaterThanOrEqual,
254 SN_LessThan,
255 SN_LessThanOrEqual,
256 SN_get_AllOnes,
257 SN_get_Count,
258 SN_get_Item,
259 SN_get_Zero,
260 SN_op_Addition,
261 SN_op_BitwiseAnd,
262 SN_op_BitwiseOr,
263 SN_op_Division,
264 SN_op_Equality,
265 SN_op_ExclusiveOr,
266 SN_op_Explicit,
267 SN_op_Inequality,
268 SN_op_Multiply,
269 SN_op_Subtraction
272 static MonoInst*
273 emit_sys_numerics_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
275 MonoInst *ins;
276 MonoType *type, *etype;
277 MonoClass *klass;
278 int size, len, id;
279 gboolean is_unsigned;
281 id = lookup_intrins (vector_t_methods, sizeof (vector_t_methods), cmethod);
282 if (id == -1)
283 return NULL;
285 klass = cmethod->klass;
286 type = m_class_get_byval_arg (klass);
287 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
288 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
289 g_assert (size);
290 len = register_size / size;
292 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
293 return NULL;
295 if (cfg->verbose_level > 1) {
296 char *name = mono_method_full_name (cmethod, TRUE);
297 printf (" SIMD intrinsic %s\n", name);
298 g_free (name);
301 switch (id) {
302 case SN_get_Count:
303 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
304 break;
305 EMIT_NEW_ICONST (cfg, ins, len);
306 return ins;
307 case SN_get_Zero:
308 g_assert (fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type));
309 return emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
310 case SN_get_AllOnes: {
311 /* Compare a zero vector with itself */
312 ins = emit_simd_ins (cfg, klass, OP_XZERO, -1, -1);
313 return emit_xcompare (cfg, klass, etype, ins, ins);
315 case SN_get_Item:
316 if (!COMPILE_LLVM (cfg))
317 return NULL;
318 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
319 MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "IndexOutOfRangeException");
320 int opcode = -1;
321 int dreg;
322 gboolean is64 = FALSE;
323 switch (etype->type) {
324 case MONO_TYPE_I8:
325 case MONO_TYPE_U8:
326 opcode = OP_XEXTRACT_I64;
327 is64 = TRUE;
328 dreg = alloc_lreg (cfg);
329 break;
330 case MONO_TYPE_R8:
331 opcode = OP_XEXTRACT_R8;
332 dreg = alloc_freg (cfg);
333 break;
334 case MONO_TYPE_R4:
335 g_assert (cfg->r4fp);
336 opcode = OP_XEXTRACT_R4;
337 dreg = alloc_freg (cfg);
338 break;
339 default:
340 opcode = OP_XEXTRACT_I32;
341 dreg = alloc_ireg (cfg);
342 break;
344 MONO_INST_NEW (cfg, ins, opcode);
345 ins->dreg = dreg;
346 ins->sreg1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
347 ins->sreg2 = args [1]->dreg;
348 ins->inst_c0 = etype->type;
349 mini_type_to_eval_stack_type (cfg, etype, ins);
350 MONO_ADD_INS (cfg->cbb, ins);
351 return ins;
352 case SN_ctor:
353 if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) {
354 int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
356 int opcode = type_to_expand_op (etype);
357 ins = emit_simd_ins (cfg, klass, opcode, args [1]->dreg, -1);
358 ins->dreg = dreg;
359 return ins;
361 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
362 MonoInst *array_ins = args [1];
363 MonoInst *index_ins;
364 MonoInst *ldelema_ins;
365 MonoInst *var;
366 int end_index_reg;
368 if (args [0]->opcode != OP_LDADDR)
369 return NULL;
371 /* .ctor (T[]) or .ctor (T[], index) */
373 if (fsig->param_count == 2) {
374 index_ins = args [2];
375 } else {
376 EMIT_NEW_ICONST (cfg, index_ins, 0);
379 /* Emit index check for the end (index + len - 1 < array length) */
380 end_index_reg = alloc_ireg (cfg);
381 EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1);
382 MONO_EMIT_BOUNDS_CHECK (cfg, array_ins->dreg, MonoArray, max_length, end_index_reg);
384 /* Load the array slice into the simd reg */
385 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, TRUE);
386 g_assert (args [0]->opcode == OP_LDADDR);
387 var = (MonoInst*)args [0]->inst_p0;
388 EMIT_NEW_LOAD_MEMBASE (cfg, ins, OP_LOADX_MEMBASE, var->dreg, ldelema_ins->dreg, 0);
389 ins->klass = cmethod->klass;
390 return args [0];
392 break;
393 case SN_CopyTo:
394 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
395 MonoInst *array_ins = args [1];
396 MonoInst *index_ins;
397 MonoInst *ldelema_ins;
398 int val_vreg, end_index_reg;
400 val_vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
402 /* CopyTo (T[]) or CopyTo (T[], index) */
404 if (fsig->param_count == 2) {
405 index_ins = args [2];
406 } else {
407 EMIT_NEW_ICONST (cfg, index_ins, 0);
410 /* CopyTo () does complicated argument checks */
411 mini_emit_bounds_check_offset (cfg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), index_ins->dreg, "ArgumentOutOfRangeException");
412 end_index_reg = alloc_ireg (cfg);
413 int len_reg = alloc_ireg (cfg);
414 MONO_EMIT_NEW_LOAD_MEMBASE_OP_FLAGS (cfg, OP_LOADI4_MEMBASE, len_reg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length), MONO_INST_INVARIANT_LOAD);
415 EMIT_NEW_BIALU (cfg, ins, OP_ISUB, end_index_reg, len_reg, index_ins->dreg);
416 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, end_index_reg, len);
417 MONO_EMIT_NEW_COND_EXC (cfg, LT, "ArgumentException");
419 /* Load the array slice into the simd reg */
420 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type_internal (etype), array_ins, index_ins, FALSE);
421 EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, ldelema_ins->dreg, 0, val_vreg);
422 ins->klass = cmethod->klass;
423 return ins;
425 break;
426 case SN_Equals:
427 if (fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && mono_metadata_type_equal (fsig->params [0], type)) {
428 int sreg1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
430 return emit_simd_ins (cfg, klass, OP_XEQUAL, sreg1, args [1]->dreg);
431 } else if (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)) {
432 /* Per element equality */
433 return emit_xcompare (cfg, klass, etype, args [0], args [1]);
435 break;
436 case SN_op_Equality:
437 case SN_op_Inequality:
438 g_assert (fsig->param_count == 2 && fsig->ret->type == MONO_TYPE_BOOLEAN &&
439 mono_metadata_type_equal (fsig->params [0], type) &&
440 mono_metadata_type_equal (fsig->params [1], type));
441 ins = emit_simd_ins (cfg, klass, OP_XEQUAL, args [0]->dreg, args [1]->dreg);
442 if (id == SN_op_Inequality) {
443 int sreg = ins->dreg;
444 int dreg = alloc_ireg (cfg);
445 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, sreg, 0);
446 EMIT_NEW_UNALU (cfg, ins, OP_CEQ, dreg, -1);
448 return ins;
449 case SN_GreaterThan:
450 case SN_GreaterThanOrEqual:
451 case SN_LessThan:
452 case SN_LessThanOrEqual:
453 g_assert (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type));
454 is_unsigned = etype->type == MONO_TYPE_U1 || etype->type == MONO_TYPE_U2 || etype->type == MONO_TYPE_U4 || etype->type == MONO_TYPE_U8;
455 ins = emit_xcompare (cfg, klass, etype, args [0], args [1]);
456 switch (id) {
457 case SN_GreaterThan:
458 ins->inst_c0 = is_unsigned ? CMP_GT_UN : CMP_GT;
459 break;
460 case SN_GreaterThanOrEqual:
461 ins->inst_c0 = is_unsigned ? CMP_GE_UN : CMP_GE;
462 break;
463 case SN_LessThan:
464 ins->inst_c0 = is_unsigned ? CMP_LT_UN : CMP_LT;
465 break;
466 case SN_LessThanOrEqual:
467 ins->inst_c0 = is_unsigned ? CMP_LE_UN : CMP_LE;
468 break;
469 default:
470 g_assert_not_reached ();
472 return ins;
473 case SN_op_Explicit:
474 return emit_simd_ins (cfg, klass, OP_XCAST, args [0]->dreg, -1);
475 case SN_op_Addition:
476 case SN_op_Subtraction:
477 case SN_op_Division:
478 case SN_op_Multiply:
479 case SN_op_BitwiseAnd:
480 case SN_op_BitwiseOr:
481 case SN_op_ExclusiveOr:
482 if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)))
483 return NULL;
484 ins = emit_simd_ins (cfg, klass, OP_XBINOP, args [0]->dreg, args [1]->dreg);
485 ins->inst_c1 = etype->type;
486 if (etype->type == MONO_TYPE_R4 || etype->type == MONO_TYPE_R8) {
487 switch (id) {
488 case SN_op_Addition:
489 ins->inst_c0 = OP_FADD;
490 break;
491 case SN_op_Subtraction:
492 ins->inst_c0 = OP_FSUB;
493 break;
494 case SN_op_Multiply:
495 ins->inst_c0 = OP_FMUL;
496 break;
497 case SN_op_Division:
498 ins->inst_c0 = OP_FDIV;
499 break;
500 default:
501 NULLIFY_INS (ins);
502 return NULL;
504 } else {
505 switch (id) {
506 case SN_op_Addition:
507 ins->inst_c0 = OP_IADD;
508 break;
509 case SN_op_Subtraction:
510 ins->inst_c0 = OP_ISUB;
511 break;
513 case SN_op_Division:
514 ins->inst_c0 = OP_IDIV;
515 break;
516 case SN_op_Multiply:
517 ins->inst_c0 = OP_IMUL;
518 break;
520 case SN_op_BitwiseAnd:
521 ins->inst_c0 = OP_IAND;
522 break;
523 case SN_op_BitwiseOr:
524 ins->inst_c0 = OP_IOR;
525 break;
526 case SN_op_ExclusiveOr:
527 ins->inst_c0 = OP_IXOR;
528 break;
529 default:
530 NULLIFY_INS (ins);
531 return NULL;
534 return ins;
535 default:
536 break;
539 return NULL;
542 #ifdef TARGET_AMD64
544 static guint16 popcnt_methods [] = {
545 SN_PopCount,
546 SN_get_IsSupported
549 static guint16 lzcnt_methods [] = {
550 SN_LeadingZeroCount,
551 SN_get_IsSupported
554 static guint16 bmi1_methods [] = {
555 SN_TrailingZeroCount,
556 SN_get_IsSupported,
559 static guint16 bmi2_methods [] = {
560 SN_ParallelBitDeposit,
561 SN_ParallelBitExtract,
562 SN_get_IsSupported,
565 static MonoInst*
566 emit_x86_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
568 const char *class_name;
569 const char *class_ns;
570 MonoInst *ins;
571 int id;
572 gboolean supported, is_64bit;
573 MonoClass *klass = cmethod->klass;
575 class_ns = m_class_get_name_space (klass);
576 class_name = m_class_get_name (klass);
577 if (!strcmp (class_name, "Popcnt") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Popcnt"))) {
578 id = lookup_intrins (popcnt_methods, sizeof (popcnt_methods), cmethod);
579 if (id == -1)
580 return NULL;
582 supported = (get_cpu_features () & MONO_CPU_X86_POPCNT) != 0;
583 is_64bit = !strcmp (class_name, "X64");
585 switch (id) {
586 case SN_get_IsSupported:
587 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
588 ins->type = STACK_I4;
589 return ins;
590 case SN_PopCount:
591 if (!supported)
592 return NULL;
593 MONO_INST_NEW (cfg, ins, is_64bit ? OP_POPCNT64 : OP_POPCNT32);
594 ins->dreg = alloc_ireg (cfg);
595 ins->sreg1 = args [0]->dreg;
596 MONO_ADD_INS (cfg->cbb, ins);
597 return ins;
598 default:
599 return NULL;
602 if (!strcmp (class_name, "Lzcnt") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Lzcnt"))) {
603 id = lookup_intrins (lzcnt_methods, sizeof (lzcnt_methods), cmethod);
604 if (id == -1)
605 return NULL;
607 supported = (get_cpu_features () & MONO_CPU_X86_LZCNT) != 0;
608 is_64bit = !strcmp (class_name, "X64");
610 switch (id) {
611 case SN_get_IsSupported:
612 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
613 ins->type = STACK_I4;
614 return ins;
615 case SN_LeadingZeroCount:
616 if (!supported)
617 return NULL;
618 MONO_INST_NEW (cfg, ins, is_64bit ? OP_LZCNT64 : OP_LZCNT32);
619 ins->dreg = alloc_ireg (cfg);
620 ins->sreg1 = args [0]->dreg;
621 MONO_ADD_INS (cfg->cbb, ins);
622 return ins;
623 default:
624 return NULL;
627 if (!strcmp (class_name, "Bmi1") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Bmi1"))) {
628 // We only support the subset used by corelib
629 if (m_class_get_image (cfg->method->klass) != mono_get_corlib ())
630 return NULL;
631 if (!COMPILE_LLVM (cfg))
632 return NULL;
633 id = lookup_intrins (bmi1_methods, sizeof (bmi1_methods), cmethod);
634 g_assert (id != -1);
635 supported = (get_cpu_features () & MONO_CPU_X86_BMI1) != 0;
636 is_64bit = !strcmp (class_name, "X64");
638 switch (id) {
639 case SN_get_IsSupported:
640 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
641 ins->type = STACK_I4;
642 return ins;
643 case SN_TrailingZeroCount:
644 MONO_INST_NEW (cfg, ins, is_64bit ? OP_CTTZ64 : OP_CTTZ32);
645 ins->dreg = alloc_ireg (cfg);
646 ins->sreg1 = args [0]->dreg;
647 ins->type = STACK_I4;
648 MONO_ADD_INS (cfg->cbb, ins);
649 return ins;
650 default:
651 g_assert_not_reached ();
654 if (!strcmp (class_name, "Bmi2") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Bmi2"))) {
655 // We only support the subset used by corelib
656 if (m_class_get_image (cfg->method->klass) != mono_get_corlib ())
657 return NULL;
658 if (!COMPILE_LLVM (cfg))
659 return NULL;
660 id = lookup_intrins (bmi2_methods, sizeof (bmi2_methods), cmethod);
661 g_assert (id != -1);
662 supported = (get_cpu_features () & MONO_CPU_X86_BMI2) != 0;
663 is_64bit = !strcmp (class_name, "X64");
665 switch (id) {
666 case SN_get_IsSupported:
667 EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
668 ins->type = STACK_I4;
669 return ins;
670 case SN_ParallelBitExtract:
671 MONO_INST_NEW (cfg, ins, is_64bit ? OP_PEXT64 : OP_PEXT32);
672 ins->dreg = alloc_ireg (cfg);
673 ins->sreg1 = args [0]->dreg;
674 ins->sreg2 = args [1]->dreg;
675 ins->type = STACK_I4;
676 MONO_ADD_INS (cfg->cbb, ins);
677 return ins;
678 case SN_ParallelBitDeposit:
679 MONO_INST_NEW (cfg, ins, is_64bit ? OP_PDEP64 : OP_PDEP32);
680 ins->dreg = alloc_ireg (cfg);
681 ins->sreg1 = args [0]->dreg;
682 ins->sreg2 = args [1]->dreg;
683 ins->type = STACK_I4;
684 MONO_ADD_INS (cfg->cbb, ins);
685 return ins;
686 default:
687 g_assert_not_reached ();
689 //printf ("%s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod));
692 return NULL;
694 #endif
696 static guint16 vector_128_t_methods [] = {
697 SN_get_Count,
700 static MonoInst*
701 emit_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
703 MonoInst *ins;
704 MonoType *type, *etype;
705 MonoClass *klass;
706 int size, len, id;
708 id = lookup_intrins (vector_128_t_methods, sizeof (vector_128_t_methods), cmethod);
709 if (id == -1)
710 return NULL;
712 klass = cmethod->klass;
713 type = m_class_get_byval_arg (klass);
714 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
715 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
716 g_assert (size);
717 len = 16 / size;
719 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
720 return NULL;
722 if (cfg->verbose_level > 1) {
723 char *name = mono_method_full_name (cmethod, TRUE);
724 printf (" SIMD intrinsic %s\n", name);
725 g_free (name);
728 switch (id) {
729 case SN_get_Count:
730 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
731 break;
732 EMIT_NEW_ICONST (cfg, ins, len);
733 return ins;
734 default:
735 break;
738 return NULL;
741 static guint16 vector_256_t_methods [] = {
742 SN_get_Count,
745 static MonoInst*
746 emit_vector256_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
748 MonoInst *ins;
749 MonoType *type, *etype;
750 MonoClass *klass;
751 int size, len, id;
753 id = lookup_intrins (vector_256_t_methods, sizeof (vector_256_t_methods), cmethod);
754 if (id == -1)
755 return NULL;
757 klass = cmethod->klass;
758 type = m_class_get_byval_arg (klass);
759 etype = mono_class_get_context (klass)->class_inst->type_argv [0];
760 size = mono_class_value_size (mono_class_from_mono_type_internal (etype), NULL);
761 g_assert (size);
762 len = 32 / size;
764 if (!MONO_TYPE_IS_PRIMITIVE (etype) || etype->type == MONO_TYPE_CHAR || etype->type == MONO_TYPE_BOOLEAN)
765 return NULL;
767 if (cfg->verbose_level > 1) {
768 char *name = mono_method_full_name (cmethod, TRUE);
769 printf (" SIMD intrinsic %s\n", name);
770 g_free (name);
773 switch (id) {
774 case SN_get_Count:
775 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
776 break;
777 EMIT_NEW_ICONST (cfg, ins, len);
778 return ins;
779 default:
780 break;
783 return NULL;
786 MonoInst*
787 mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
789 const char *class_name;
790 const char *class_ns;
791 MonoImage *image = m_class_get_image (cmethod->klass);
793 if (image != mono_get_corlib ())
794 return NULL;
795 // FIXME:
796 if (cfg->compile_aot)
797 return NULL;
799 class_ns = m_class_get_name_space (cmethod->klass);
800 class_name = m_class_get_name (cmethod->klass);
801 if (!strcmp (class_ns, "System.Numerics") && !strcmp (class_name, "Vector")) {
802 MonoInst *ins = emit_sys_numerics_vector (cfg, cmethod, fsig, args);
803 if (!ins) {
804 //printf ("M: %s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod));
806 return ins;
808 if (!strcmp (class_ns, "System.Numerics") && !strcmp (class_name, "Vector`1")) {
809 MonoInst *ins = emit_sys_numerics_vector_t (cfg, cmethod, fsig, args);
810 if (!ins) {
811 //printf ("M: %s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod));
813 return ins;
815 if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
816 if (!strcmp (class_name, "Vector128`1"))
817 return emit_vector128_t (cfg ,cmethod, fsig, args);
818 if (!strcmp (class_name, "Vector256`1"))
819 return emit_vector256_t (cfg ,cmethod, fsig, args);
821 #ifdef TARGET_AMD64
822 if (cmethod->klass->nested_in)
823 class_ns = m_class_get_name_space (cmethod->klass->nested_in), class_name, cmethod->klass->nested_in;
824 if (!strcmp (class_ns, "System.Runtime.Intrinsics.X86"))
825 return emit_x86_intrinsics (cfg ,cmethod, fsig, args);
826 #endif
828 return NULL;
831 void
832 mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins)
836 void
837 mono_simd_simplify_indirection (MonoCompile *cfg)
841 #else
843 MONO_EMPTY_SOURCE_FILE (simd_intrinsics_netcore);
845 #endif
847 #endif /* DISABLE_JIT */