/*
 * mini-x86.c: x86 backend for the Mono code generator
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/utils/mono-math.h>
#include <mono/utils/mono-counters.h>
#include <mono/utils/mono-mmap.h>

#include "trace.h"
#include "mini-x86.h"
#include "cpu-x86.h"
#include "ir-emit.h"
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;

#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#ifdef TARGET_WIN32
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif

/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
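/*
 * Example (illustrative, not from the original source): ALIGN_TO rounds up
 * to the next multiple of a power-of-two alignment:
 *
 *   ALIGN_TO (13, 8) == 16
 *   ALIGN_TO (16, 8) == 16
 */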
#define ARGS_OFFSET 8

#ifdef TARGET_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif
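/*
 * Sketch (illustrative, added): under stdcall the callee pops its own
 * arguments with a "ret $imm", so callers must not adjust ESP after the
 * call. On win32 a pinvoke signature with MONO_CALL_DEFAULT is treated the
 * same way, e.g.:
 *
 *   if (CALLCONV_IS_STDCALL (sig))
 *       ;  // skip the post-call ESP adjustment
 */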
MonoBreakpointInfo
mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];

/*
 * The code generated for sequence points reads from this location, which is
 * made read-only when single stepping is enabled.
 */
static gpointer ss_trigger_page;

/* Enabled breakpoints read from this trigger page */
static gpointer bp_trigger_page;
const char*
mono_arch_regname (int reg)
{
	switch (reg) {
	case X86_EAX: return "%eax";
	case X86_EBX: return "%ebx";
	case X86_ECX: return "%ecx";
	case X86_EDX: return "%edx";
	case X86_ESP: return "%esp";
	case X86_EBP: return "%ebp";
	case X86_EDI: return "%edi";
	case X86_ESI: return "%esi";
	}
	return "unknown";
}
const char*
mono_arch_fregname (int reg)
{
	switch (reg) {
	case 0:
		return "%fr0";
	case 1:
		return "%fr1";
	case 2:
		return "%fr2";
	case 3:
		return "%fr3";
	case 4:
		return "%fr4";
	case 5:
		return "%fr5";
	case 6:
		return "%fr6";
	case 7:
		return "%fr7";
	default:
		return "unknown";
	}
}
const char *
mono_arch_xregname (int reg)
{
	switch (reg) {
	case 0:
		return "%xmm0";
	case 1:
		return "%xmm1";
	case 2:
		return "%xmm2";
	case 3:
		return "%xmm3";
	case 4:
		return "%xmm4";
	case 5:
		return "%xmm5";
	case 6:
		return "%xmm6";
	case 7:
		return "%xmm7";
	default:
		return "unknown";
	}
}
void
mono_x86_patch (unsigned char* code, gpointer target)
{
	x86_patch (code, (unsigned char*)target);
}
typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgOnFloatFpStack,
	ArgOnDoubleFpStack,
	ArgNone
} ArgStorage;

typedef struct {
	gint16 offset;
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	guint32 stack_align_amount;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;
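/*
 * Sketch (illustrative, added): CallInfo ends in a one-element array used as
 * a variable-length tail, so a CallInfo describing n arguments is allocated
 * with room for n ArgInfo entries, as get_call_info below does:
 *
 *   CallInfo *ci = g_malloc0 (sizeof (CallInfo) + sizeof (ArgInfo) * n);
 */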
#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

#if defined(TARGET_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
static void inline
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}

static void inline
add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	g_assert (PARAM_REGS == 0);

	ainfo->storage = ArgOnStack;
	(*stack_size) += sizeof (gpointer) * 2;
}
static void inline
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += is_double ? 8 : 4;
	}
	else {
		/* A double register */
		if (is_double)
			ainfo->storage = ArgInDoubleSSEReg;
		else
			ainfo->storage = ArgInFloatSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}
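/*
 * Worked example (illustrative, added): since PARAM_REGS and
 * FLOAT_PARAM_REGS are both 0 on x86, every argument ends up ArgOnStack.
 * For a hypothetical signature (int, double, long) the helpers above assign:
 *
 *   int    -> offset 0,  stack_size += 4   (add_general)
 *   double -> offset 4,  stack_size += 8   (add_float)
 *   long   -> offset 12, stack_size += 8   (add_general_pair)
 *
 * for a total stack usage of 20 bytes.
 */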
static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	size = mini_type_stack_size_full (gsctx, &klass->byval_arg, NULL, sig->pinvoke);

#ifdef SMALL_STRUCTS_IN_REGS
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * The exact rules are not very well documented; the code below seems to work with the
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
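/*
 * Examples of the SMALL_STRUCTS_IN_REGS cases above (illustrative, added):
 *
 *   struct { double d; }  -> returned on the fp stack (ArgOnDoubleFpStack)
 *   struct { float f; }   -> returned on the fp stack (ArgOnFloatFpStack)
 *   struct { int a, b; }  -> 8 bytes, returned in EAX:EDX
 *   struct { short s; }   -> 2 bytes, returned in EAX
 */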
/*
 * get_call_info:
 *
 * Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 */
static CallInfo*
get_call_info_internal (MonoGenericSharingContext *gsctx, CallInfo *cinfo, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mini_type_get_underlying_type (gsctx, sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (ret_type)) {
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = X86_EAX;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mini_type_get_underlying_type (gsctx, sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (ptype)) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	if (mono_do_x86_stack_align && (stack_size % MONO_ARCH_FRAME_ALIGNMENT) != 0) {
		cinfo->need_stack_align = TRUE;
		cinfo->stack_align_amount = MONO_ARCH_FRAME_ALIGNMENT - (stack_size % MONO_ARCH_FRAME_ALIGNMENT);
		stack_size += cinfo->stack_align_amount;
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
static CallInfo*
get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	int n = sig->hasthis + sig->param_count;
	CallInfo *cinfo;

	if (mp)
		cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
	else
		cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	return get_call_info_internal (gsctx, cinfo, sig, is_pinvoke);
}
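/*
 * Usage sketch (illustrative, added): callers typically allocate from the
 * method's mempool so the CallInfo is freed together with the MonoCompile:
 *
 *   CallInfo *cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
 *   if (cinfo->ret.storage == ArgInIReg)
 *       ...  // the result comes back in cinfo->ret.reg (EAX)
 */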
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 * This should be signal safe, since it is called from
 * mono_arch_find_jit_info_ext ().
 * FIXME: The metadata calls might not be signal safe.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, args_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;
	CallInfo *cinfo;

	/* Avoid g_malloc as it is not signal safe */
	cinfo = (CallInfo*)g_newa (guint8*, sizeof (CallInfo) + (sizeof (ArgInfo) * (csig->param_count + 1)));

	cinfo = get_call_info_internal (NULL, cinfo, csig, FALSE);

	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].size = args_size;

	for (k = 0; k < param_count; k++) {
		size = mini_type_stack_size_full (NULL, csig->params [k], &align, csig->pinvoke);

		/* ignore alignment for now */
		align = 1;

		args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
		arg_info [k].pad = pad;
		args_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	if (mono_do_x86_stack_align && !CALLCONV_IS_STDCALL (csig))
		align = MONO_ARCH_FRAME_ALIGNMENT;
	else
		align = 4;
	args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	return args_size;
}
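/*
 * Worked example (illustrative, added): the initial offset of 8 skips the
 * saved EBP and the return address. For an instance method taking a single
 * int, the loop above yields:
 *
 *   arg_info [0].offset == 8    ('this', 4 bytes)
 *   arg_info [1].offset == 12   (the int parameter)
 */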
static const guchar cpuid_impl [] = {
	0x55,                   /* push %ebp */
	0x89, 0xe5,             /* mov %esp,%ebp */
	0x53,                   /* push %ebx */
	0x8b, 0x45, 0x08,       /* mov 0x8(%ebp),%eax */
	0x0f, 0xa2,             /* cpuid */
	0x50,                   /* push %eax */
	0x8b, 0x45, 0x10,       /* mov 0x10(%ebp),%eax */
	0x89, 0x18,             /* mov %ebx,(%eax) */
	0x8b, 0x45, 0x14,       /* mov 0x14(%ebp),%eax */
	0x89, 0x08,             /* mov %ecx,(%eax) */
	0x8b, 0x45, 0x18,       /* mov 0x18(%ebp),%eax */
	0x89, 0x10,             /* mov %edx,(%eax) */
	0x58,                   /* pop %eax */
	0x8b, 0x55, 0x0c,       /* mov 0xc(%ebp),%edx */
	0x89, 0x02,             /* mov %eax,(%edx) */
	0x5b,                   /* pop %ebx */
	0xc9,                   /* leave */
	0xc3,                   /* ret */
};

typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	__asm__ __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		static CpuidFunc func = NULL;
		void *ptr;

		if (!func) {
			ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
			memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
			func = (CpuidFunc)ptr;
		}
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
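/*
 * Usage sketch (illustrative, added): this is how the feature-flag checks
 * below use cpuid, e.g. testing the SSE2 bit (EDX bit 26 of leaf 1):
 *
 *   int eax, ebx, ecx, edx;
 *   if (cpuid (1, &eax, &ebx, &ecx, &edx) && (edx & (1 << 26)))
 *       ...  // SSE2 is available
 */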
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__ __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	_control87 (_PC_53, MCW_PC);
#endif
}
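/*
 * Sanity-check sketch (illustrative, added; gcc path only): after the
 * sequence above the control word's precision field should read back as
 * 53-bit ("double"), so x87 arithmetic rounds like IEEE 754 double:
 *
 *   guint16 fpcw;
 *   __asm__ __volatile__ ("fnstcw %0" : "=m" (fpcw));
 *   g_assert ((fpcw & X86_FPCW_PRECC_MASK) == X86_FPCW_PREC_DOUBLE);
 */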
/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
	InitializeCriticalSection (&mini_arch_mutex);

	ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ);
	bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
	mono_mprotect (bp_trigger_page, mono_pagesize (), 0);

	mono_aot_register_jit_icall ("mono_x86_throw_exception", mono_x86_throw_exception);
	mono_aot_register_jit_icall ("mono_x86_throw_corlib_exception", mono_x86_throw_corlib_exception);
}

/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
	DeleteCriticalSection (&mini_arch_mutex);
}
/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
	int eax, ebx, ecx, edx;
	guint32 opts = 0;

	*exclude_mask = 0;

	if (mono_aot_only)
		/* The cpuid function allocates from the global codeman */
		return opts;

	/* Feature Flags function, flags returned in EDX. */
	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 15)) {
			opts |= MONO_OPT_CMOV;
			if (edx & 1)
				opts |= MONO_OPT_FCMOV;
			else
				*exclude_mask |= MONO_OPT_FCMOV;
		} else
			*exclude_mask |= MONO_OPT_CMOV;
		if (edx & (1 << 26))
			opts |= MONO_OPT_SSE2;
		else
			*exclude_mask |= MONO_OPT_SSE2;

#ifdef MONO_ARCH_SIMD_INTRINSICS
		/* SIMD intrinsics require at least SSE2. */
		if (!(opts & MONO_OPT_SSE2))
			*exclude_mask |= MONO_OPT_SIMD;
#endif
	}
	return opts;
}
/*
 * This function tests for all supported SSE versions.
 *
 * Returns a bitmask corresponding to all supported versions.
 */
guint32
mono_arch_cpu_enumerate_simd_versions (void)
{
	int eax, ebx, ecx, edx;
	guint32 sse_opts = 0;

	if (mono_aot_only)
		/* The cpuid function allocates from the global codeman */
		return sse_opts;

	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 25))
			sse_opts |= SIMD_VERSION_SSE1;
		if (edx & (1 << 26))
			sse_opts |= SIMD_VERSION_SSE2;
		if (ecx & (1 << 0))
			sse_opts |= SIMD_VERSION_SSE3;
		if (ecx & (1 << 9))
			sse_opts |= SIMD_VERSION_SSSE3;
		if (ecx & (1 << 19))
			sse_opts |= SIMD_VERSION_SSE41;
		if (ecx & (1 << 20))
			sse_opts |= SIMD_VERSION_SSE42;
	}

	/* Yes, all this needs to be done to check for sse4a.
	   See: "AMD CPUID Specification"
	 */
	if (cpuid (0x80000000, &eax, &ebx, &ecx, &edx)) {
		/* eax greater than or equal to 0x80000001, ebx = 'htuA', ecx = 'DMAc', edx = 'itne' */
		if ((((unsigned int) eax) >= 0x80000001) && (ebx == 0x68747541) && (ecx == 0x444D4163) && (edx == 0x69746E65)) {
			cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
			if (ecx & (1 << 6))
				sse_opts |= SIMD_VERSION_SSE4a;
		}
	}

	return sse_opts;
}
/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		if (reg == -1)
			return TRUE;
	}

	return FALSE;
}
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we don't allocate I1 to registers because there is no simple way to sign-extend
		 * 8-bit quantities in caller-saved registers on x86 */
		if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	/* we can use 3 registers for global allocation */
	regs = g_list_prepend (regs, (gpointer)X86_EBX);
	regs = g_list_prepend (regs, (gpointer)X86_ESI);
	regs = g_list_prepend (regs, (gpointer)X86_EDI);

	return regs;
}
/*
 * mono_arch_regalloc_cost:
 *
 * Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		return (ins->opcode == OP_ARG) ? 1 : 0;
	else
		/* push+pop+possible load if it is an argument */
		return (ins->opcode == OP_ARG) ? 3 : 2;
}
static void
set_needs_stack_frame (MonoCompile *cfg, gboolean flag)
{
	static int inited = FALSE;
	static int count = 0;

	if (cfg->arch.need_stack_frame_inited) {
		g_assert (cfg->arch.need_stack_frame == flag);
		return;
	}

	cfg->arch.need_stack_frame = flag;
	cfg->arch.need_stack_frame_inited = TRUE;

	if (flag)
		return;

	if (!inited) {
		mono_counters_register ("Could eliminate stack frame", MONO_COUNTER_INT|MONO_COUNTER_JIT, &count);
		inited = TRUE;
	}
	++count;

	//g_print ("will eliminate %s.%s.%s\n", cfg->method->klass->name_space, cfg->method->klass->name, cfg->method->name);
}
static gboolean
needs_stack_frame (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	gboolean result = FALSE;

#if defined(__APPLE__)
	/* OSX requires stack frame code to have the correct alignment. */
	return TRUE;
#endif

	if (cfg->arch.need_stack_frame_inited)
		return cfg->arch.need_stack_frame;

	header = cfg->header;
	sig = mono_method_signature (cfg->method);

	if (cfg->disable_omit_fp)
		result = TRUE;
	else if (cfg->flags & MONO_CFG_HAS_ALLOCA)
		result = TRUE;
	else if (cfg->method->save_lmf)
		result = TRUE;
	else if (cfg->stack_offset)
		result = TRUE;
	else if (cfg->param_area)
		result = TRUE;
	else if (cfg->flags & (MONO_CFG_HAS_CALLS | MONO_CFG_HAS_ALLOCA | MONO_CFG_HAS_TAIL))
		result = TRUE;
	else if (header->num_clauses)
		result = TRUE;
	else if (sig->param_count + sig->hasthis)
		result = TRUE;
	else if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		result = TRUE;
	else if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
		 (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
		result = TRUE;

	set_needs_stack_frame (cfg, result);

	return cfg->arch.need_stack_frame;
}
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = cfg->header;
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	cfg->frame_reg = X86_EBP;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_size > MONO_ARCH_MAX_FRAME_SIZE) {
		char *mname = mono_method_full_name (cfg->method, TRUE);
		cfg->exception_type = MONO_EXCEPTION_INVALID_PROGRAM;
		cfg->exception_message = g_strdup_printf ("Method %s stack is too big.", mname);
		g_free (mname);
		return;
	}
	if (locals_stack_align) {
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	/*
	 * EBP is at alignment 8 % MONO_ARCH_FRAME_ALIGNMENT, so if we
	 * have locals larger than 8 bytes we need to make sure that
	 * they have the appropriate offset.
	 */
	if (MONO_ARCH_FRAME_ALIGNMENT > 8 && locals_stack_align > 8)
		offset += MONO_ARCH_FRAME_ALIGNMENT - sizeof (gpointer) * 2;
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		if (MONO_TYPE_ISSTRUCT (sig->ret)) {
			/*
			 * In the new IR, the cfg->vret_addr variable represents the
			 * vtype return value.
			 */
			cfg->vret_addr->opcode = OP_REGOFFSET;
			cfg->vret_addr->inst_basereg = cfg->frame_reg;
			cfg->vret_addr->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
			if (G_UNLIKELY (cfg->verbose_level > 1)) {
				printf ("vret_addr =");
				mono_print_ins (cfg->vret_addr);
			}
		} else {
			cfg->ret->opcode = OP_REGOFFSET;
			cfg->ret->inst_basereg = X86_EBP;
			cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		}
		break;
	case ArgValuetypeInReg:
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		cfg->ret->dreg = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->args [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	cfg->stack_offset = offset;
}
void
mono_arch_create_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	CallInfo *cinfo;

	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	if (cinfo->ret.storage == ArgValuetypeInReg)
		cfg->ret_var_is_local = TRUE;
	if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig->ret)) {
		cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
	}
}
/*
 * It is expensive to adjust esp for each individual fp argument pushed on the stack
 * so we try to do it just once when we have multiple fp arguments in a row.
 * We don't use this mechanism generally because for int arguments the generated code
 * is slightly bigger and new generation cpus optimize away the dependency chains
 * created by push instructions on the esp value.
 * fp_arg_setup is the first argument in the execution sequence where the esp register
 * is modified.
 */
static G_GNUC_UNUSED int
collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
{
	int fp_space = 0;
	MonoType *t;

	for (; start_arg < sig->param_count; ++start_arg) {
		t = mini_type_get_underlying_type (NULL, sig->params [start_arg]);
		if (!t->byref && t->type == MONO_TYPE_R8) {
			fp_space += sizeof (double);
			*fp_arg_setup = start_arg;
		} else {
			break;
		}
	}
	return fp_space;
}
static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
	MonoMethodSignature *tmp_sig;

	/* FIXME: Add support for signature tokens to AOT */
	cfg->disable_aot = TRUE;

	/*
	 * mono_ArgIterator_Setup assumes the signature cookie is
	 * passed first and all the arguments which were before it are
	 * passed on the stack after the signature. So compensate by
	 * passing a different signature.
	 */
	tmp_sig = mono_metadata_signature_dup (call->signature);
	tmp_sig->param_count -= call->signature->sentinelpos;
	tmp_sig->sentinelpos = 0;
	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

	MONO_EMIT_NEW_BIALU_IMM (cfg, OP_X86_PUSH_IMM, -1, -1, tmp_sig);
}
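/*
 * Example (illustrative, hypothetical C# caller): for a vararg call such as
 *
 *   Foo (1, 2, __arglist (3.0, "x"));
 *
 * sentinelpos is 2, so tmp_sig describes only the two trailing arguments and
 * the cookie pushed here lets mono_ArgIterator_Setup walk just those.
 */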
#ifdef ENABLE_LLVM
LLVMCallInfo*
mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
{
	int i, n;
	CallInfo *cinfo;
	ArgInfo *ainfo;
	LLVMCallInfo *linfo;
	MonoType *t;

	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);

	linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));

	/*
	 * LLVM always uses the native ABI while we use our own ABI, the
	 * only difference is the handling of vtypes:
	 * - we only pass/receive them in registers in some cases, and only
	 *   in 1 or 2 integer registers.
	 */
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		if (sig->pinvoke) {
			cfg->exception_message = g_strdup ("pinvoke + vtypes");
			cfg->disable_llvm = TRUE;
			return linfo;
		}

		cfg->exception_message = g_strdup ("vtype ret in call");
		cfg->disable_llvm = TRUE;
		/*
		linfo->ret.storage = LLVMArgVtypeInReg;
		for (j = 0; j < 2; ++j)
			linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, cinfo->ret.pair_storage [j]);
		*/
	}

	if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage == ArgInIReg) {
		/* Vtype returned using a hidden argument */
		linfo->ret.storage = LLVMArgVtypeRetAddr;
	}

	if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage != ArgInIReg) {
		// FIXME:
		cfg->exception_message = g_strdup ("vtype ret in call");
		cfg->disable_llvm = TRUE;
	}

	for (i = 0; i < n; ++i) {
		ainfo = cinfo->args + i;

		if (i >= sig->hasthis)
			t = sig->params [i - sig->hasthis];
		else
			t = &mono_defaults.int_class->byval_arg;

		linfo->args [i].storage = LLVMArgNone;

		switch (ainfo->storage) {
		case ArgInIReg:
			linfo->args [i].storage = LLVMArgInIReg;
			break;
		case ArgInDoubleSSEReg:
		case ArgInFloatSSEReg:
			linfo->args [i].storage = LLVMArgInFPReg;
			break;
		case ArgOnStack:
			if (MONO_TYPE_ISSTRUCT (t)) {
				if (mono_class_value_size (mono_class_from_mono_type (t), NULL) == 0)
					/* LLVM seems to allocate argument space for empty structures too */
					linfo->args [i].storage = LLVMArgNone;
				else
					linfo->args [i].storage = LLVMArgVtypeByVal;
			} else {
				linfo->args [i].storage = LLVMArgInIReg;
				if (t->byref) {
					if (t->type == MONO_TYPE_R4)
						linfo->args [i].storage = LLVMArgInFPReg;
					else if (t->type == MONO_TYPE_R8)
						linfo->args [i].storage = LLVMArgInFPReg;
				}
			}
			break;
		case ArgValuetypeInReg:
			if (sig->pinvoke) {
				cfg->exception_message = g_strdup ("pinvoke + vtypes");
				cfg->disable_llvm = TRUE;
				return linfo;
			}

			cfg->exception_message = g_strdup ("vtype arg");
			cfg->disable_llvm = TRUE;
			/*
			linfo->args [i].storage = LLVMArgVtypeInReg;
			for (j = 0; j < 2; ++j)
				linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
			*/
			break;
		default:
			cfg->exception_message = g_strdup ("ainfo->storage");
			cfg->disable_llvm = TRUE;
			break;
		}
	}

	return linfo;
}
#endif
void
mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n;
	CallInfo *cinfo;
	int sentinelpos = 0;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		sentinelpos = sig->sentinelpos + (sig->hasthis ? 1 : 0);

	if (cinfo->need_stack_align) {
		MONO_INST_NEW (cfg, arg, OP_SUB_IMM);
		arg->dreg = X86_ESP;
		arg->sreg1 = X86_ESP;
		arg->inst_imm = cinfo->stack_align_amount;
		MONO_ADD_INS (cfg->cbb, arg);
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/*
			 * Tell the JIT to use a more efficient calling convention: call using
			 * OP_CALL, compute the result location after the call, and save the
			 * result there.
			 */
			call->vret_in_reg = TRUE;
			if (call->vret_var)
				NULLIFY_INS (call->vret_var);
		}
	}

	/* Handle the case where there are no implicit arguments */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
		emit_sig_cookie (cfg, call, cinfo);
	}

	/* Arguments are pushed in the reverse order */
	for (i = n - 1; i >= 0; i --) {
		ArgInfo *ainfo = cinfo->args + i;
		MonoType *t;

		if (i >= sig->hasthis)
			t = sig->params [i - sig->hasthis];
		else
			t = &mono_defaults.int_class->byval_arg;
		t = mini_type_get_underlying_type (cfg->generic_sharing_context, t);

		MONO_INST_NEW (cfg, arg, OP_X86_PUSH);

		in = call->args [i];
		arg->cil_code = in->cil_code;
		arg->sreg1 = in->dreg;
		arg->type = in->type;

		g_assert (in->dreg != -1);

		if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
			guint32 align;
			guint32 size;

			g_assert (in->klass);

			if (t->type == MONO_TYPE_TYPEDBYREF) {
				size = sizeof (MonoTypedRef);
				align = sizeof (gpointer);
			}
			else {
				size = mini_type_stack_size_full (cfg->generic_sharing_context, &in->klass->byval_arg, &align, sig->pinvoke);
			}

			if (size > 0) {
				arg->opcode = OP_OUTARG_VT;
				arg->sreg1 = in->dreg;
				arg->klass = in->klass;
				arg->backend.size = size;

				MONO_ADD_INS (cfg->cbb, arg);
			}
		}
		else {
			switch (ainfo->storage) {
			case ArgOnStack:
				arg->opcode = OP_X86_PUSH;
				if (!t->byref) {
					if (t->type == MONO_TYPE_R4) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 4);
						arg->opcode = OP_STORER4_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_R8) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
						arg->opcode = OP_STORER8_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_I8 || t->type == MONO_TYPE_U8) {
						arg->sreg1 ++;
						MONO_EMIT_NEW_UNALU (cfg, OP_X86_PUSH, -1, in->dreg + 2);
					}
				}
				break;
			default:
				g_assert_not_reached ();
			}

			MONO_ADD_INS (cfg->cbb, arg);
		}

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
			/* Emit the signature cookie just before the implicit arguments */
			emit_sig_cookie (cfg, call, cinfo);
		}
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		MonoInst *vtarg;

		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/* Already done */
		}
		else if (cinfo->ret.storage == ArgInIReg) {
			NOT_IMPLEMENTED;
			/* The return address is passed in a register */
			MONO_INST_NEW (cfg, vtarg, OP_MOVE);
			vtarg->sreg1 = call->inst.dreg;
			vtarg->dreg = mono_alloc_ireg (cfg);
			MONO_ADD_INS (cfg->cbb, vtarg);

			mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
		} else {
			MonoInst *vtarg;
			MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
			vtarg->type = STACK_MP;
			vtarg->sreg1 = call->vret_var->dreg;
			MONO_ADD_INS (cfg->cbb, vtarg);
		}

		/* if the function returns a struct on stack, the called method already does a ret $0x4 */
		if (cinfo->ret.storage != ArgValuetypeInReg)
			cinfo->stack_usage -= 4;
	}

	call->stack_usage = cinfo->stack_usage;
}
void
mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
{
	MonoInst *arg;
	int size = ins->backend.size;

	if (size <= 4) {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
		arg->sreg1 = src->dreg;

		MONO_ADD_INS (cfg->cbb, arg);
	} else if (size <= 20) {
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 4));
		mini_emit_memcpy (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
	} else {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
		arg->inst_basereg = src->dreg;
		arg->inst_offset = 0;
		arg->inst_imm = size;

		MONO_ADD_INS (cfg->cbb, arg);
	}
}
void
mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
{
	MonoType *ret = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);

	if (!ret->byref) {
		if (ret->type == MONO_TYPE_R4) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_R8) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_LMOVE, cfg->ret->dreg, val->dreg);
			else {
				MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EAX, val->dreg + 1);
				MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EDX, val->dreg + 2);
			}
			return;
		}
	}

	MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
}
/*
 * Allow tracing to work with this interface (with an optional argument)
 */
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;

	g_assert (MONO_ARCH_FRAME_ALIGNMENT >= 8);
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 8);

	/* if some args are passed in registers, we need to save them here */
	x86_push_reg (code, X86_EBP);

	if (cfg->compile_aot) {
		x86_push_imm (code, cfg->method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
		x86_push_imm (code, cfg->method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT);

	return code;
}
enum {
	SAVE_NONE,
	SAVE_STRUCT,
	SAVE_EAX,
	SAVE_EAX_EDX,
	SAVE_FP
};
void*
mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
{
	guchar *code = p;
	int arg_size = 0, stack_usage = 0, save_mode = SAVE_NONE;
	MonoMethod *method = cfg->method;
	MonoType *ret_type = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);

	switch (ret_type->type) {
	case MONO_TYPE_VOID:
		/* special case string .ctor icall */
		if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class) {
			save_mode = SAVE_EAX;
			stack_usage = enable_arguments ? 8 : 4;
		} else
			save_mode = SAVE_NONE;
		break;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		save_mode = SAVE_EAX_EDX;
		stack_usage = enable_arguments ? 16 : 8;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		save_mode = SAVE_FP;
		stack_usage = enable_arguments ? 16 : 8;
		break;
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (ret_type)) {
			save_mode = SAVE_EAX;
			stack_usage = enable_arguments ? 8 : 4;
			break;
		}
		/* Fall through */
	case MONO_TYPE_VALUETYPE:
		// FIXME: Handle SMALL_STRUCT_IN_REG here for proper alignment on darwin-x86
		save_mode = SAVE_STRUCT;
		stack_usage = enable_arguments ? 4 : 0;
		break;
	default:
		save_mode = SAVE_EAX;
		stack_usage = enable_arguments ? 8 : 4;
		break;
	}

	x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage - 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_push_reg (code, X86_EDX);
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EDX);
			x86_push_reg (code, X86_EAX);
			arg_size = 8;
		}
		break;
	case SAVE_EAX:
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EAX);
			arg_size = 4;
		}
		break;
	case SAVE_FP:
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		if (enable_arguments) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
			arg_size = 8;
		}
		break;
	case SAVE_STRUCT:
		if (enable_arguments) {
			x86_push_membase (code, X86_EBP, 8);
			arg_size = 4;
		}
		break;
	case SAVE_NONE:
	default:
		break;
	}

	if (cfg->compile_aot) {
		x86_push_imm (code, method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
		x86_push_imm (code, method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}

	x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_pop_reg (code, X86_EAX);
		x86_pop_reg (code, X86_EDX);
		break;
	case SAVE_EAX:
		x86_pop_reg (code, X86_EAX);
		break;
	case SAVE_FP:
		x86_fld_membase (code, X86_ESP, 0, TRUE);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		break;
	case SAVE_NONE:
	default:
		break;
	}

	x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage);

	return code;
}
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->inst_true_bb->native_offset) { \
	x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
} else { \
	mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
	if ((cfg->opt & MONO_OPT_BRANCH) && \
	    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
		x86_branch8 (code, cond, 0, sign); \
	else \
		x86_branch32 (code, cond, 0, sign); \
}

/*
 * Emit an exception if the condition fails and, if possible, branch
 * directly to the target.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
	do { \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, signed); \
		} else { \
			EMIT_COND_BRANCH (tins, cond, signed); \
		} \
	} while (0);
#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0);


static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}

#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
/*
 * mono_peephole_pass_1:
 *
 * Perform peephole opts which should/can be performed before local regalloc
 */
void
mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		MonoInst *last_ins = ins->prev;

		switch (ins->opcode) {
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				/*
				 * X86_LEA is like ADD, but doesn't have the
				 * sreg1==dreg restriction.
				 */
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_SUB_IMM:
		case OP_ISUB_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
				ins->inst_imm = -ins->inst_imm;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		case OP_COMPARE_IMM:
		case OP_ICOMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0)
			 * -->
			 * OP_X86_TEST_NULL (reg)
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/*
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_COMPARE_IMM;
				ins->sreg1 = last_ins->sreg1;

				/* check if we can remove cmp reg,0 with test null */
				if (!ins->inst_imm)
					ins->opcode = OP_X86_TEST_NULL;
			}

			break;
		case OP_X86_PUSH_MEMBASE:
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_X86_PUSH;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
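/*
 * Encoding note (illustrative, added): the OP_X86_TEST_NULL rewrite above
 * saves a byte per comparison, e.g. for EAX:
 *
 *   cmp  eax, 0    ; 83 f8 00   (3 bytes)
 *   test eax, eax  ; 85 c0      (2 bytes)
 *
 * with identical ZF/SF results for the equality checks that follow.
 */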
void
mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we can't do it always */
			if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
				MonoInst *ins2;

				ins->opcode = OP_IXOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;

				/*
				 * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG
				 * since it takes 3 bytes instead of 7.
				 */
				for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STORE_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STOREI4_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
						/* Continue iteration */
					}
					else
						break;
				}
			}
			break;
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_ISUB_IMM:
		case OP_SUB_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
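/*
 * Encoding note (illustrative, added): the ICONST 0 -> XOR rewrite shrinks
 * the instruction from 5 bytes to 2, e.g. for EAX:
 *
 *   mov eax, 0    ; b8 00 00 00 00
 *   xor eax, eax  ; 31 c0
 *
 * which is why it is only done when the next instruction ignores the
 * condition flags that XOR clobbers.
 */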
/*
 * mono_arch_lowering_pass:
 *
 * Converts complex opcodes into simpler ones so that each IR instruction
 * corresponds to one machine instruction.
 */
void
mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *next;

	/*
	 * FIXME: Need to add more instructions, but the current machine
	 * description can't model some parts of the composite instructions like
	 * cdq.
	 */
	MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) {
		switch (ins->opcode) {
		case OP_IREM_IMM:
		case OP_IDIV_IMM:
		case OP_IDIV_UN_IMM:
		case OP_IREM_UN_IMM:
			/*
			 * Keep the cases where we can generate optimized code, otherwise convert
			 * to the non-imm variant.
			 */
			if ((ins->opcode == OP_IREM_IMM) && mono_is_power_of_two (ins->inst_imm) >= 0)
				break;
			mono_decompose_op_imm (cfg, bb, ins);
			break;
		default:
			break;
		}
	}

	bb->max_vreg = cfg->next_vreg;
}
static const int
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Maps CMP_... constants to X86_CC_... constants */
static const int
cc_table [] = {
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};

static const int
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
#define XMM_TEMP_REG 0
	/* This SSE2 optimization must not be done with OPT_SIMD in place as it clobbers xmm0. */
	/* The xmm pass decomposes OP_FCONV_ ops anyway. */
	if (cfg->opt & MONO_OPT_SSE2 && size < 8 && !(cfg->opt & MONO_OPT_SIMD)) {
		/* optimize by assigning a local var for this use so we avoid
		 * the stack manipulations */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
		x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		if (size == 1)
			x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
		else if (size == 2)
			x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
		return code;
	}
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
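/*
 * Note on emit_float_to_int (explanatory, added): the OR with 0xc00 sets
 * bits 10-11 of the x87 control word (the rounding-control field) to
 * round-toward-zero, so the fist store truncates the way C-style
 * float-to-int casts require; the saved control word is restored afterwards.
 * The SSE2 path needs no such dance because cvttsd2si truncates by
 * definition.
 */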
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(TARGET_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently committed.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/*
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}

		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);

		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);

		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
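/*
 * Worked example (illustrative, added): a request of 0x2800 bytes with
 * need_touch set runs the probe loop twice (touching the pages at
 * esp-0x1000 and esp-0x2000) and then subtracts the remaining 0x800
 * directly, so every newly committed guard page is faulted in order.
 */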
static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
	/* Move return value to the target register */
	switch (ins->opcode) {
	case OP_CALL:
	case OP_CALL_REG:
	case OP_CALL_MEMBASE:
		if (ins->dreg != X86_EAX)
			x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
		break;
	default:
		break;
	}

	return code;
}
2058 gboolean
2059 mono_x86_have_tls_get (void)
2061 #ifdef __APPLE__
2062 guint32 *ins = (guint32*)pthread_getspecific;
2064 * We're looking for these two instructions:
2066 * mov 0x4(%esp),%eax
2067 * mov %gs:0x48(,%eax,4),%eax
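/*
 * Those two instructions encode to 12 bytes (8b 44 24 04 / 65 8b 04 85
 * 48 00 00 00), which read as little-endian dwords are exactly the three
 * constants compared below.
 */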
2069 return ins [0] == 0x0424448b && ins [1] == 0x85048b65 && ins [2] == 0x00000048;
2070 #else
2071 return TRUE;
2072 #endif
2076 * mono_x86_emit_tls_get:
2077 * @code: buffer to store code to
2078 * @dreg: hard register where to place the result
2079 * @tls_offset: offset info
2081 * mono_x86_emit_tls_get emits in @code the native code that puts in
2082 * the dreg register the item in the thread local storage identified
2083 * by tls_offset.
2085 * Returns: a pointer to the end of the stored code
2087 guint8*
2088 mono_x86_emit_tls_get (guint8* code, int dreg, int tls_offset)
2090 #if defined(__APPLE__)
2091 x86_prefix (code, X86_GS_PREFIX);
2092 x86_mov_reg_mem (code, dreg, 0x48 + tls_offset * 4, 4);
2093 #elif defined(TARGET_WIN32)
2095 * See the Under the Hood article in the May 1996 issue of Microsoft Systems
2096 * Journal and/or a disassembly of the TlsGetValue () function.
2098 g_assert (tls_offset < 64);
2099 x86_prefix (code, X86_FS_PREFIX);
2100 x86_mov_reg_mem (code, dreg, 0x18, 4);
2101 /* Not sure what this does, but TlsGetValue () contains it */
2102 x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
2103 x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
2104 #else
2105 if (optimize_for_xen) {
2106 x86_prefix (code, X86_GS_PREFIX);
2107 x86_mov_reg_mem (code, dreg, 0, 4);
2108 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
2109 } else {
2110 x86_prefix (code, X86_GS_PREFIX);
2111 x86_mov_reg_mem (code, dreg, tls_offset, 4);
2113 #endif
2114 return code;
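/*
 * Example usage (a sketch, not from this file): loading the current
 * appdomain out of TLS into EAX, assuming appdomain_tls_offset has
 * already been initialized by the runtime:
 *
 *   code = mono_x86_emit_tls_get (code, X86_EAX, appdomain_tls_offset);
 */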
2118 * emit_load_volatile_arguments:
2120 * Load volatile arguments from the stack to the original input registers.
2121 * Required before a tail call.
2123 static guint8*
2124 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2126 MonoMethod *method = cfg->method;
2127 MonoMethodSignature *sig;
2128 MonoInst *inst;
2129 CallInfo *cinfo;
2130 guint32 i;
2132 /* FIXME: Generate intermediate code instead */
2134 sig = mono_method_signature (method);
2136 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
2138 /* This is the opposite of the code in emit_prolog */
2140 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2141 ArgInfo *ainfo = cinfo->args + i;
2142 MonoType *arg_type;
2143 inst = cfg->args [i];
2145 if (sig->hasthis && (i == 0))
2146 arg_type = &mono_defaults.object_class->byval_arg;
2147 else
2148 arg_type = sig->params [i - sig->hasthis];
2151 * On x86, the arguments are either in their original stack locations, or in
2152 * global regs.
2154 if (inst->opcode == OP_REGVAR) {
2155 g_assert (ainfo->storage == ArgOnStack);
2157 x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
2161 return code;
2164 #define REAL_PRINT_REG(text,reg) \
2165 mono_assert (reg >= 0); \
2166 x86_push_reg (code, X86_EAX); \
2167 x86_push_reg (code, X86_EDX); \
2168 x86_push_reg (code, X86_ECX); \
2169 x86_push_reg (code, reg); \
2170 x86_push_imm (code, reg); \
2171 x86_push_imm (code, text " %d %p\n"); \
2172 x86_mov_reg_imm (code, X86_EAX, printf); \
2173 x86_call_reg (code, X86_EAX); \
2174 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
2175 x86_pop_reg (code, X86_ECX); \
2176 x86_pop_reg (code, X86_EDX); \
2177 x86_pop_reg (code, X86_EAX);
2179 /* benchmark and set based on cpu */
2180 #define LOOP_ALIGNMENT 8
2181 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2183 #ifndef DISABLE_JIT
2185 void
2186 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2188 MonoInst *ins;
2189 MonoCallInst *call;
2190 guint offset;
2191 guint8 *code = cfg->native_code + cfg->code_len;
2192 int max_len, cpos;
2194 if (cfg->opt & MONO_OPT_LOOP) {
2195 int pad, align = LOOP_ALIGNMENT;
2196 /* set alignment depending on cpu */
2197 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2198 pad = align - pad;
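/* e.g. with code_len == 0x1234 and align == 8: 0x1234 & 7 == 4, so pad
 * becomes 8 - 4 == 4 and the loop head starts at the 8-aligned 0x1238 */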
2199 /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2200 x86_padding (code, pad);
2201 cfg->code_len += pad;
2202 bb->native_offset = cfg->code_len;
2206 if (cfg->verbose_level > 2)
2207 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2209 cpos = bb->max_offset;
2211 if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2212 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2213 g_assert (!cfg->compile_aot);
2214 cpos += 6;
2216 cov->data [bb->dfn].cil_code = bb->cil_code;
2217 /* this is not thread safe, but good enough */
2218 x86_inc_mem (code, &cov->data [bb->dfn].count);
2221 offset = code - cfg->native_code;
2223 mono_debug_open_block (cfg, bb, offset);
2225 MONO_BB_FOR_EACH_INS (bb, ins) {
2226 offset = code - cfg->native_code;
2228 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2230 if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
2231 cfg->code_size *= 2;
2232 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2233 code = cfg->native_code + offset;
2234 mono_jit_stats.code_reallocs++;
2237 if (cfg->debug_info)
2238 mono_debug_record_line_number (cfg, ins, offset);
2240 switch (ins->opcode) {
2241 case OP_BIGMUL:
2242 x86_mul_reg (code, ins->sreg2, TRUE);
2243 break;
2244 case OP_BIGMUL_UN:
2245 x86_mul_reg (code, ins->sreg2, FALSE);
2246 break;
2247 case OP_X86_SETEQ_MEMBASE:
2248 case OP_X86_SETNE_MEMBASE:
2249 x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2250 ins->inst_basereg, ins->inst_offset, TRUE);
2251 break;
2252 case OP_STOREI1_MEMBASE_IMM:
2253 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2254 break;
2255 case OP_STOREI2_MEMBASE_IMM:
2256 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2257 break;
2258 case OP_STORE_MEMBASE_IMM:
2259 case OP_STOREI4_MEMBASE_IMM:
2260 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2261 break;
2262 case OP_STOREI1_MEMBASE_REG:
2263 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2264 break;
2265 case OP_STOREI2_MEMBASE_REG:
2266 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2267 break;
2268 case OP_STORE_MEMBASE_REG:
2269 case OP_STOREI4_MEMBASE_REG:
2270 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2271 break;
2272 case OP_STORE_MEM_IMM:
2273 x86_mov_mem_imm (code, ins->inst_p0, ins->inst_c0, 4);
2274 break;
2275 case OP_LOADU4_MEM:
2276 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2277 break;
2278 case OP_LOAD_MEM:
2279 case OP_LOADI4_MEM:
2280 /* These are created by the cprop pass so they use inst_imm as the source */
2281 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2282 break;
2283 case OP_LOADU1_MEM:
2284 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, FALSE);
2285 break;
2286 case OP_LOADU2_MEM:
2287 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, TRUE);
2288 break;
2289 case OP_LOAD_MEMBASE:
2290 case OP_LOADI4_MEMBASE:
2291 case OP_LOADU4_MEMBASE:
2292 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2293 break;
2294 case OP_LOADU1_MEMBASE:
2295 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2296 break;
2297 case OP_LOADI1_MEMBASE:
2298 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2299 break;
2300 case OP_LOADU2_MEMBASE:
2301 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2302 break;
2303 case OP_LOADI2_MEMBASE:
2304 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2305 break;
2306 case OP_ICONV_TO_I1:
2307 case OP_SEXT_I1:
2308 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2309 break;
2310 case OP_ICONV_TO_I2:
2311 case OP_SEXT_I2:
2312 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2313 break;
2314 case OP_ICONV_TO_U1:
2315 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2316 break;
2317 case OP_ICONV_TO_U2:
2318 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2319 break;
2320 case OP_COMPARE:
2321 case OP_ICOMPARE:
2322 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2323 break;
2324 case OP_COMPARE_IMM:
2325 case OP_ICOMPARE_IMM:
2326 x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2327 break;
2328 case OP_X86_COMPARE_MEMBASE_REG:
2329 x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2330 break;
2331 case OP_X86_COMPARE_MEMBASE_IMM:
2332 x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2333 break;
2334 case OP_X86_COMPARE_MEMBASE8_IMM:
2335 x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2336 break;
2337 case OP_X86_COMPARE_REG_MEMBASE:
2338 x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2339 break;
2340 case OP_X86_COMPARE_MEM_IMM:
2341 x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2342 break;
2343 case OP_X86_TEST_NULL:
2344 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2345 break;
2346 case OP_X86_ADD_MEMBASE_IMM:
2347 x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2348 break;
2349 case OP_X86_ADD_REG_MEMBASE:
2350 x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2351 break;
2352 case OP_X86_SUB_MEMBASE_IMM:
2353 x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2354 break;
2355 case OP_X86_SUB_REG_MEMBASE:
2356 x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2357 break;
2358 case OP_X86_AND_MEMBASE_IMM:
2359 x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2360 break;
2361 case OP_X86_OR_MEMBASE_IMM:
2362 x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2363 break;
2364 case OP_X86_XOR_MEMBASE_IMM:
2365 x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2366 break;
2367 case OP_X86_ADD_MEMBASE_REG:
2368 x86_alu_membase_reg (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2369 break;
2370 case OP_X86_SUB_MEMBASE_REG:
2371 x86_alu_membase_reg (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2372 break;
2373 case OP_X86_AND_MEMBASE_REG:
2374 x86_alu_membase_reg (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2375 break;
2376 case OP_X86_OR_MEMBASE_REG:
2377 x86_alu_membase_reg (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2378 break;
2379 case OP_X86_XOR_MEMBASE_REG:
2380 x86_alu_membase_reg (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2381 break;
2382 case OP_X86_INC_MEMBASE:
2383 x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2384 break;
2385 case OP_X86_INC_REG:
2386 x86_inc_reg (code, ins->dreg);
2387 break;
2388 case OP_X86_DEC_MEMBASE:
2389 x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2390 break;
2391 case OP_X86_DEC_REG:
2392 x86_dec_reg (code, ins->dreg);
2393 break;
2394 case OP_X86_MUL_REG_MEMBASE:
2395 x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2396 break;
2397 case OP_X86_AND_REG_MEMBASE:
2398 x86_alu_reg_membase (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset);
2399 break;
2400 case OP_X86_OR_REG_MEMBASE:
2401 x86_alu_reg_membase (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset);
2402 break;
2403 case OP_X86_XOR_REG_MEMBASE:
2404 x86_alu_reg_membase (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset);
2405 break;
2406 case OP_BREAK:
2407 x86_breakpoint (code);
2408 break;
2409 case OP_RELAXED_NOP:
2410 x86_prefix (code, X86_REP_PREFIX);
2411 x86_nop (code);
2412 break;
2413 case OP_HARD_NOP:
2414 x86_nop (code);
2415 break;
2416 case OP_NOP:
2417 case OP_DUMMY_USE:
2418 case OP_DUMMY_STORE:
2419 case OP_NOT_REACHED:
2420 case OP_NOT_NULL:
2421 break;
2422 case OP_SEQ_POINT: {
2423 int i;
2425 if (cfg->compile_aot)
2426 NOT_IMPLEMENTED;
2429 * Read from the single stepping trigger page. This will cause a
2430 * SIGSEGV when single stepping is enabled.
2431 * We do this _before_ the breakpoint, so single stepping after
2432 * a breakpoint is hit will step to the next IL offset.
2434 if (ins->flags & MONO_INST_SINGLE_STEP_LOC)
2435 x86_alu_reg_mem (code, X86_CMP, X86_EAX, (guint32)ss_trigger_page);
2437 mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
2440 * A placeholder for a possible breakpoint inserted by
2441 * mono_arch_set_breakpoint ().
2443 for (i = 0; i < 6; ++i)
2444 x86_nop (code);
2445 break;
2447 case OP_ADDCC:
2448 case OP_IADDCC:
2449 case OP_IADD:
2450 x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2451 break;
2452 case OP_ADC:
2453 case OP_IADC:
2454 x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2455 break;
2456 case OP_ADDCC_IMM:
2457 case OP_ADD_IMM:
2458 case OP_IADD_IMM:
2459 x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2460 break;
2461 case OP_ADC_IMM:
2462 case OP_IADC_IMM:
2463 x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2464 break;
2465 case OP_SUBCC:
2466 case OP_ISUBCC:
2467 case OP_ISUB:
2468 x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2469 break;
2470 case OP_SBB:
2471 case OP_ISBB:
2472 x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2473 break;
2474 case OP_SUBCC_IMM:
2475 case OP_SUB_IMM:
2476 case OP_ISUB_IMM:
2477 x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2478 break;
2479 case OP_SBB_IMM:
2480 case OP_ISBB_IMM:
2481 x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2482 break;
2483 case OP_IAND:
2484 x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2485 break;
2486 case OP_AND_IMM:
2487 case OP_IAND_IMM:
2488 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2489 break;
2490 case OP_IDIV:
2491 case OP_IREM:
2493 * The code is the same for div/rem, the allocator will allocate dreg
2494 * to EAX/EDX as appropriate.
2496 if (ins->sreg2 == X86_EDX) {
2497 /* cdq clobbers this */
2498 x86_push_reg (code, ins->sreg2);
2499 x86_cdq (code);
2500 x86_div_membase (code, X86_ESP, 0, TRUE);
2501 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2502 } else {
2503 x86_cdq (code);
2504 x86_div_reg (code, ins->sreg2, TRUE);
2506 break;
2507 case OP_IDIV_UN:
2508 case OP_IREM_UN:
2509 if (ins->sreg2 == X86_EDX) {
2510 x86_push_reg (code, ins->sreg2);
2511 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2512 x86_div_membase (code, X86_ESP, 0, FALSE);
2513 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2514 } else {
2515 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2516 x86_div_reg (code, ins->sreg2, FALSE);
2518 break;
2519 case OP_DIV_IMM:
2520 x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2521 x86_cdq (code);
2522 x86_div_reg (code, ins->sreg2, TRUE);
2523 break;
2524 case OP_IREM_IMM: {
2525 int power = mono_is_power_of_two (ins->inst_imm);
2527 g_assert (ins->sreg1 == X86_EAX);
2528 g_assert (ins->dreg == X86_EAX);
2529 g_assert (power >= 0);
2531 if (power == 1) {
2532 /* Based on http://compilers.iecc.com/comparch/article/93-04-079 */
2533 x86_cdq (code);
2534 x86_alu_reg_imm (code, X86_AND, X86_EAX, 1);
2536 * If the dividend is >= 0, this does nothing. If it is negative, it
2537 * transforms %eax=0 into %eax=0, and %eax=1 into %eax=-1.
2539 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EDX);
2540 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
2541 } else if (power == 0) {
2542 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2543 } else {
2544 /* Based on gcc code */
2546 /* Add compensation for negative dividends */
2547 x86_cdq (code);
2548 x86_shift_reg_imm (code, X86_SHR, X86_EDX, 32 - power);
2549 x86_alu_reg_reg (code, X86_ADD, X86_EAX, X86_EDX);
2550 /* Compute remainder */
2551 x86_alu_reg_imm (code, X86_AND, X86_EAX, (1 << power) - 1);
2552 /* Remove compensation */
2553 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
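/*
 * Worked example (sketch): dividend eax = -5, inst_imm = 8 (power = 3).
 * cdq sets edx = 0xffffffff; shr edx, 29 leaves edx = 7 (the compensation);
 * add gives eax = 2; and eax, 7 keeps 2; sub eax, edx yields -5, matching
 * C semantics where -5 % 8 == -5.
 */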
2555 break;
2557 case OP_IOR:
2558 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2559 break;
2560 case OP_OR_IMM:
2561 case OP_IOR_IMM:
2562 x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2563 break;
2564 case OP_IXOR:
2565 x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2566 break;
2567 case OP_XOR_IMM:
2568 case OP_IXOR_IMM:
2569 x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2570 break;
2571 case OP_ISHL:
2572 g_assert (ins->sreg2 == X86_ECX);
2573 x86_shift_reg (code, X86_SHL, ins->dreg);
2574 break;
2575 case OP_ISHR:
2576 g_assert (ins->sreg2 == X86_ECX);
2577 x86_shift_reg (code, X86_SAR, ins->dreg);
2578 break;
2579 case OP_SHR_IMM:
2580 case OP_ISHR_IMM:
2581 x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2582 break;
2583 case OP_SHR_UN_IMM:
2584 case OP_ISHR_UN_IMM:
2585 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2586 break;
2587 case OP_ISHR_UN:
2588 g_assert (ins->sreg2 == X86_ECX);
2589 x86_shift_reg (code, X86_SHR, ins->dreg);
2590 break;
2591 case OP_SHL_IMM:
2592 case OP_ISHL_IMM:
2593 x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2594 break;
2595 case OP_LSHL: {
2596 guint8 *jump_to_end;
2598 /* handle shifts below 32 bits */
2599 x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2600 x86_shift_reg (code, X86_SHL, ins->sreg1);
2602 x86_test_reg_imm (code, X86_ECX, 32);
2603 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2605 /* handle shift over 32 bit */
2606 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2607 x86_clear_reg (code, ins->sreg1);
2609 x86_patch (jump_to_end, code);
2611 break;
2612 case OP_LSHR: {
2613 guint8 *jump_to_end;
2615 /* handle shifts below 32 bits */
2616 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2617 x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2619 x86_test_reg_imm (code, X86_ECX, 32);
2620 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2622 /* handle shifts over 31 bits */
2623 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2624 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2626 x86_patch (jump_to_end, code);
2628 break;
2629 case OP_LSHR_UN: {
2630 guint8 *jump_to_end;
2632 /* handle shifts below 32 bits */
2633 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2634 x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2636 x86_test_reg_imm (code, X86_ECX, 32);
2637 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2639 /* handle shifts over 31 bits */
2640 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2641 x86_clear_reg (code, ins->backend.reg3);
2643 x86_patch (jump_to_end, code);
2645 break;
2646 case OP_LSHL_IMM:
2647 if (ins->inst_imm >= 32) {
2648 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2649 x86_clear_reg (code, ins->sreg1);
2650 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2651 } else {
2652 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2653 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2655 break;
2656 case OP_LSHR_IMM:
2657 if (ins->inst_imm >= 32) {
2658 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2659 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2660 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2661 } else {
2662 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2663 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2665 break;
2666 case OP_LSHR_UN_IMM:
2667 if (ins->inst_imm >= 32) {
2668 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2669 x86_clear_reg (code, ins->backend.reg3);
2670 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2671 } else {
2672 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2673 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2675 break;
2676 case OP_INOT:
2677 x86_not_reg (code, ins->sreg1);
2678 break;
2679 case OP_INEG:
2680 x86_neg_reg (code, ins->sreg1);
2681 break;
2683 case OP_IMUL:
2684 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2685 break;
2686 case OP_MUL_IMM:
2687 case OP_IMUL_IMM:
2688 switch (ins->inst_imm) {
2689 case 2:
2690 /* MOV r1, r2 */
2691 /* ADD r1, r1 */
2692 if (ins->dreg != ins->sreg1)
2693 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2694 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2695 break;
2696 case 3:
2697 /* LEA r1, [r2 + r2*2] */
2698 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2699 break;
2700 case 5:
2701 /* LEA r1, [r2 + r2*4] */
2702 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2703 break;
2704 case 6:
2705 /* LEA r1, [r2 + r2*2] */
2706 /* ADD r1, r1 */
2707 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2708 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2709 break;
2710 case 9:
2711 /* LEA r1, [r2 + r2*8] */
2712 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2713 break;
2714 case 10:
2715 /* LEA r1, [r2 + r2*4] */
2716 /* ADD r1, r1 */
2717 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2718 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2719 break;
2720 case 12:
2721 /* LEA r1, [r2 + r2*2] */
2722 /* SHL r1, 2 */
2723 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2724 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2725 break;
2726 case 25:
2727 /* LEA r1, [r2 + r2*4] */
2728 /* LEA r1, [r1 + r1*4] */
2729 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2730 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2731 break;
2732 case 100:
2733 /* LEA r1, [r2 + r2*4] */
2734 /* SHL r1, 2 */
2735 /* LEA r1, [r1 + r1*4] */
2736 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2737 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2738 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
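/* i.e. ((x * 5) << 2) * 5 == x * 100 */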
2739 break;
2740 default:
2741 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2742 break;
2744 break;
2745 case OP_IMUL_OVF:
2746 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2747 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2748 break;
2749 case OP_IMUL_OVF_UN: {
2750 /* the mul operation and the exception check should most likely be split */
2751 int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2752 /*g_assert (ins->sreg2 == X86_EAX);
2753 g_assert (ins->dreg == X86_EAX);*/
2754 if (ins->sreg2 == X86_EAX) {
2755 non_eax_reg = ins->sreg1;
2756 } else if (ins->sreg1 == X86_EAX) {
2757 non_eax_reg = ins->sreg2;
2758 } else {
2759 /* no need to save since we're going to store to it anyway */
2760 if (ins->dreg != X86_EAX) {
2761 saved_eax = TRUE;
2762 x86_push_reg (code, X86_EAX);
2764 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2765 non_eax_reg = ins->sreg2;
2767 if (ins->dreg == X86_EDX) {
2768 if (!saved_eax) {
2769 saved_eax = TRUE;
2770 x86_push_reg (code, X86_EAX);
2772 } else if (ins->dreg != X86_EAX) {
2773 saved_edx = TRUE;
2774 x86_push_reg (code, X86_EDX);
2776 x86_mul_reg (code, non_eax_reg, FALSE);
2777 /* save before the check since pop and mov don't change the flags */
2778 if (ins->dreg != X86_EAX)
2779 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2780 if (saved_edx)
2781 x86_pop_reg (code, X86_EDX);
2782 if (saved_eax)
2783 x86_pop_reg (code, X86_EAX);
2784 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2785 break;
2787 case OP_ICONST:
2788 x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2789 break;
2790 case OP_AOTCONST:
2791 g_assert_not_reached ();
2792 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2793 x86_mov_reg_imm (code, ins->dreg, 0);
2794 break;
2795 case OP_JUMP_TABLE:
2796 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2797 x86_mov_reg_imm (code, ins->dreg, 0);
2798 break;
2799 case OP_LOAD_GOTADDR:
2800 g_assert (ins->dreg == MONO_ARCH_GOT_REG);
2801 code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL);
2802 break;
2803 case OP_GOT_ENTRY:
2804 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2805 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2806 break;
2807 case OP_X86_PUSH_GOT_ENTRY:
2808 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2809 x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2810 break;
2811 case OP_MOVE:
2812 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2813 break;
2814 case OP_JMP: {
2816 * Note: this 'frame destruction' logic is useful for tail calls, too.
2817 * Keep in sync with the code in emit_epilog.
2819 int pos = 0;
2821 /* FIXME: no tracing support... */
2822 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2823 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2824 /* reset offset to make max_len work */
2825 offset = code - cfg->native_code;
2827 g_assert (!cfg->method->save_lmf);
2829 code = emit_load_volatile_arguments (cfg, code);
2831 if (cfg->used_int_regs & (1 << X86_EBX))
2832 pos -= 4;
2833 if (cfg->used_int_regs & (1 << X86_EDI))
2834 pos -= 4;
2835 if (cfg->used_int_regs & (1 << X86_ESI))
2836 pos -= 4;
2837 if (pos)
2838 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2840 if (cfg->used_int_regs & (1 << X86_ESI))
2841 x86_pop_reg (code, X86_ESI);
2842 if (cfg->used_int_regs & (1 << X86_EDI))
2843 x86_pop_reg (code, X86_EDI);
2844 if (cfg->used_int_regs & (1 << X86_EBX))
2845 x86_pop_reg (code, X86_EBX);
2847 /* restore ESP/EBP */
2848 x86_leave (code);
2849 offset = code - cfg->native_code;
2850 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2851 x86_jump32 (code, 0);
2853 cfg->disable_aot = TRUE;
2854 break;
2856 case OP_CHECK_THIS:
2857 /* ensure ins->sreg1 is not NULL
2858 * note that cmp DWORD PTR [eax], eax is one byte shorter than
2859 * cmp DWORD PTR [eax], 0
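/* (standard x86 encodings: cmp [eax], eax is 39 00, two bytes, while
 * cmp dword ptr [eax], 0 is 83 38 00, three bytes) */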
2861 x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2862 break;
2863 case OP_ARGLIST: {
2864 int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2865 x86_push_reg (code, hreg);
2866 x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2867 x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2868 x86_pop_reg (code, hreg);
2869 break;
2871 case OP_FCALL:
2872 case OP_LCALL:
2873 case OP_VCALL:
2874 case OP_VCALL2:
2875 case OP_VOIDCALL:
2876 case OP_CALL:
2877 call = (MonoCallInst*)ins;
2878 if (ins->flags & MONO_INST_HAS_METHOD)
2879 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2880 else
2881 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2882 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2883 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2884 * bytes to pop, we want to use pops. GCC does this (note it won't happen
2885 * for P4 or i686 because gcc will avoid using pop push at all), but we aren't
2886 * smart enough to do that optimization yet.
2888 * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2889 * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2890 * speedup (most likely from locality benefits). People with other processors should
2891 * check on theirs to see what happens.
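/* For reference (standard x86 encodings): pop ecx is 59 (one byte),
 * while add esp, imm8 is 83 c4 xx (three bytes). */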
2893 if (call->stack_usage == 4) {
2894 /* we want to use registers that won't get used soon, so use
2895 * ecx, as eax will get allocated first. edx is used by long calls,
2896 * so we can't use that.
2899 x86_pop_reg (code, X86_ECX);
2900 } else {
2901 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2904 code = emit_move_return_value (cfg, ins, code);
2905 break;
2906 case OP_FCALL_REG:
2907 case OP_LCALL_REG:
2908 case OP_VCALL_REG:
2909 case OP_VCALL2_REG:
2910 case OP_VOIDCALL_REG:
2911 case OP_CALL_REG:
2912 call = (MonoCallInst*)ins;
2913 x86_call_reg (code, ins->sreg1);
2914 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2915 if (call->stack_usage == 4)
2916 x86_pop_reg (code, X86_ECX);
2917 else
2918 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2920 code = emit_move_return_value (cfg, ins, code);
2921 break;
2922 case OP_FCALL_MEMBASE:
2923 case OP_LCALL_MEMBASE:
2924 case OP_VCALL_MEMBASE:
2925 case OP_VCALL2_MEMBASE:
2926 case OP_VOIDCALL_MEMBASE:
2927 case OP_CALL_MEMBASE:
2928 call = (MonoCallInst*)ins;
2931 * Emit a few nops to simplify get_vcall_slot ().
2933 x86_nop (code);
2934 x86_nop (code);
2935 x86_nop (code);
2937 x86_call_membase (code, ins->sreg1, ins->inst_offset);
2938 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2939 if (call->stack_usage == 4)
2940 x86_pop_reg (code, X86_ECX);
2941 else
2942 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2944 code = emit_move_return_value (cfg, ins, code);
2945 break;
2946 case OP_X86_PUSH:
2947 x86_push_reg (code, ins->sreg1);
2948 break;
2949 case OP_X86_PUSH_IMM:
2950 x86_push_imm (code, ins->inst_imm);
2951 break;
2952 case OP_X86_PUSH_MEMBASE:
2953 x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2954 break;
2955 case OP_X86_PUSH_OBJ:
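/* Reserve inst_imm bytes on the stack, then copy the value type at
 * [inst_basereg + inst_offset] into that space with rep movsd
 * (inst_imm >> 2 dwords); EDI/ESI/ECX are saved around the copy and
 * the destination lea skips the 12 bytes they occupy. */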
2956 x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2957 x86_push_reg (code, X86_EDI);
2958 x86_push_reg (code, X86_ESI);
2959 x86_push_reg (code, X86_ECX);
2960 if (ins->inst_offset)
2961 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2962 else
2963 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2964 x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2965 x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2966 x86_cld (code);
2967 x86_prefix (code, X86_REP_PREFIX);
2968 x86_movsd (code);
2969 x86_pop_reg (code, X86_ECX);
2970 x86_pop_reg (code, X86_ESI);
2971 x86_pop_reg (code, X86_EDI);
2972 break;
2973 case OP_X86_LEA:
2974 x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2975 break;
2976 case OP_X86_LEA_MEMBASE:
2977 x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2978 break;
2979 case OP_X86_XCHG:
2980 x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2981 break;
2982 case OP_LOCALLOC:
2983 /* keep alignment */
2984 x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2985 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
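/* e.g. assuming MONO_ARCH_LOCALLOC_ALIGNMENT is 8, a request of 13
 * bytes becomes (13 + 7) & ~7 == 16 */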
2986 code = mono_emit_stack_alloc (code, ins);
2987 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2988 break;
2989 case OP_LOCALLOC_IMM: {
2990 guint32 size = ins->inst_imm;
2991 size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
2993 if (ins->flags & MONO_INST_INIT) {
2994 /* FIXME: Optimize this */
2995 x86_mov_reg_imm (code, ins->dreg, size);
2996 ins->sreg1 = ins->dreg;
2998 code = mono_emit_stack_alloc (code, ins);
2999 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
3000 } else {
3001 x86_alu_reg_imm (code, X86_SUB, X86_ESP, size);
3002 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
3004 break;
3006 case OP_THROW: {
3007 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
3008 x86_push_reg (code, ins->sreg1);
3009 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
3010 (gpointer)"mono_arch_throw_exception");
3011 break;
3013 case OP_RETHROW: {
3014 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
3015 x86_push_reg (code, ins->sreg1);
3016 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
3017 (gpointer)"mono_arch_rethrow_exception");
3018 break;
3020 case OP_CALL_HANDLER:
3021 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
3022 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3023 x86_call_imm (code, 0);
3024 mono_cfg_add_try_hole (cfg, ins->inst_eh_block, code, bb);
3025 x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
3026 break;
3027 case OP_START_HANDLER: {
3028 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3029 x86_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, X86_ESP, 4);
3030 break;
3032 case OP_ENDFINALLY: {
3033 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3034 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
3035 x86_ret (code);
3036 break;
3038 case OP_ENDFILTER: {
3039 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3040 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
3041 /* The local allocator will put the result into EAX */
3042 x86_ret (code);
3043 break;
3046 case OP_LABEL:
3047 ins->inst_c0 = code - cfg->native_code;
3048 break;
3049 case OP_BR:
3050 if (ins->inst_target_bb->native_offset) {
3051 x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
3052 } else {
3053 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3054 if ((cfg->opt & MONO_OPT_BRANCH) &&
3055 x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3056 x86_jump8 (code, 0);
3057 else
3058 x86_jump32 (code, 0);
3060 break;
3061 case OP_BR_REG:
3062 x86_jump_reg (code, ins->sreg1);
3063 break;
3064 case OP_CEQ:
3065 case OP_CLT:
3066 case OP_CLT_UN:
3067 case OP_CGT:
3068 case OP_CGT_UN:
3069 case OP_CNE:
3070 case OP_ICEQ:
3071 case OP_ICLT:
3072 case OP_ICLT_UN:
3073 case OP_ICGT:
3074 case OP_ICGT_UN:
3075 x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
3076 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3077 break;
3078 case OP_COND_EXC_EQ:
3079 case OP_COND_EXC_NE_UN:
3080 case OP_COND_EXC_LT:
3081 case OP_COND_EXC_LT_UN:
3082 case OP_COND_EXC_GT:
3083 case OP_COND_EXC_GT_UN:
3084 case OP_COND_EXC_GE:
3085 case OP_COND_EXC_GE_UN:
3086 case OP_COND_EXC_LE:
3087 case OP_COND_EXC_LE_UN:
3088 case OP_COND_EXC_IEQ:
3089 case OP_COND_EXC_INE_UN:
3090 case OP_COND_EXC_ILT:
3091 case OP_COND_EXC_ILT_UN:
3092 case OP_COND_EXC_IGT:
3093 case OP_COND_EXC_IGT_UN:
3094 case OP_COND_EXC_IGE:
3095 case OP_COND_EXC_IGE_UN:
3096 case OP_COND_EXC_ILE:
3097 case OP_COND_EXC_ILE_UN:
3098 EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
3099 break;
3100 case OP_COND_EXC_OV:
3101 case OP_COND_EXC_NO:
3102 case OP_COND_EXC_C:
3103 case OP_COND_EXC_NC:
3104 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3105 break;
3106 case OP_COND_EXC_IOV:
3107 case OP_COND_EXC_INO:
3108 case OP_COND_EXC_IC:
3109 case OP_COND_EXC_INC:
3110 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
3111 break;
3112 case OP_IBEQ:
3113 case OP_IBNE_UN:
3114 case OP_IBLT:
3115 case OP_IBLT_UN:
3116 case OP_IBGT:
3117 case OP_IBGT_UN:
3118 case OP_IBGE:
3119 case OP_IBGE_UN:
3120 case OP_IBLE:
3121 case OP_IBLE_UN:
3122 EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
3123 break;
3125 case OP_CMOV_IEQ:
3126 case OP_CMOV_IGE:
3127 case OP_CMOV_IGT:
3128 case OP_CMOV_ILE:
3129 case OP_CMOV_ILT:
3130 case OP_CMOV_INE_UN:
3131 case OP_CMOV_IGE_UN:
3132 case OP_CMOV_IGT_UN:
3133 case OP_CMOV_ILE_UN:
3134 case OP_CMOV_ILT_UN:
3135 g_assert (ins->dreg == ins->sreg1);
3136 x86_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
3137 break;
3139 /* floating point opcodes */
3140 case OP_R8CONST: {
3141 double d = *(double *)ins->inst_p0;
3143 if ((d == 0.0) && (mono_signbit (d) == 0)) {
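/* fldz pushes +0.0, so it is only safe when the sign bit is clear:
 * -0.0 == 0.0 compares equal but must take the constant-load path */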
3144 x86_fldz (code);
3145 } else if (d == 1.0) {
3146 x86_fld1 (code);
3147 } else {
3148 if (cfg->compile_aot) {
3149 guint32 *val = (guint32*)&d;
3150 x86_push_imm (code, val [1]);
3151 x86_push_imm (code, val [0]);
3152 x86_fld_membase (code, X86_ESP, 0, TRUE);
3153 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3155 else {
3156 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
3157 x86_fld (code, NULL, TRUE);
3160 break;
3162 case OP_R4CONST: {
3163 float f = *(float *)ins->inst_p0;
3165 if ((f == 0.0) && (mono_signbit (f) == 0)) {
3166 x86_fldz (code);
3167 } else if (f == 1.0) {
3168 x86_fld1 (code);
3169 } else {
3170 if (cfg->compile_aot) {
3171 guint32 val = *(guint32*)&f;
3172 x86_push_imm (code, val);
3173 x86_fld_membase (code, X86_ESP, 0, FALSE);
3174 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3176 else {
3177 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
3178 x86_fld (code, NULL, FALSE);
3181 break;
3183 case OP_STORER8_MEMBASE_REG:
3184 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3185 break;
3186 case OP_LOADR8_MEMBASE:
3187 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3188 break;
3189 case OP_STORER4_MEMBASE_REG:
3190 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3191 break;
3192 case OP_LOADR4_MEMBASE:
3193 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3194 break;
3195 case OP_ICONV_TO_R4:
3196 x86_push_reg (code, ins->sreg1);
3197 x86_fild_membase (code, X86_ESP, 0, FALSE);
3198 /* Change precision */
3199 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3200 x86_fld_membase (code, X86_ESP, 0, FALSE);
3201 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3202 break;
3203 case OP_ICONV_TO_R8:
3204 x86_push_reg (code, ins->sreg1);
3205 x86_fild_membase (code, X86_ESP, 0, FALSE);
3206 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3207 break;
3208 case OP_ICONV_TO_R_UN:
3209 x86_push_imm (code, 0);
3210 x86_push_reg (code, ins->sreg1);
3211 x86_fild_membase (code, X86_ESP, 0, TRUE);
3212 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3213 break;
3214 case OP_X86_FP_LOAD_I8:
3215 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3216 break;
3217 case OP_X86_FP_LOAD_I4:
3218 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3219 break;
3220 case OP_FCONV_TO_R4:
3221 /* Change precision */
3222 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3223 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3224 x86_fld_membase (code, X86_ESP, 0, FALSE);
3225 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3226 break;
3227 case OP_FCONV_TO_I1:
3228 code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3229 break;
3230 case OP_FCONV_TO_U1:
3231 code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3232 break;
3233 case OP_FCONV_TO_I2:
3234 code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3235 break;
3236 case OP_FCONV_TO_U2:
3237 code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3238 break;
3239 case OP_FCONV_TO_I4:
3240 case OP_FCONV_TO_I:
3241 code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3242 break;
3243 case OP_FCONV_TO_I8:
3244 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3245 x86_fnstcw_membase(code, X86_ESP, 0);
3246 x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
3247 x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
3248 x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
3249 x86_fldcw_membase (code, X86_ESP, 2);
3250 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3251 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
3252 x86_pop_reg (code, ins->dreg);
3253 x86_pop_reg (code, ins->backend.reg3);
3254 x86_fldcw_membase (code, X86_ESP, 0);
3255 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3256 break;
3257 case OP_LCONV_TO_R8_2:
3258 x86_push_reg (code, ins->sreg2);
3259 x86_push_reg (code, ins->sreg1);
3260 x86_fild_membase (code, X86_ESP, 0, TRUE);
3261 /* Change precision */
3262 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
3263 x86_fld_membase (code, X86_ESP, 0, TRUE);
3264 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3265 break;
3266 case OP_LCONV_TO_R4_2:
3267 x86_push_reg (code, ins->sreg2);
3268 x86_push_reg (code, ins->sreg1);
3269 x86_fild_membase (code, X86_ESP, 0, TRUE);
3270 /* Change precision */
3271 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3272 x86_fld_membase (code, X86_ESP, 0, FALSE);
3273 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3274 break;
3275 case OP_LCONV_TO_R_UN_2: {
3276 static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
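/* mn is 2^64 as an 80-bit extended double (significand 0x8000000000000000,
 * biased exponent 0x403f); adding it to the signed interpretation of a
 * negative 64-bit value produces the intended unsigned value */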
3277 guint8 *br;
3279 /* load 64bit integer to FP stack */
3280 x86_push_reg (code, ins->sreg2);
3281 x86_push_reg (code, ins->sreg1);
3282 x86_fild_membase (code, X86_ESP, 0, TRUE);
3284 /* test if lreg is negative */
3285 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3286 br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3288 /* add correction constant mn */
3289 x86_fld80_mem (code, mn);
3290 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3292 x86_patch (br, code);
3294 /* Change precision */
3295 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
3296 x86_fld_membase (code, X86_ESP, 0, TRUE);
3298 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3300 break;
3302 case OP_LCONV_TO_OVF_I:
3303 case OP_LCONV_TO_OVF_I4_2: {
3304 guint8 *br [3], *label [1];
3305 MonoInst *tins;
3308 * Valid ints: 0xffffffff:80000000 to 0x00000000:7fffffff
3310 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3312 /* If the low word top bit is set, see if we are negative */
3313 br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3314 /* We are not negative (no top bit set), check that our top word is zero */
3315 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3316 br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3317 label [0] = code;
3319 /* throw exception */
3320 tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
3321 if (tins) {
3322 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
3323 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
3324 x86_jump8 (code, 0);
3325 else
3326 x86_jump32 (code, 0);
3327 } else {
3328 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3329 x86_jump32 (code, 0);
3333 x86_patch (br [0], code);
3334 /* our top bit is set, check that top word is 0xffffffff */
3335 x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3337 x86_patch (br [1], code);
3338 /* nope, emit exception */
3339 br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3340 x86_patch (br [2], label [0]);
3342 if (ins->dreg != ins->sreg1)
3343 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3344 break;
3346 case OP_FMOVE:
3347 /* Not needed on the fp stack */
3348 break;
3349 case OP_FADD:
3350 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3351 break;
3352 case OP_FSUB:
3353 x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3354 break;
3355 case OP_FMUL:
3356 x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3357 break;
3358 case OP_FDIV:
3359 x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3360 break;
3361 case OP_FNEG:
3362 x86_fchs (code);
3363 break;
3364 case OP_SIN:
3365 x86_fsin (code);
3366 x86_fldz (code);
3367 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3368 break;
3369 case OP_COS:
3370 x86_fcos (code);
3371 x86_fldz (code);
3372 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3373 break;
3374 case OP_ABS:
3375 x86_fabs (code);
3376 break;
3377 case OP_TAN: {
3379 * it really doesn't make sense to inline all this code,
3380 * it's here just to show that things may not be as simple
3381 * as they appear.
3383 guchar *check_pos, *end_tan, *pop_jump;
3384 x86_push_reg (code, X86_EAX);
3385 x86_fptan (code);
3386 x86_fnstsw (code);
3387 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3388 check_pos = code;
3389 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3390 x86_fstp (code, 0); /* pop the 1.0 */
3391 end_tan = code;
3392 x86_jump8 (code, 0);
3393 x86_fldpi (code);
3394 x86_fp_op (code, X86_FADD, 0);
3395 x86_fxch (code, 1);
3396 x86_fprem1 (code);
3397 x86_fstsw (code);
3398 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3399 pop_jump = code;
3400 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3401 x86_fstp (code, 1);
3402 x86_fptan (code);
3403 x86_patch (pop_jump, code);
3404 x86_fstp (code, 0); /* pop the 1.0 */
3405 x86_patch (check_pos, code);
3406 x86_patch (end_tan, code);
3407 x86_fldz (code);
3408 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3409 x86_pop_reg (code, X86_EAX);
3410 break;
3412 case OP_ATAN:
3413 x86_fld1 (code);
3414 x86_fpatan (code);
3415 x86_fldz (code);
3416 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3417 break;
3418 case OP_SQRT:
3419 x86_fsqrt (code);
3420 break;
3421 case OP_ROUND:
3422 x86_frndint (code);
3423 break;
3424 case OP_IMIN:
3425 g_assert (cfg->opt & MONO_OPT_CMOV);
3426 g_assert (ins->dreg == ins->sreg1);
3427 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3428 x86_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
3429 break;
3430 case OP_IMIN_UN:
3431 g_assert (cfg->opt & MONO_OPT_CMOV);
3432 g_assert (ins->dreg == ins->sreg1);
3433 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3434 x86_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
3435 break;
3436 case OP_IMAX:
3437 g_assert (cfg->opt & MONO_OPT_CMOV);
3438 g_assert (ins->dreg == ins->sreg1);
3439 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3440 x86_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
3441 break;
3442 case OP_IMAX_UN:
3443 g_assert (cfg->opt & MONO_OPT_CMOV);
3444 g_assert (ins->dreg == ins->sreg1);
3445 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3446 x86_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
3447 break;
3448 case OP_X86_FPOP:
3449 x86_fstp (code, 0);
3450 break;
3451 case OP_X86_FXCH:
3452 x86_fxch (code, ins->inst_imm);
3453 break;
3454 case OP_FREM: {
3455 guint8 *l1, *l2;
3457 x86_push_reg (code, X86_EAX);
3458 /* we need to exchange ST(0) with ST(1) */
3459 x86_fxch (code, 1);
3461 /* this requires a loop, because fprem sometimes
3462 * returns a partial remainder */
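/* fprem reduces the exponent difference by at most 63 bits per execution
 * and sets C2 while the reduction is still incomplete, hence the retry
 * loop below */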
3463 l1 = code;
3464 /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3465 /* x86_fprem1 (code); */
3466 x86_fprem (code);
3467 x86_fnstsw (code);
3468 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3469 l2 = code + 2;
3470 x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3472 /* pop result */
3473 x86_fstp (code, 1);
3475 x86_pop_reg (code, X86_EAX);
3476 break;
3478 case OP_FCOMPARE:
3479 if (cfg->opt & MONO_OPT_FCMOV) {
3480 x86_fcomip (code, 1);
3481 x86_fstp (code, 0);
3482 break;
3484 /* this overwrites EAX */
3485 EMIT_FPCOMPARE(code);
3486 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3487 break;
3488 case OP_FCEQ:
3489 if (cfg->opt & MONO_OPT_FCMOV) {
3490 /* zeroing the register at the start results in
3491 * shorter and faster code (we can also remove the widening op)
3493 guchar *unordered_check;
3494 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3495 x86_fcomip (code, 1);
3496 x86_fstp (code, 0);
3497 unordered_check = code;
3498 x86_branch8 (code, X86_CC_P, 0, FALSE);
3499 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3500 x86_patch (unordered_check, code);
3501 break;
3503 if (ins->dreg != X86_EAX)
3504 x86_push_reg (code, X86_EAX);
3506 EMIT_FPCOMPARE(code);
3507 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3508 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3509 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3510 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3512 if (ins->dreg != X86_EAX)
3513 x86_pop_reg (code, X86_EAX);
3514 break;
3515 case OP_FCLT:
3516 case OP_FCLT_UN:
3517 if (cfg->opt & MONO_OPT_FCMOV) {
3518 /* zeroing the register at the start results in
3519 * shorter and faster code (we can also remove the widening op)
3521 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3522 x86_fcomip (code, 1);
3523 x86_fstp (code, 0);
3524 if (ins->opcode == OP_FCLT_UN) {
3525 guchar *unordered_check = code;
3526 guchar *jump_to_end;
3527 x86_branch8 (code, X86_CC_P, 0, FALSE);
3528 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3529 jump_to_end = code;
3530 x86_jump8 (code, 0);
3531 x86_patch (unordered_check, code);
3532 x86_inc_reg (code, ins->dreg);
3533 x86_patch (jump_to_end, code);
3534 } else {
3535 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3537 break;
3539 if (ins->dreg != X86_EAX)
3540 x86_push_reg (code, X86_EAX);
3542 EMIT_FPCOMPARE(code);
3543 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3544 if (ins->opcode == OP_FCLT_UN) {
3545 guchar *is_not_zero_check, *end_jump;
3546 is_not_zero_check = code;
3547 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3548 end_jump = code;
3549 x86_jump8 (code, 0);
3550 x86_patch (is_not_zero_check, code);
3551 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3553 x86_patch (end_jump, code);
3555 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3556 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3558 if (ins->dreg != X86_EAX)
3559 x86_pop_reg (code, X86_EAX);
3560 break;
3561 case OP_FCGT:
3562 case OP_FCGT_UN:
3563 if (cfg->opt & MONO_OPT_FCMOV) {
3564 /* zeroing the register at the start results in
3565 * shorter and faster code (we can also remove the widening op)
3567 guchar *unordered_check;
3568 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3569 x86_fcomip (code, 1);
3570 x86_fstp (code, 0);
3571 if (ins->opcode == OP_FCGT) {
3572 unordered_check = code;
3573 x86_branch8 (code, X86_CC_P, 0, FALSE);
3574 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3575 x86_patch (unordered_check, code);
3576 } else {
3577 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3579 break;
3581 if (ins->dreg != X86_EAX)
3582 x86_push_reg (code, X86_EAX);
3584 EMIT_FPCOMPARE(code);
3585 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3586 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3587 if (ins->opcode == OP_FCGT_UN) {
3588 guchar *is_not_zero_check, *end_jump;
3589 is_not_zero_check = code;
3590 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3591 end_jump = code;
3592 x86_jump8 (code, 0);
3593 x86_patch (is_not_zero_check, code);
3594 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3596 x86_patch (end_jump, code);
3598 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3599 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3601 if (ins->dreg != X86_EAX)
3602 x86_pop_reg (code, X86_EAX);
3603 break;
3604 case OP_FBEQ:
3605 if (cfg->opt & MONO_OPT_FCMOV) {
3606 guchar *jump = code;
3607 x86_branch8 (code, X86_CC_P, 0, TRUE);
3608 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3609 x86_patch (jump, code);
3610 break;
3612 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3613 EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3614 break;
3615 case OP_FBNE_UN:
3616 /* Branch if C013 != 100 */
3617 if (cfg->opt & MONO_OPT_FCMOV) {
3618 /* branch if !ZF or (PF|CF) */
3619 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3620 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3621 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3622 break;
3624 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3625 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3626 break;
3627 case OP_FBLT:
3628 if (cfg->opt & MONO_OPT_FCMOV) {
3629 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3630 break;
3632 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3633 break;
3634 case OP_FBLT_UN:
3635 if (cfg->opt & MONO_OPT_FCMOV) {
3636 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3637 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3638 break;
3640 if (ins->opcode == OP_FBLT_UN) {
3641 guchar *is_not_zero_check, *end_jump;
3642 is_not_zero_check = code;
3643 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3644 end_jump = code;
3645 x86_jump8 (code, 0);
3646 x86_patch (is_not_zero_check, code);
3647 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3649 x86_patch (end_jump, code);
3651 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3652 break;
3653 case OP_FBGT:
3654 case OP_FBGT_UN:
3655 if (cfg->opt & MONO_OPT_FCMOV) {
3656 if (ins->opcode == OP_FBGT) {
3657 guchar *br1;
3659 /* skip branch if C1=1 */
3660 br1 = code;
3661 x86_branch8 (code, X86_CC_P, 0, FALSE);
3662 /* branch if (C0 | C3) = 1 */
3663 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3664 x86_patch (br1, code);
3665 } else {
3666 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3668 break;
3670 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3671 if (ins->opcode == OP_FBGT_UN) {
3672 guchar *is_not_zero_check, *end_jump;
3673 is_not_zero_check = code;
3674 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3675 end_jump = code;
3676 x86_jump8 (code, 0);
3677 x86_patch (is_not_zero_check, code);
3678 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3680 x86_patch (end_jump, code);
3682 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3683 break;
3684 case OP_FBGE:
3685 /* Branch if C013 == 100 or 001 */
3686 if (cfg->opt & MONO_OPT_FCMOV) {
3687 guchar *br1;
3689 /* skip branch if C1=1 */
3690 br1 = code;
3691 x86_branch8 (code, X86_CC_P, 0, FALSE);
3692 /* branch if (C0 | C3) = 1 */
3693 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3694 x86_patch (br1, code);
3695 break;
3697 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3698 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3699 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3700 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3701 break;
3702 case OP_FBGE_UN:
3703 /* Branch if C013 == 000 */
3704 if (cfg->opt & MONO_OPT_FCMOV) {
3705 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3706 break;
3708 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3709 break;
3710 case OP_FBLE:
3711 /* Branch if C013=000 or 100 */
3712 if (cfg->opt & MONO_OPT_FCMOV) {
3713 guchar *br1;
3715 /* skip branch if C1=1 */
3716 br1 = code;
3717 x86_branch8 (code, X86_CC_P, 0, FALSE);
3718 /* branch if C0=0 */
3719 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3720 x86_patch (br1, code);
3721 break;
3723 x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3724 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3725 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3726 break;
3727 case OP_FBLE_UN:
3728 /* Branch if C013 != 001 */
3729 if (cfg->opt & MONO_OPT_FCMOV) {
3730 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3731 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3732 break;
3734 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3735 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3736 break;
3737 case OP_CKFINITE: {
3738 guchar *br1;
3739 x86_push_reg (code, X86_EAX);
3740 x86_fxam (code);
3741 x86_fnstsw (code);
3742 x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3743 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3744 x86_pop_reg (code, X86_EAX);
3746 /* Have to clean up the fp stack before throwing the exception */
3747 br1 = code;
3748 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3750 x86_fstp (code, 0);
3751 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3753 x86_patch (br1, code);
3754 break;
3756 case OP_TLS_GET: {
3757 code = mono_x86_emit_tls_get (code, ins->dreg, ins->inst_offset);
3758 break;
3760 case OP_MEMORY_BARRIER: {
3761 /* Not needed on x86 */
3762 break;
3764 case OP_ATOMIC_ADD_I4: {
3765 int dreg = ins->dreg;
3767 if (dreg == ins->inst_basereg) {
3768 x86_push_reg (code, ins->sreg2);
3769 dreg = ins->sreg2;
3772 if (dreg != ins->sreg2)
3773 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3775 x86_prefix (code, X86_LOCK_PREFIX);
3776 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3778 if (dreg != ins->dreg) {
3779 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3780 x86_pop_reg (code, dreg);
3783 break;
3785 case OP_ATOMIC_ADD_NEW_I4: {
3786 int dreg = ins->dreg;
3788 /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3789 if (ins->sreg2 == dreg) {
3790 if (dreg == X86_EBX) {
3791 dreg = X86_EDI;
3792 if (ins->inst_basereg == X86_EDI)
3793 dreg = X86_ESI;
3794 } else {
3795 dreg = X86_EBX;
3796 if (ins->inst_basereg == X86_EBX)
3797 dreg = X86_EDI;
3799 } else if (ins->inst_basereg == dreg) {
3800 if (dreg == X86_EBX) {
3801 dreg = X86_EDI;
3802 if (ins->sreg2 == X86_EDI)
3803 dreg = X86_ESI;
3804 } else {
3805 dreg = X86_EBX;
3806 if (ins->sreg2 == X86_EBX)
3807 dreg = X86_EDI;
3811 if (dreg != ins->dreg) {
3812 x86_push_reg (code, dreg);
3815 x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3816 x86_prefix (code, X86_LOCK_PREFIX);
3817 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3818 /* dreg contains the old value, add with sreg2 value */
3819 x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3821 if (ins->dreg != dreg) {
3822 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3823 x86_pop_reg (code, dreg);
3826 break;
3828 case OP_ATOMIC_EXCHANGE_I4: {
3829 guchar *br[2];
3830 int sreg2 = ins->sreg2;
3831 int breg = ins->inst_basereg;
3833 /* cmpxchg uses eax as the comparand, so we need to make sure we can
3834  * use it; this is a hack to overcome limits in the x86 reg allocator
3835  * (req: dreg == eax and sreg2 != eax and breg != eax)
3836  */
3837 g_assert (ins->dreg == X86_EAX);
3839 /* We need the EAX reg for the cmpxchg */
3840 if (ins->sreg2 == X86_EAX) {
3841 sreg2 = (breg == X86_EDX) ? X86_EBX : X86_EDX;
3842 x86_push_reg (code, sreg2);
3843 x86_mov_reg_reg (code, sreg2, X86_EAX, 4);
3844 }
3846 if (breg == X86_EAX) {
3847 breg = (sreg2 == X86_ESI) ? X86_EDI : X86_ESI;
3848 x86_push_reg (code, breg);
3849 x86_mov_reg_reg (code, breg, X86_EAX, 4);
3850 }
3852 x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3854 br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3855 x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3856 br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3857 x86_patch (br [1], br [0]);
3859 if (breg != ins->inst_basereg)
3860 x86_pop_reg (code, breg);
3862 if (ins->sreg2 != sreg2)
3863 x86_pop_reg (code, sreg2);
3865 break;
3866 }
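/* The exchange above is the usual cmpxchg loop: load the current value
 * into %eax, then lock cmpxchg the new value in and retry while another
 * thread won the race. Roughly, for breg=%ecx and sreg2=%edx:
 *   mov 0x0(%ecx), %eax
 * 1: lock cmpxchg %edx, 0x0(%ecx)
 *   jne 1b
 */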
3867 case OP_ATOMIC_CAS_I4: {
3868 g_assert (ins->sreg3 == X86_EAX);
3869 g_assert (ins->sreg1 != X86_EAX);
3870 g_assert (ins->sreg1 != ins->sreg2);
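/* Compare-and-swap maps onto a single lock cmpxchg: %eax (sreg3) holds
 * the comparand, sreg2 the new value, and afterwards %eax holds the value
 * that was in memory, which is what gets copied into dreg below. */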
3872 x86_prefix (code, X86_LOCK_PREFIX);
3873 x86_cmpxchg_membase_reg (code, ins->sreg1, ins->inst_offset, ins->sreg2);
3875 if (ins->dreg != X86_EAX)
3876 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3877 break;
3878 }
3879 #ifdef MONO_ARCH_SIMD_INTRINSICS
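/* The Mono.Simd opcodes below map nearly 1:1 onto SSE1-SSE4.1
 * instructions. The binary ops follow the two-address x86 form: the
 * register allocator is expected to arrange dreg == sreg1, which is why
 * only sreg1/sreg2 are passed to the emitters. */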
3880 case OP_ADDPS:
3881 x86_sse_alu_ps_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
3882 break;
3883 case OP_DIVPS:
3884 x86_sse_alu_ps_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
3885 break;
3886 case OP_MULPS:
3887 x86_sse_alu_ps_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
3888 break;
3889 case OP_SUBPS:
3890 x86_sse_alu_ps_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
3891 break;
3892 case OP_MAXPS:
3893 x86_sse_alu_ps_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
3894 break;
3895 case OP_MINPS:
3896 x86_sse_alu_ps_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
3897 break;
3898 case OP_COMPPS:
3899 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
3900 x86_sse_alu_ps_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
3901 break;
3902 case OP_ANDPS:
3903 x86_sse_alu_ps_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
3904 break;
3905 case OP_ANDNPS:
3906 x86_sse_alu_ps_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
3907 break;
3908 case OP_ORPS:
3909 x86_sse_alu_ps_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
3910 break;
3911 case OP_XORPS:
3912 x86_sse_alu_ps_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
3913 break;
3914 case OP_SQRTPS:
3915 x86_sse_alu_ps_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
3916 break;
3917 case OP_RSQRTPS:
3918 x86_sse_alu_ps_reg_reg (code, X86_SSE_RSQRT, ins->dreg, ins->sreg1);
3919 break;
3920 case OP_RCPPS:
3921 x86_sse_alu_ps_reg_reg (code, X86_SSE_RCP, ins->dreg, ins->sreg1);
3922 break;
3923 case OP_ADDSUBPS:
3924 x86_sse_alu_sd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
3925 break;
3926 case OP_HADDPS:
3927 x86_sse_alu_sd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
3928 break;
3929 case OP_HSUBPS:
3930 x86_sse_alu_sd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
3931 break;
3932 case OP_DUPPS_HIGH:
3933 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSHDUP, ins->dreg, ins->sreg1);
3934 break;
3935 case OP_DUPPS_LOW:
3936 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSLDUP, ins->dreg, ins->sreg1);
3937 break;
3939 case OP_PSHUFLEW_HIGH:
3940 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3941 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 1);
3942 break;
3943 case OP_PSHUFLEW_LOW:
3944 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3945 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 0);
3946 break;
3947 case OP_PSHUFLED:
3948 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3949 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->sreg1, ins->inst_c0);
3950 break;
3952 case OP_ADDPD:
3953 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
3954 break;
3955 case OP_DIVPD:
3956 x86_sse_alu_pd_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
3957 break;
3958 case OP_MULPD:
3959 x86_sse_alu_pd_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
3960 break;
3961 case OP_SUBPD:
3962 x86_sse_alu_pd_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
3963 break;
3964 case OP_MAXPD:
3965 x86_sse_alu_pd_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
3966 break;
3967 case OP_MINPD:
3968 x86_sse_alu_pd_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
3969 break;
3970 case OP_COMPPD:
3971 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
3972 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
3973 break;
3974 case OP_ANDPD:
3975 x86_sse_alu_pd_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
3976 break;
3977 case OP_ANDNPD:
3978 x86_sse_alu_pd_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
3979 break;
3980 case OP_ORPD:
3981 x86_sse_alu_pd_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
3982 break;
3983 case OP_XORPD:
3984 x86_sse_alu_pd_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
3985 break;
3986 case OP_SQRTPD:
3987 x86_sse_alu_pd_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
3988 break;
3989 case OP_ADDSUBPD:
3990 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
3991 break;
3992 case OP_HADDPD:
3993 x86_sse_alu_pd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
3994 break;
3995 case OP_HSUBPD:
3996 x86_sse_alu_pd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
3997 break;
3998 case OP_DUPPD:
3999 x86_sse_alu_sd_reg_reg (code, X86_SSE_MOVDDUP, ins->dreg, ins->sreg1);
4000 break;
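/* pmovmskb (used for OP_EXTRACT_MASK below) packs the most significant
 * bit of each byte of the xmm source into the low 16 bits of the integer
 * destination, producing a per-element mask. */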
4002 case OP_EXTRACT_MASK:
4003 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMOVMSKB, ins->dreg, ins->sreg1);
4004 break;
4006 case OP_PAND:
4007 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAND, ins->sreg1, ins->sreg2);
4008 break;
4009 case OP_POR:
4010 x86_sse_alu_pd_reg_reg (code, X86_SSE_POR, ins->sreg1, ins->sreg2);
4011 break;
4012 case OP_PXOR:
4013 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->sreg1, ins->sreg2);
4014 break;
4016 case OP_PADDB:
4017 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDB, ins->sreg1, ins->sreg2);
4018 break;
4019 case OP_PADDW:
4020 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDW, ins->sreg1, ins->sreg2);
4021 break;
4022 case OP_PADDD:
4023 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDD, ins->sreg1, ins->sreg2);
4024 break;
4025 case OP_PADDQ:
4026 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDQ, ins->sreg1, ins->sreg2);
4027 break;
4029 case OP_PSUBB:
4030 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBB, ins->sreg1, ins->sreg2);
4031 break;
4032 case OP_PSUBW:
4033 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBW, ins->sreg1, ins->sreg2);
4034 break;
4035 case OP_PSUBD:
4036 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBD, ins->sreg1, ins->sreg2);
4037 break;
4038 case OP_PSUBQ:
4039 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBQ, ins->sreg1, ins->sreg2);
4040 break;
4042 case OP_PMAXB_UN:
4043 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXUB, ins->sreg1, ins->sreg2);
4044 break;
4045 case OP_PMAXW_UN:
4046 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUW, ins->sreg1, ins->sreg2);
4047 break;
4048 case OP_PMAXD_UN:
4049 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUD, ins->sreg1, ins->sreg2);
4050 break;
4052 case OP_PMAXB:
4053 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSB, ins->sreg1, ins->sreg2);
4054 break;
4055 case OP_PMAXW:
4056 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXSW, ins->sreg1, ins->sreg2);
4057 break;
4058 case OP_PMAXD:
4059 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSD, ins->sreg1, ins->sreg2);
4060 break;
4062 case OP_PAVGB_UN:
4063 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGB, ins->sreg1, ins->sreg2);
4064 break;
4065 case OP_PAVGW_UN:
4066 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGW, ins->sreg1, ins->sreg2);
4067 break;
4069 case OP_PMINB_UN:
4070 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINUB, ins->sreg1, ins->sreg2);
4071 break;
4072 case OP_PMINW_UN:
4073 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUW, ins->sreg1, ins->sreg2);
4074 break;
4075 case OP_PMIND_UN:
4076 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUD, ins->sreg1, ins->sreg2);
4077 break;
4079 case OP_PMINB:
4080 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSB, ins->sreg1, ins->sreg2);
4081 break;
4082 case OP_PMINW:
4083 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINSW, ins->sreg1, ins->sreg2);
4084 break;
4085 case OP_PMIND:
4086 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSD, ins->sreg1, ins->sreg2);
4087 break;
4089 case OP_PCMPEQB:
4090 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQB, ins->sreg1, ins->sreg2);
4091 break;
4092 case OP_PCMPEQW:
4093 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQW, ins->sreg1, ins->sreg2);
4094 break;
4095 case OP_PCMPEQD:
4096 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2);
4097 break;
4098 case OP_PCMPEQQ:
4099 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPEQQ, ins->sreg1, ins->sreg2);
4100 break;
4102 case OP_PCMPGTB:
4103 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2);
4104 break;
4105 case OP_PCMPGTW:
4106 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTW, ins->sreg1, ins->sreg2);
4107 break;
4108 case OP_PCMPGTD:
4109 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2);
4110 break;
4111 case OP_PCMPGTQ:
4112 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPGTQ, ins->sreg1, ins->sreg2);
4113 break;
4115 case OP_PSUM_ABS_DIFF:
4116 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2);
4117 break;
4119 case OP_UNPACK_LOWB:
4120 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLBW, ins->sreg1, ins->sreg2);
4121 break;
4122 case OP_UNPACK_LOWW:
4123 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLWD, ins->sreg1, ins->sreg2);
4124 break;
4125 case OP_UNPACK_LOWD:
4126 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLDQ, ins->sreg1, ins->sreg2);
4127 break;
4128 case OP_UNPACK_LOWQ:
4129 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLQDQ, ins->sreg1, ins->sreg2);
4130 break;
4131 case OP_UNPACK_LOWPS:
4132 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4133 break;
4134 case OP_UNPACK_LOWPD:
4135 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4136 break;
4138 case OP_UNPACK_HIGHB:
4139 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHBW, ins->sreg1, ins->sreg2);
4140 break;
4141 case OP_UNPACK_HIGHW:
4142 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHWD, ins->sreg1, ins->sreg2);
4143 break;
4144 case OP_UNPACK_HIGHD:
4145 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHDQ, ins->sreg1, ins->sreg2);
4146 break;
4147 case OP_UNPACK_HIGHQ:
4148 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHQDQ, ins->sreg1, ins->sreg2);
4149 break;
4150 case OP_UNPACK_HIGHPS:
4151 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4152 break;
4153 case OP_UNPACK_HIGHPD:
4154 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4155 break;
4157 case OP_PACKW:
4158 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSWB, ins->sreg1, ins->sreg2);
4159 break;
4160 case OP_PACKD:
4161 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSDW, ins->sreg1, ins->sreg2);
4162 break;
4163 case OP_PACKW_UN:
4164 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKUSWB, ins->sreg1, ins->sreg2);
4165 break;
4166 case OP_PACKD_UN:
4167 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PACKUSDW, ins->sreg1, ins->sreg2);
4168 break;
4170 case OP_PADDB_SAT_UN:
4171 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSB, ins->sreg1, ins->sreg2);
4172 break;
4173 case OP_PSUBB_SAT_UN:
4174 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSB, ins->sreg1, ins->sreg2);
4175 break;
4176 case OP_PADDW_SAT_UN:
4177 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSW, ins->sreg1, ins->sreg2);
4178 break;
4179 case OP_PSUBW_SAT_UN:
4180 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSW, ins->sreg1, ins->sreg2);
4181 break;
4183 case OP_PADDB_SAT:
4184 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSB, ins->sreg1, ins->sreg2);
4185 break;
4186 case OP_PSUBB_SAT:
4187 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSB, ins->sreg1, ins->sreg2);
4188 break;
4189 case OP_PADDW_SAT:
4190 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSW, ins->sreg1, ins->sreg2);
4191 break;
4192 case OP_PSUBW_SAT:
4193 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSW, ins->sreg1, ins->sreg2);
4194 break;
4196 case OP_PMULW:
4197 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULLW, ins->sreg1, ins->sreg2);
4198 break;
4199 case OP_PMULD:
4200 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMULLD, ins->sreg1, ins->sreg2);
4201 break;
4202 case OP_PMULQ:
4203 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULUDQ, ins->sreg1, ins->sreg2);
4204 break;
4205 case OP_PMULW_HIGH_UN:
4206 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHUW, ins->sreg1, ins->sreg2);
4207 break;
4208 case OP_PMULW_HIGH:
4209 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHW, ins->sreg1, ins->sreg2);
4210 break;
4212 case OP_PSHRW:
4213 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4214 break;
4215 case OP_PSHRW_REG:
4216 x86_sse_shift_reg_reg (code, X86_SSE_PSRLW_REG, ins->dreg, ins->sreg2);
4217 break;
4219 case OP_PSARW:
4220 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4221 break;
4222 case OP_PSARW_REG:
4223 x86_sse_shift_reg_reg (code, X86_SSE_PSRAW_REG, ins->dreg, ins->sreg2);
4224 break;
4226 case OP_PSHLW:
4227 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4228 break;
4229 case OP_PSHLW_REG:
4230 x86_sse_shift_reg_reg (code, X86_SSE_PSLLW_REG, ins->dreg, ins->sreg2);
4231 break;
4233 case OP_PSHRD:
4234 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4235 break;
4236 case OP_PSHRD_REG:
4237 x86_sse_shift_reg_reg (code, X86_SSE_PSRLD_REG, ins->dreg, ins->sreg2);
4238 break;
4240 case OP_PSARD:
4241 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4242 break;
4243 case OP_PSARD_REG:
4244 x86_sse_shift_reg_reg (code, X86_SSE_PSRAD_REG, ins->dreg, ins->sreg2);
4245 break;
4247 case OP_PSHLD:
4248 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4249 break;
4250 case OP_PSHLD_REG:
4251 x86_sse_shift_reg_reg (code, X86_SSE_PSLLD_REG, ins->dreg, ins->sreg2);
4252 break;
4254 case OP_PSHRQ:
4255 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4256 break;
4257 case OP_PSHRQ_REG:
4258 x86_sse_shift_reg_reg (code, X86_SSE_PSRLQ_REG, ins->dreg, ins->sreg2);
4259 break;
4261 case OP_PSHLQ:
4262 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4263 break;
4264 case OP_PSHLQ_REG:
4265 x86_sse_shift_reg_reg (code, X86_SSE_PSLLQ_REG, ins->dreg, ins->sreg2);
4266 break;
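/* The packed shifts above come in two forms: an immediate form
 * (e.g. psrlw/psraw/psllw xmm, imm8) that shifts dreg in place, and a
 * register form where the count is taken from the low quadword of
 * sreg2. */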
4268 case OP_ICONV_TO_X:
4269 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4270 break;
4271 case OP_EXTRACT_I4:
4272 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4273 break;
4274 case OP_EXTRACT_I1:
4275 case OP_EXTRACT_U1:
4276 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4277 if (ins->inst_c0)
4278 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
4279 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
4280 break;
4281 case OP_EXTRACT_I2:
4282 case OP_EXTRACT_U2:
4283 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4284 if (ins->inst_c0)
4285 x86_shift_reg_imm (code, X86_SHR, ins->dreg, 16);
4286 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE);
4287 break;
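/* The narrow extracts above movd the low dword to an integer register,
 * shift the selected byte/word down, then sign- or zero-extend according
 * to the signed (I1/I2) or unsigned (U1/U2) variant. */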
4288 case OP_EXTRACT_R8:
4289 if (ins->inst_c0)
4290 x86_sse_alu_pd_membase_reg (code, X86_SSE_MOVHPD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4291 else
4292 x86_sse_alu_sd_membase_reg (code, X86_SSE_MOVSD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4293 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE);
4294 break;
4296 case OP_INSERT_I2:
4297 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->sreg1, ins->sreg2, ins->inst_c0);
4298 break;
4299 case OP_EXTRACTX_U2:
4300 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PEXTRW, ins->dreg, ins->sreg1, ins->inst_c0);
4301 break;
4302 case OP_INSERTX_U1_SLOW:
4303 /* sreg1 is the extracted ireg (scratch),
4304  * sreg2 is the to-be-inserted ireg (scratch),
4305  * dreg is the xreg to receive the value */
4307 /*clear the bits from the extracted word*/
4308 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
4309 /*shift the value to insert if needed*/
4310 if (ins->inst_c0 & 1)
4311 x86_shift_reg_imm (code, X86_SHL, ins->sreg2, 8);
4312 /*join them together*/
4313 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
4314 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
4315 break;
4316 case OP_INSERTX_I4_SLOW:
4317 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
4318 x86_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
4319 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
4320 break;
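/* SSE2 has no pinsrd, so the 32-bit insert above is emulated with two
 * pinsrw: the low word goes in first, then sreg2 is shifted right by 16
 * so the high word can be placed in the adjacent word slot. */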
4322 case OP_INSERTX_R4_SLOW:
4323 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4324 /*TODO if inst_c0 == 0 use movss*/
4325 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 0, ins->inst_c0 * 2);
4326 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 2, ins->inst_c0 * 2 + 1);
4327 break;
4328 case OP_INSERTX_R8_SLOW:
4329 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4330 if (ins->inst_c0)
4331 x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVHPD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4332 else
4333 x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVSD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4334 break;
4336 case OP_STOREX_MEMBASE_REG:
4337 case OP_STOREX_MEMBASE:
4338 x86_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4339 break;
4340 case OP_LOADX_MEMBASE:
4341 x86_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4342 break;
4343 case OP_LOADX_ALIGNED_MEMBASE:
4344 x86_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4345 break;
4346 case OP_STOREX_ALIGNED_MEMBASE_REG:
4347 x86_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4348 break;
4349 case OP_STOREX_NTA_MEMBASE_REG:
4350 x86_sse_alu_reg_membase (code, X86_SSE_MOVNTPS, ins->dreg, ins->sreg1, ins->inst_offset);
4351 break;
4352 case OP_PREFETCH_MEMBASE:
4353 x86_sse_alu_reg_membase (code, X86_SSE_PREFETCH, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
4355 break;
4356 case OP_XMOVE:
4357 /*FIXME the peephole pass should have killed this*/
4358 if (ins->dreg != ins->sreg1)
4359 x86_movaps_reg_reg (code, ins->dreg, ins->sreg1);
4360 break;
4361 case OP_XZERO:
4362 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->dreg, ins->dreg);
4363 break;
4364 case OP_ICONV_TO_R8_RAW:
4365 x86_mov_membase_reg (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1, 4);
4366 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE);
4367 break;
4369 case OP_FCONV_TO_R8_X:
4370 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4371 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4372 break;
4374 case OP_XCONV_R8_TO_I4:
4375 x86_cvttsd2si (code, ins->dreg, ins->sreg1);
4376 switch (ins->backend.source_opcode) {
4377 case OP_FCONV_TO_I1:
4378 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
4379 break;
4380 case OP_FCONV_TO_U1:
4381 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4382 break;
4383 case OP_FCONV_TO_I2:
4384 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
4385 break;
4386 case OP_FCONV_TO_U2:
4387 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
4388 break;
4389 }
4390 break;
4392 case OP_EXPAND_I1:
4393 /*FIXME this causes a partial register stall, maybe it would not be that bad to use shift + mask + or*/
4394 /*The +4 is to get a mov ?h, ?l over the same reg.*/
4395 x86_mov_reg_reg (code, ins->dreg + 4, ins->dreg, 1);
4396 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4397 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4398 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4399 break;
4400 case OP_EXPAND_I2:
4401 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4402 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4403 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4404 break;
4405 case OP_EXPAND_I4:
4406 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4407 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4408 break;
4409 case OP_EXPAND_R4:
4410 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4411 x86_movd_xreg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4412 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4413 break;
4414 case OP_EXPAND_R8:
4415 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4416 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4417 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0x44);
4418 break;
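/* The expand opcodes broadcast a scalar to every lane: the value is
 * first placed in the low element(s), then pshufd replicates it
 * (control 0 copies the low dword everywhere; 0x44 in the R8 case
 * duplicates the low quadword). */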
4419 #endif
4420 case OP_LIVERANGE_START: {
4421 if (cfg->verbose_level > 1)
4422 printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4423 MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
4424 break;
4425 }
4426 case OP_LIVERANGE_END: {
4427 if (cfg->verbose_level > 1)
4428 printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4429 MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
4430 break;
4431 }
4432 default:
4433 g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
4434 g_assert_not_reached ();
4435 }
4437 if (G_UNLIKELY ((code - cfg->native_code - offset) > max_len)) {
4438 g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4439 mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4440 g_assert_not_reached ();
4441 }
4443 cpos += max_len;
4444 }
4446 cfg->code_len = code - cfg->native_code;
4448 }
4449 #endif /* DISABLE_JIT */
4451 void
4452 mono_arch_register_lowlevel_calls (void)
4453 {
4454 }
4456 void
4457 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4458 {
4459 MonoJumpInfo *patch_info;
4460 gboolean compile_aot = !run_cctors;
4462 for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4463 unsigned char *ip = patch_info->ip.i + code;
4464 const unsigned char *target;
4466 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4468 if (compile_aot) {
4469 switch (patch_info->type) {
4470 case MONO_PATCH_INFO_BB:
4471 case MONO_PATCH_INFO_LABEL:
4472 break;
4473 default:
4474 /* No need to patch these */
4475 continue;
4476 }
4477 }
4479 switch (patch_info->type) {
4480 case MONO_PATCH_INFO_IP:
4481 *((gconstpointer *)(ip)) = target;
4482 break;
4483 case MONO_PATCH_INFO_CLASS_INIT: {
4484 guint8 *code = ip;
4485 /* Might already have been changed to a nop */
4486 x86_call_code (code, 0);
4487 x86_patch (ip, target);
4488 break;
4489 }
4490 case MONO_PATCH_INFO_ABS:
4491 case MONO_PATCH_INFO_METHOD:
4492 case MONO_PATCH_INFO_METHOD_JUMP:
4493 case MONO_PATCH_INFO_INTERNAL_METHOD:
4494 case MONO_PATCH_INFO_BB:
4495 case MONO_PATCH_INFO_LABEL:
4496 case MONO_PATCH_INFO_RGCTX_FETCH:
4497 case MONO_PATCH_INFO_GENERIC_CLASS_INIT:
4498 case MONO_PATCH_INFO_MONITOR_ENTER:
4499 case MONO_PATCH_INFO_MONITOR_EXIT:
4500 x86_patch (ip, target);
4501 break;
4502 case MONO_PATCH_INFO_NONE:
4503 break;
4504 default: {
4505 guint32 offset = mono_arch_get_patch_offset (ip);
4506 *((gconstpointer *)(ip + offset)) = target;
4507 break;
4508 }
4509 }
4510 }
4511 }
4513 guint8 *
4514 mono_arch_emit_prolog (MonoCompile *cfg)
4515 {
4516 MonoMethod *method = cfg->method;
4517 MonoBasicBlock *bb;
4518 MonoMethodSignature *sig;
4519 MonoInst *inst;
4520 int alloc_size, pos, max_offset, i, cfa_offset;
4521 guint8 *code;
4522 gboolean need_stack_frame;
4524 cfg->code_size = MAX (cfg->header->code_size * 4, 10240);
4526 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4527 cfg->code_size += 512;
4529 code = cfg->native_code = g_malloc (cfg->code_size);
4531 /* Offset between ESP and the CFA */
4532 cfa_offset = 0;
4534 // CFA = sp + 4
4535 cfa_offset = sizeof (gpointer);
4536 mono_emit_unwind_op_def_cfa (cfg, code, X86_ESP, sizeof (gpointer));
4537 // IP saved at CFA - 4
4538 /* There is no IP reg on x86 */
4539 mono_emit_unwind_op_offset (cfg, code, X86_NREG, -cfa_offset);
4541 need_stack_frame = needs_stack_frame (cfg);
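/* An illustrative sketch of the frame this prolog builds when a stack
 * frame is needed (exact layout depends on save_lmf and used_int_regs):
 *   [ret addr]                      <- CFA - 4
 *   [saved %ebp]                    <- new %ebp
 *   [MonoLMF or callee saved regs]
 *   [alloc_size bytes of locals]    <- %esp
 */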
4543 if (need_stack_frame) {
4544 x86_push_reg (code, X86_EBP);
4545 cfa_offset += sizeof (gpointer);
4546 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
4547 mono_emit_unwind_op_offset (cfg, code, X86_EBP, - cfa_offset);
4548 x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
4549 mono_emit_unwind_op_def_cfa_reg (cfg, code, X86_EBP);
4550 }
4552 alloc_size = cfg->stack_offset;
4553 pos = 0;
4555 if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
4556 /* Might need to attach the thread to the JIT or change the domain for the callback */
4557 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
4558 guint8 *buf, *no_domain_branch;
4560 code = mono_x86_emit_tls_get (code, X86_EAX, appdomain_tls_offset);
4561 x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
4562 no_domain_branch = code;
4563 x86_branch8 (code, X86_CC_NE, 0, 0);
4564 code = mono_x86_emit_tls_get ( code, X86_EAX, lmf_tls_offset);
4565 x86_test_reg_reg (code, X86_EAX, X86_EAX);
4566 buf = code;
4567 x86_branch8 (code, X86_CC_NE, 0, 0);
4568 x86_patch (no_domain_branch, code);
4569 x86_push_imm (code, cfg->domain);
4570 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4571 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4572 x86_patch (buf, code);
4573 #ifdef TARGET_WIN32
4574 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4575 /* FIXME: Add a separate key for LMF to avoid this */
4576 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4577 #endif
4578 }
4579 else {
4580 if (cfg->compile_aot) {
4581 /*
4582  * This goes before the saving of callee saved regs, so save the got reg
4583  * ourselves.
4584  */
4585 x86_push_reg (code, MONO_ARCH_GOT_REG);
4586 code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL);
4587 x86_push_imm (code, 0);
4588 } else {
4589 x86_push_imm (code, cfg->domain);
4590 }
4591 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4592 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4593 if (cfg->compile_aot)
4594 x86_pop_reg (code, MONO_ARCH_GOT_REG);
4595 }
4596 }
4598 if (method->save_lmf) {
4599 pos += sizeof (MonoLMF);
4601 /* save the current IP */
4602 if (cfg->compile_aot) {
4603 /* This pushes the current ip */
4604 x86_call_imm (code, 0);
4605 } else {
4606 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
4607 x86_push_imm_template (code);
4608 }
4609 cfa_offset += sizeof (gpointer);
4611 /* save all callee saved regs */
4612 x86_push_reg (code, X86_EBP);
4613 cfa_offset += sizeof (gpointer);
4614 x86_push_reg (code, X86_ESI);
4615 cfa_offset += sizeof (gpointer);
4616 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
4617 x86_push_reg (code, X86_EDI);
4618 cfa_offset += sizeof (gpointer);
4619 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
4620 x86_push_reg (code, X86_EBX);
4621 cfa_offset += sizeof (gpointer);
4622 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
4624 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
4625 /*
4626  * Optimized version which uses the mono_lmf TLS variable instead of indirection
4627  * through the mono_lmf_addr TLS variable.
4628  */
4629 /* %eax = previous_lmf */
4630 x86_prefix (code, X86_GS_PREFIX);
4631 x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
4632 /* skip esp + method_info + lmf */
4633 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
4634 /* push previous_lmf */
4635 x86_push_reg (code, X86_EAX);
4636 /* new lmf = ESP */
4637 x86_prefix (code, X86_GS_PREFIX);
4638 x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
4639 } else {
4640 /* get the address of lmf for the current thread */
4641 /*
4642  * This is performance critical so we try to use some tricks to make
4643  * it fast.
4644  */
4646 if (lmf_addr_tls_offset != -1) {
4647 /* Load the lmf quickly using the GS register */
4648 code = mono_x86_emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
4649 #ifdef TARGET_WIN32
4650 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4651 /* FIXME: Add a separate key for LMF to avoid this */
4652 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4653 #endif
4654 } else {
4655 if (cfg->compile_aot)
4656 code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL);
4657 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
4658 }
4660 /* Skip esp + method info */
4661 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
4663 /* push lmf */
4664 x86_push_reg (code, X86_EAX);
4665 /* push *lmf (previous_lmf) */
4666 x86_push_membase (code, X86_EAX, 0);
4667 /* *(lmf) = ESP */
4668 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
4669 }
4670 } else {
4672 if (cfg->used_int_regs & (1 << X86_EBX)) {
4673 x86_push_reg (code, X86_EBX);
4674 pos += 4;
4675 cfa_offset += sizeof (gpointer);
4676 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
4677 }
4679 if (cfg->used_int_regs & (1 << X86_EDI)) {
4680 x86_push_reg (code, X86_EDI);
4681 pos += 4;
4682 cfa_offset += sizeof (gpointer);
4683 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
4684 }
4686 if (cfg->used_int_regs & (1 << X86_ESI)) {
4687 x86_push_reg (code, X86_ESI);
4688 pos += 4;
4689 cfa_offset += sizeof (gpointer);
4690 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
4691 }
4692 }
4694 alloc_size -= pos;
4696 /* the original alloc_size is already aligned: %ebp and the return ip have been pushed, so realign */
4697 if (mono_do_x86_stack_align && need_stack_frame) {
4698 int tot = alloc_size + pos + 4; /* ret ip */
4699 if (need_stack_frame)
4700 tot += 4; /* ebp */
4701 tot &= MONO_ARCH_FRAME_ALIGNMENT - 1;
4702 if (tot)
4703 alloc_size += MONO_ARCH_FRAME_ALIGNMENT - tot;
4704 }
4706 if (alloc_size) {
4707 /* See mono_emit_stack_alloc */
4708 #if defined(TARGET_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
4709 guint32 remaining_size = alloc_size;
4710 /*FIXME handle unbounded code expansion, we should use a loop in case of more than X iterations*/
4711 guint32 required_code_size = ((remaining_size / 0x1000) + 1) * 8; /*8 is the max size of x86_alu_reg_imm + x86_test_membase_reg*/
4712 guint32 offset = code - cfg->native_code;
4713 if (G_UNLIKELY (required_code_size >= (cfg->code_size - offset))) {
4714 while (required_code_size >= (cfg->code_size - offset))
4715 cfg->code_size *= 2;
4716 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4717 code = cfg->native_code + offset;
4718 mono_jit_stats.code_reallocs++;
4719 }
4720 while (remaining_size >= 0x1000) {
4721 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
4722 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
4723 remaining_size -= 0x1000;
4724 }
4725 if (remaining_size)
4726 x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
4727 #else
4728 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
4729 #endif
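/* The page-at-a-time loop in the Windows/altstack variant above touches
 * the stack every 0x1000 bytes so guard pages are hit in order; a single
 * large sub could jump past the guard page and fault in a way the
 * runtime cannot recover from. */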
4731 g_assert (need_stack_frame);
4732 }
4734 if (cfg->method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED ||
4735 cfg->method->wrapper_type == MONO_WRAPPER_RUNTIME_INVOKE) {
4736 x86_alu_reg_imm (code, X86_AND, X86_ESP, -MONO_ARCH_FRAME_ALIGNMENT);
4737 }
4739 #if DEBUG_STACK_ALIGNMENT
4740 /* check the stack is aligned */
4741 if (need_stack_frame && method->wrapper_type == MONO_WRAPPER_NONE) {
4742 x86_mov_reg_reg (code, X86_ECX, X86_ESP, 4);
4743 x86_alu_reg_imm (code, X86_AND, X86_ECX, MONO_ARCH_FRAME_ALIGNMENT - 1);
4744 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
4745 x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
4746 x86_breakpoint (code);
4747 }
4748 #endif
4750 /* compute max_offset in order to use short forward jumps */
4751 max_offset = 0;
4752 if (cfg->opt & MONO_OPT_BRANCH) {
4753 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4754 MonoInst *ins;
4755 bb->max_offset = max_offset;
4757 if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4758 max_offset += 6;
4759 /* max alignment for loops */
4760 if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4761 max_offset += LOOP_ALIGNMENT;
4763 MONO_BB_FOR_EACH_INS (bb, ins) {
4764 if (ins->opcode == OP_LABEL)
4765 ins->inst_c1 = max_offset;
4767 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
4768 }
4769 }
4770 }
4772 /* store runtime generic context */
4773 if (cfg->rgctx_var) {
4774 g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET && cfg->rgctx_var->inst_basereg == X86_EBP);
4776 x86_mov_membase_reg (code, X86_EBP, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 4);
4777 }
4779 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4780 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4782 /* load arguments allocated to register from the stack */
4783 sig = mono_method_signature (method);
4784 pos = 0;
4786 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4787 inst = cfg->args [pos];
4788 if (inst->opcode == OP_REGVAR) {
4789 g_assert (need_stack_frame);
4790 x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
4791 if (cfg->verbose_level > 2)
4792 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
4793 }
4794 pos++;
4795 }
4797 cfg->code_len = code - cfg->native_code;
4799 g_assert (cfg->code_len < cfg->code_size);
4801 return code;
4802 }
4804 void
4805 mono_arch_emit_epilog (MonoCompile *cfg)
4806 {
4807 MonoMethod *method = cfg->method;
4808 MonoMethodSignature *sig = mono_method_signature (method);
4809 int quad, pos;
4810 guint32 stack_to_pop;
4811 guint8 *code;
4812 int max_epilog_size = 16;
4813 CallInfo *cinfo;
4814 gboolean need_stack_frame = needs_stack_frame (cfg);
4816 if (cfg->method->save_lmf)
4817 max_epilog_size += 128;
4819 while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
4820 cfg->code_size *= 2;
4821 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4822 mono_jit_stats.code_reallocs++;
4823 }
4825 code = cfg->native_code + cfg->code_len;
4827 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4828 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4830 /* the code restoring the registers must be kept in sync with OP_JMP */
4831 pos = 0;
4833 if (method->save_lmf) {
4834 gint32 prev_lmf_reg;
4835 gint32 lmf_offset = -sizeof (MonoLMF);
4837 /* check if we need to restore protection of the stack after a stack overflow */
4838 if (mono_get_jit_tls_offset () != -1) {
4839 guint8 *patch;
4840 code = mono_x86_emit_tls_get (code, X86_ECX, mono_get_jit_tls_offset ());
4841 /* we load the value in a separate instruction: this mechanism may be
4842  * used later as a safer way to do thread interruption
4843  */
4844 x86_mov_reg_membase (code, X86_ECX, X86_ECX, G_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 4);
4845 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
4846 patch = code;
4847 x86_branch8 (code, X86_CC_Z, 0, FALSE);
4848 /* note that the call trampoline will preserve eax/edx */
4849 x86_call_reg (code, X86_ECX);
4850 x86_patch (patch, code);
4851 } else {
4852 /* FIXME: maybe save the jit tls in the prolog */
4853 }
4854 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
4855 /*
4856  * Optimized version which uses the mono_lmf TLS variable instead of indirection
4857  * through the mono_lmf_addr TLS variable.
4858  */
4859 /* reg = previous_lmf */
4860 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
4862 /* lmf = previous_lmf */
4863 x86_prefix (code, X86_GS_PREFIX);
4864 x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
4865 } else {
4866 /* Find a spare register */
4867 switch (mini_type_get_underlying_type (cfg->generic_sharing_context, sig->ret)->type) {
4868 case MONO_TYPE_I8:
4869 case MONO_TYPE_U8:
4870 prev_lmf_reg = X86_EDI;
4871 cfg->used_int_regs |= (1 << X86_EDI);
4872 break;
4873 default:
4874 prev_lmf_reg = X86_EDX;
4875 break;
4876 }
4878 /* reg = previous_lmf */
4879 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
4881 /* ecx = lmf */
4882 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
4884 /* *(lmf) = previous_lmf */
4885 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
4886 }
4888 /* restore callee saved regs */
4889 if (cfg->used_int_regs & (1 << X86_EBX)) {
4890 x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
4891 }
4893 if (cfg->used_int_regs & (1 << X86_EDI)) {
4894 x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
4895 }
4896 if (cfg->used_int_regs & (1 << X86_ESI)) {
4897 x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
4898 }
4900 /* EBP is restored by LEAVE */
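/* The LMF path above restores the callee saved registers from the
 * MonoLMF stored at a fixed negative %ebp offset and relinks
 * previous_lmf, undoing the prolog's push of this frame onto the
 * per-thread LMF list. */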
4901 } else {
4902 if (cfg->used_int_regs & (1 << X86_EBX)) {
4903 pos -= 4;
4904 }
4905 if (cfg->used_int_regs & (1 << X86_EDI)) {
4906 pos -= 4;
4907 }
4908 if (cfg->used_int_regs & (1 << X86_ESI)) {
4909 pos -= 4;
4910 }
4912 if (pos) {
4913 g_assert (need_stack_frame);
4914 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
4915 }
4917 if (cfg->used_int_regs & (1 << X86_ESI)) {
4918 x86_pop_reg (code, X86_ESI);
4919 }
4920 if (cfg->used_int_regs & (1 << X86_EDI)) {
4921 x86_pop_reg (code, X86_EDI);
4922 }
4923 if (cfg->used_int_regs & (1 << X86_EBX)) {
4924 x86_pop_reg (code, X86_EBX);
4925 }
4926 }
4928 /* Load returned vtypes into registers if needed */
4929 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
4930 if (cinfo->ret.storage == ArgValuetypeInReg) {
4931 for (quad = 0; quad < 2; quad ++) {
4932 switch (cinfo->ret.pair_storage [quad]) {
4933 case ArgInIReg:
4934 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
4935 break;
4936 case ArgOnFloatFpStack:
4937 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
4938 break;
4939 case ArgOnDoubleFpStack:
4940 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
4941 break;
4942 case ArgNone:
4943 break;
4944 default:
4945 g_assert_not_reached ();
4946 }
4947 }
4948 }
4950 if (need_stack_frame)
4951 x86_leave (code);
4953 if (CALLCONV_IS_STDCALL (sig)) {
4954 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
4956 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
4957 } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
4958 stack_to_pop = 4;
4959 else
4960 stack_to_pop = 0;
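/* Under stdcall the callee pops its own arguments, so the epilog must
 * emit ret <imm> with the total argument size; a vtype returned via a
 * hidden on-stack argument likewise costs 4 bytes. */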
4962 if (stack_to_pop) {
4963 g_assert (need_stack_frame);
4964 x86_ret_imm (code, stack_to_pop);
4965 } else {
4966 x86_ret (code);
4967 }
4969 cfg->code_len = code - cfg->native_code;
4971 g_assert (cfg->code_len < cfg->code_size);
4972 }
4974 void
4975 mono_arch_emit_exceptions (MonoCompile *cfg)
4976 {
4977 MonoJumpInfo *patch_info;
4978 int nthrows, i;
4979 guint8 *code;
4980 MonoClass *exc_classes [16];
4981 guint8 *exc_throw_start [16], *exc_throw_end [16];
4982 guint32 code_size;
4983 int exc_count = 0;
4985 /* Compute needed space */
4986 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4987 if (patch_info->type == MONO_PATCH_INFO_EXC)
4988 exc_count++;
4989 }
4991 /*
4992  * make sure we have enough space for exceptions
4993  * 16 is the size of two push_imm instructions and a call
4994  */
4995 if (cfg->compile_aot)
4996 code_size = exc_count * 32;
4997 else
4998 code_size = exc_count * 16;
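/* Each MONO_PATCH_INFO_EXC site becomes "push <native offset>; push
 * <type token>; call mono_arch_throw_corlib_exception". Sites throwing
 * the same exception class are merged below: later sites only push
 * their offset and jump into the first sequence. */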
5000 while (cfg->code_len + code_size > (cfg->code_size - 16)) {
5001 cfg->code_size *= 2;
5002 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
5003 mono_jit_stats.code_reallocs++;
5004 }
5006 code = cfg->native_code + cfg->code_len;
5008 nthrows = 0;
5009 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
5010 switch (patch_info->type) {
5011 case MONO_PATCH_INFO_EXC: {
5012 MonoClass *exc_class;
5013 guint8 *buf, *buf2;
5014 guint32 throw_ip;
5016 x86_patch (patch_info->ip.i + cfg->native_code, code);
5018 exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
5019 g_assert (exc_class);
5020 throw_ip = patch_info->ip.i;
5022 /* Find a throw sequence for the same exception class */
5023 for (i = 0; i < nthrows; ++i)
5024 if (exc_classes [i] == exc_class)
5025 break;
5026 if (i < nthrows) {
5027 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
5028 x86_jump_code (code, exc_throw_start [i]);
5029 patch_info->type = MONO_PATCH_INFO_NONE;
5030 }
5031 else {
5032 guint32 size;
5034 /* Compute size of code following the push <OFFSET> */
5035 size = 5 + 5;
5037 /*This is aligned to 16 bytes by the callee. This way we save a few bytes here.*/
5039 if ((code - cfg->native_code) - throw_ip < 126 - size) {
5040 /* Use the shorter form */
5041 buf = buf2 = code;
5042 x86_push_imm (code, 0);
5043 }
5044 else {
5045 buf = code;
5046 x86_push_imm (code, 0xf0f0f0f0);
5047 buf2 = code;
5048 }
5050 if (nthrows < 16) {
5051 exc_classes [nthrows] = exc_class;
5052 exc_throw_start [nthrows] = code;
5053 }
5055 x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
5056 patch_info->data.name = "mono_arch_throw_corlib_exception";
5057 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
5058 patch_info->ip.i = code - cfg->native_code;
5059 x86_call_code (code, 0);
5060 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
5061 while (buf < buf2)
5062 x86_nop (buf);
5064 if (nthrows < 16) {
5065 exc_throw_end [nthrows] = code;
5066 nthrows ++;
5067 }
5068 }
5069 break;
5070 }
5071 default:
5072 /* do nothing */
5073 break;
5074 }
5075 }
5077 cfg->code_len = code - cfg->native_code;
5079 g_assert (cfg->code_len < cfg->code_size);
5080 }
5082 void
5083 mono_arch_flush_icache (guint8 *code, gint size)
5084 {
5085 /* not needed */
5086 }
5088 void
5089 mono_arch_flush_register_windows (void)
5090 {
5091 }
5093 gboolean
5094 mono_arch_is_inst_imm (gint64 imm)
5095 {
5096 return TRUE;
5097 }
5099 /*
5100  * Support for fast access to the thread-local lmf structure using the GS
5101  * segment register on NPTL + kernel 2.6.x.
5102  */
5104 static gboolean tls_offset_inited = FALSE;
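/* On Linux/NPTL the thread-local block sits at a fixed offset from the
 * %gs segment base, so a TLS variable can be read with a single
 *   mov %gs:<offset>, %reg
 * (roughly what mono_x86_emit_tls_get emits); the Windows path instead
 * goes through the TlsAlloc'd slot table. */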
5106 void
5107 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
5108 {
5109 if (!tls_offset_inited) {
5110 if (!getenv ("MONO_NO_TLS")) {
5111 #ifdef TARGET_WIN32
5112 /*
5113  * We need to init this multiple times, since when we are first called, the key might not
5114  * be initialized yet.
5115  */
5116 appdomain_tls_offset = mono_domain_get_tls_key ();
5117 lmf_tls_offset = mono_get_jit_tls_key ();
5119 /* Only 64 tls entries can be accessed using inline code */
5120 if (appdomain_tls_offset >= 64)
5121 appdomain_tls_offset = -1;
5122 if (lmf_tls_offset >= 64)
5123 lmf_tls_offset = -1;
5124 #else
5125 #if MONO_XEN_OPT
5126 optimize_for_xen = access ("/proc/xen", F_OK) == 0;
5127 #endif
5128 tls_offset_inited = TRUE;
5129 appdomain_tls_offset = mono_domain_get_tls_offset ();
5130 lmf_tls_offset = mono_get_lmf_tls_offset ();
5131 lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
5132 #endif
5133 }
5134 }
5135 }
5137 void
5138 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
5139 {
5140 }
5142 #ifdef MONO_ARCH_HAVE_IMT
5144 // Linear handler, the bsearch head compare is shorter
5145 //[2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
5146 //[1 + 1] x86_branch8(inst,cond,imm,is_signed)
5147 // x86_patch(ins,target)
5148 //[1 + 5] x86_jump_mem(inst,mem)
5150 #define CMP_SIZE 6
5151 #define BR_SMALL_SIZE 2
5152 #define BR_LARGE_SIZE 5
5153 #define JUMP_IMM_SIZE 6
5154 #define ENABLE_WRONG_METHOD_CHECK 0
5155 #define DEBUG_IMT 0
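/* mono_arch_build_imt_thunk below emits a small decision tree over the
 * IMT entries: each node compares MONO_ARCH_IMT_REG against a method key
 * and either jumps to the target (leaf) or branches over a chunk of
 * entries, bsearch style; the sizes defined above drive the choice
 * between 8- and 32-bit branch encodings. */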
5157 static int
5158 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
5159 {
5160 int i, distance = 0;
5161 for (i = start; i < target; ++i)
5162 distance += imt_entries [i]->chunk_size;
5163 return distance;
5164 }
5166 /*
5167  * LOCKING: called with the domain lock held
5168  */
5169 gpointer
5170 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
5171 gpointer fail_tramp)
5172 {
5173 int i;
5174 int size = 0;
5175 guint8 *code, *start;
5177 for (i = 0; i < count; ++i) {
5178 MonoIMTCheckItem *item = imt_entries [i];
5179 if (item->is_equals) {
5180 if (item->check_target_idx) {
5181 if (!item->compare_done)
5182 item->chunk_size += CMP_SIZE;
5183 item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
5184 } else {
5185 if (fail_tramp) {
5186 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + JUMP_IMM_SIZE * 2;
5187 } else {
5188 item->chunk_size += JUMP_IMM_SIZE;
5189 #if ENABLE_WRONG_METHOD_CHECK
5190 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
5191 #endif
5192 }
5193 }
5194 } else {
5195 item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
5196 imt_entries [item->check_target_idx]->compare_done = TRUE;
5197 }
5198 size += item->chunk_size;
5199 }
5200 if (fail_tramp)
5201 code = mono_method_alloc_generic_virtual_thunk (domain, size);
5202 else
5203 code = mono_domain_code_reserve (domain, size);
5204 start = code;
5205 for (i = 0; i < count; ++i) {
5206 MonoIMTCheckItem *item = imt_entries [i];
5207 item->code_target = code;
5208 if (item->is_equals) {
5209 if (item->check_target_idx) {
5210 if (!item->compare_done)
5211 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5212 item->jmp_code = code;
5213 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5214 if (item->has_target_code)
5215 x86_jump_code (code, item->value.target_code);
5216 else
5217 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5218 } else {
5219 if (fail_tramp) {
5220 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5221 item->jmp_code = code;
5222 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5223 if (item->has_target_code)
5224 x86_jump_code (code, item->value.target_code);
5225 else
5226 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5227 x86_patch (item->jmp_code, code);
5228 x86_jump_code (code, fail_tramp);
5229 item->jmp_code = NULL;
5230 } else {
5231 /* enable the commented code to assert on wrong method */
5232 #if ENABLE_WRONG_METHOD_CHECK
5233 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5234 item->jmp_code = code;
5235 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5236 #endif
5237 if (item->has_target_code)
5238 x86_jump_code (code, item->value.target_code);
5239 else
5240 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5241 #if ENABLE_WRONG_METHOD_CHECK
5242 x86_patch (item->jmp_code, code);
5243 x86_breakpoint (code);
5244 item->jmp_code = NULL;
5245 #endif
5246 }
5247 }
5248 } else {
5249 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5250 item->jmp_code = code;
5251 if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
5252 x86_branch8 (code, X86_CC_GE, 0, FALSE);
5253 else
5254 x86_branch32 (code, X86_CC_GE, 0, FALSE);
5255 }
5256 }
5257 /* patch the branches to get to the target items */
5258 for (i = 0; i < count; ++i) {
5259 MonoIMTCheckItem *item = imt_entries [i];
5260 if (item->jmp_code) {
5261 if (item->check_target_idx) {
5262 x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
5263 }
5264 }
5265 }
5267 if (!fail_tramp)
5268 mono_stats.imt_thunks_size += code - start;
5269 g_assert (code - start <= size);
5271 #if DEBUG_IMT
5272 {
5273 char *buff = g_strdup_printf ("thunk_for_class_%s_%s_entries_%d", vtable->klass->name_space, vtable->klass->name, count);
5274 mono_disassemble_code (NULL, (guint8*)start, code - start, buff);
5275 g_free (buff);
5276 }
5277 #endif
5279 return start;
5280 }
5282 MonoMethod*
5283 mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
5284 {
5285 return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
5286 }
5287 #endif
5289 MonoVTable*
5290 mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
5291 {
5292 return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
5293 }
5295 MonoInst*
5296 mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
5297 {
5298 MonoInst *ins = NULL;
5299 int opcode = 0;
5301 if (cmethod->klass == mono_defaults.math_class) {
5302 if (strcmp (cmethod->name, "Sin") == 0) {
5303 opcode = OP_SIN;
5304 } else if (strcmp (cmethod->name, "Cos") == 0) {
5305 opcode = OP_COS;
5306 } else if (strcmp (cmethod->name, "Tan") == 0) {
5307 opcode = OP_TAN;
5308 } else if (strcmp (cmethod->name, "Atan") == 0) {
5309 opcode = OP_ATAN;
5310 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
5311 opcode = OP_SQRT;
5312 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
5313 opcode = OP_ABS;
5314 } else if (strcmp (cmethod->name, "Round") == 0 && fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R8) {
5315 opcode = OP_ROUND;
5316 }
5318 if (opcode) {
5319 MONO_INST_NEW (cfg, ins, opcode);
5320 ins->type = STACK_R8;
5321 ins->dreg = mono_alloc_freg (cfg);
5322 ins->sreg1 = args [0]->dreg;
5323 MONO_ADD_INS (cfg->cbb, ins);
5324 }
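/* Min/Max are only turned into OP_IMIN/OP_IMAX when MONO_OPT_CMOV is
 * on, since those opcodes are presumably lowered to cmp + cmov later;
 * without it the managed implementation is used. */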
5326 if (cfg->opt & MONO_OPT_CMOV) {
5327 int opcode = 0;
5329 if (strcmp (cmethod->name, "Min") == 0) {
5330 if (fsig->params [0]->type == MONO_TYPE_I4)
5331 opcode = OP_IMIN;
5332 } else if (strcmp (cmethod->name, "Max") == 0) {
5333 if (fsig->params [0]->type == MONO_TYPE_I4)
5334 opcode = OP_IMAX;
5335 }
5337 if (opcode) {
5338 MONO_INST_NEW (cfg, ins, opcode);
5339 ins->type = STACK_I4;
5340 ins->dreg = mono_alloc_ireg (cfg);
5341 ins->sreg1 = args [0]->dreg;
5342 ins->sreg2 = args [1]->dreg;
5343 MONO_ADD_INS (cfg->cbb, ins);
5344 }
5345 }
5347 #if 0
5348 /* OP_FREM is not IEEE compatible */
5349 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
5350 MONO_INST_NEW (cfg, ins, OP_FREM);
5351 ins->inst_i0 = args [0];
5352 ins->inst_i1 = args [1];
5353 }
5354 #endif
5356 }
5357 return ins;
5358 }
5360 gboolean
5361 mono_arch_print_tree (MonoInst *tree, int arity)
5362 {
5363 return 0;
5364 }
5366 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
5367 {
5368 MonoInst* ins;
5370 return NULL;
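/* NOTE: the unconditional return above disables the OP_TLS_GET fast
 * path below, so this intrinsic currently always falls back to the
 * generic domain lookup. */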
5372 if (appdomain_tls_offset == -1)
5373 return NULL;
5375 MONO_INST_NEW (cfg, ins, OP_TLS_GET);
5376 ins->inst_offset = appdomain_tls_offset;
5377 return ins;
5378 }
5380 guint32
5381 mono_arch_get_patch_offset (guint8 *code)
5382 {
5383 if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
5384 return 2;
5385 else if ((code [0] == 0xba))
5386 return 1;
5387 else if ((code [0] == 0x68))
5388 /* push IMM */
5389 return 1;
5390 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
5391 /* push <OFFSET>(<REG>) */
5392 return 2;
5393 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
5394 /* call *<OFFSET>(<REG>) */
5395 return 2;
5396 else if ((code [0] == 0xdd) || (code [0] == 0xd9))
5397 /* fldl <ADDR> */
5398 return 2;
5399 else if ((code [0] == 0x58) && (code [1] == 0x05))
5400 /* pop %eax; add <OFFSET>, %eax */
5401 return 2;
5402 else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
5403 /* pop <REG>; add <OFFSET>, <REG> */
5404 return 3;
5405 else if ((code [0] >= 0xb8) && (code [0] < 0xb8 + 8))
5406 /* mov <REG>, imm */
5407 return 1;
5408 else {
5409 g_assert_not_reached ();
5410 return -1;
5411 }
5412 }
5414 /*
5415  * mono_breakpoint_clean_code:
5416  *
5417  * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
5418  * breakpoints in the original code, they are removed in the copy.
5419  *
5420  * Returns TRUE if no sw breakpoint was present.
5421  */
5422 gboolean
5423 mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
5424 {
5425 int i;
5426 gboolean can_write = TRUE;
5427 /*
5428  * If method_start is non-NULL we need to perform bound checks, since accessing memory
5429  * at code - offset could take us before the start of the method into a different,
5430  * possibly unmapped, page of memory, or we could read incorrect data anyway. We zero-fill
5431  * the out-of-range bytes instead.
5432  */
5433 if (!method_start || code - offset >= method_start) {
5434 memcpy (buf, code - offset, size);
5435 } else {
5436 int diff = code - method_start;
5437 memset (buf, 0, size);
5438 memcpy (buf + offset - diff, method_start, diff + size - offset);
5439 }
5440 code -= offset;
5441 for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
5442 int idx = mono_breakpoint_info_index [i];
5443 guint8 *ptr;
5444 if (idx < 1)
5445 continue;
5446 ptr = mono_breakpoint_info [idx].address;
5447 if (ptr >= code && ptr < code + size) {
5448 guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
5449 can_write = FALSE;
5450 /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
5451 buf [ptr - code] = saved_byte;
5452 }
5453 }
5454 return can_write;
5455 }
5457 gpointer
5458 mono_arch_get_vcall_slot (guint8 *code, mgreg_t *regs, int *displacement)
5459 {
5460 guint8 buf [8];
5461 guint8 reg = 0;
5462 gint32 disp = 0;
5464 mono_breakpoint_clean_code (NULL, code, 8, buf, sizeof (buf));
5465 code = buf + 8;
5467 *displacement = 0;
5469 code -= 6;
5471 /*
5472  * A given byte sequence can match more than one case here, so we have to be
5473  * really careful about the ordering of the cases. Longer sequences
5474  * come first.
5475  * There are two types of calls:
5476  * - direct calls: 0xff address_byte 8/32 bits displacement
5477  * - indirect calls: nop nop nop <call>
5478  * The nops make sure we don't confuse the instruction preceding an indirect
5479  * call with a direct call.
5480  */
5481 if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
5482 reg = code [4] & 0x07;
5483 disp = (signed char)code [5];
5484 } else if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
5485 reg = code [1] & 0x07;
5486 disp = *((gint32*)(code + 2));
5487 } else if ((code [1] == 0xe8)) {
5488 return NULL;
5489 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
5490 /*
5491  * This is an interface call:
5492  * 8b 40 30   mov 0x30(%eax),%eax
5493  * ff 10      call *(%eax)
5494  */
5495 disp = 0;
5496 reg = code [5] & 0x07;
5497 }
5498 else
5499 return NULL;
5501 *displacement = disp;
5502 return (gpointer)regs [reg];
5503 }
5505 /*
5506  * mono_x86_get_this_arg_offset:
5507  *
5508  * Return the offset of the stack location where this is passed during a virtual
5509  * call.
5510  */
5511 guint32
5512 mono_x86_get_this_arg_offset (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig)
5513 {
5514 CallInfo *cinfo = NULL;
5515 int offset;
5517 if (MONO_TYPE_ISSTRUCT (sig->ret)) {
5518 cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5520 offset = cinfo->args [0].offset;
5521 } else {
5522 offset = 0;
5523 }
5525 return offset;
5526 }
5528 gpointer
5529 mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig,
5530 mgreg_t *regs, guint8 *code)
5531 {
5532 guint32 esp = regs [X86_ESP];
5533 CallInfo *cinfo = NULL;
5534 gpointer res;
5535 int offset;
5537 /*
5538  * Avoid expensive calls to get_generic_context_from_code () + get_call_info
5539  * if possible.
5540  */
5541 if (MONO_TYPE_ISSTRUCT (sig->ret)) {
5542 if (!gsctx && code)
5543 gsctx = mono_get_generic_context_from_code (code);
5544 cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5546 offset = cinfo->args [0].offset;
5547 } else {
5548 offset = 0;
5549 }
5551 /*
5552  * The stack looks like:
5553 * <other args>
5554 * <this=delegate>
5555 * <possible vtype return address>
5556 * <return addr>
5557  * <4 pointers pushed by mono_arch_create_trampoline_code ()>
5558  */
5559 res = (((MonoObject**)esp) [5 + (offset / 4)]);
5560 if (cinfo)
5561 g_free (cinfo);
5562 return res;
5563 }
5565 #define MAX_ARCH_DELEGATE_PARAMS 10
5567 static gpointer
5568 get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *code_len)
5569 {
5570 guint8 *code, *start;
5572 /*
5573  * The stack contains:
5574  * <delegate>
5575  * <return addr>
5576  */
5578 if (has_target) {
5579 start = code = mono_global_codeman_reserve (64);
5581 /* Replace the this argument with the target */
5582 x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
5583 x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
5584 x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
5585 x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
5587 g_assert ((code - start) < 64);
5588 } else {
5589 int i = 0;
5590 /* 8 for mov_reg and jump, plus 8 for each parameter */
5591 int code_reserve = 8 + (param_count * 8);
5594 * The stack contains:
5595 * <args in reverse order>
5596 * <delegate>
5597 * <return addr>
5599 * and we need:
5600 * <args in reverse order>
5601 * <return addr>
5603 * without unbalancing the stack.
5604 * So move each arg up a spot in the stack (overwriting un-needed 'this' arg)
5605 * and leaving original spot of first arg as placeholder in stack so
5606 * when callee pops stack everything works.
5609 start = code = mono_global_codeman_reserve (code_reserve);
5611 /* store delegate for access to method_ptr */
5612 x86_mov_reg_membase (code, X86_ECX, X86_ESP, 4, 4);
5614 /* move args up */
5615 for (i = 0; i < param_count; ++i) {
5616 x86_mov_reg_membase (code, X86_EAX, X86_ESP, (i+2)*4, 4);
5617 x86_mov_membase_reg (code, X86_ESP, (i+1)*4, X86_EAX, 4);
5620 x86_jump_membase (code, X86_ECX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
5622 g_assert ((code - start) < code_reserve);
5625 mono_debug_add_delegate_trampoline (start, code - start);
5627 if (code_len)
5628 *code_len = code - start;
5630 return start;
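/*
 * For reference, the has_target thunk built above amounts to the following
 * (a sketch; the actual field offsets come from the MonoDelegate layout):
 *
 *   mov  0x4(%esp), %eax        ; load the delegate ('this')
 *   mov  target(%eax), %ecx     ; load delegate->target
 *   mov  %ecx, 0x4(%esp)        ; overwrite 'this' with the target object
 *   jmp  *method_ptr(%eax)      ; tail call the delegate's method
 */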
GSList*
mono_arch_get_delegate_invoke_impls (void)
{
	GSList *res = NULL;
	guint8 *code;
	guint32 code_len;
	int i;

	code = get_delegate_invoke_impl (TRUE, 0, &code_len);
	res = g_slist_prepend (res, mono_tramp_info_create (g_strdup ("delegate_invoke_impl_has_target"), code, code_len, NULL, NULL));

	for (i = 0; i < MAX_ARCH_DELEGATE_PARAMS; ++i) {
		code = get_delegate_invoke_impl (FALSE, i, &code_len);
		res = g_slist_prepend (res, mono_tramp_info_create (g_strdup_printf ("delegate_invoke_impl_target_%d", i), code, code_len, NULL, NULL));
	}

	return res;
}
gpointer
mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
{
	guint8 *code, *start;

	if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
		return NULL;

	/* FIXME: Support more cases */
	if (MONO_TYPE_ISSTRUCT (sig->ret))
		return NULL;

	/*
	 * The stack contains:
	 * <delegate>
	 * <return addr>
	 */

	if (has_target) {
		static guint8* cached = NULL;

		if (cached)
			return cached;

		if (mono_aot_only)
			start = mono_aot_get_trampoline ("delegate_invoke_impl_has_target");
		else
			start = get_delegate_invoke_impl (TRUE, 0, NULL);

		mono_memory_barrier ();

		cached = start;
	} else {
		static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
		int i = 0;

		for (i = 0; i < sig->param_count; ++i)
			if (!mono_is_regsize_var (sig->params [i]))
				return NULL;

		code = cache [sig->param_count];
		if (code)
			return code;

		if (mono_aot_only) {
			char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", sig->param_count);
			start = mono_aot_get_trampoline (name);
			g_free (name);
		} else {
			start = get_delegate_invoke_impl (FALSE, sig->param_count, NULL);
		}

		mono_memory_barrier ();

		cache [sig->param_count] = start;
	}

	return start;
}
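/*
 * Note on the caching above (descriptive, added for clarity): the
 * mono_memory_barrier () orders the writes that build the thunk before the
 * write that publishes its address, so a thread that observes a non-NULL
 * 'cached'/'cache' entry also observes fully written code. On x86 this is
 * mainly a compiler barrier, since the hardware does not reorder stores
 * with other stores.
 */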
gpointer
mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
{
	switch (reg) {
	case X86_EAX: return (gpointer)ctx->eax;
	case X86_EBX: return (gpointer)ctx->ebx;
	case X86_ECX: return (gpointer)ctx->ecx;
	case X86_EDX: return (gpointer)ctx->edx;
	case X86_ESP: return (gpointer)ctx->esp;
	case X86_EBP: return (gpointer)ctx->ebp;
	case X86_ESI: return (gpointer)ctx->esi;
	case X86_EDI: return (gpointer)ctx->edi;
	default: g_assert_not_reached ();
	}
}
#ifdef MONO_ARCH_SIMD_INTRINSICS

static MonoInst*
get_float_to_x_spill_area (MonoCompile *cfg)
{
	if (!cfg->fconv_to_r8_x_var) {
		cfg->fconv_to_r8_x_var = mono_compile_create_var (cfg, &mono_defaults.double_class->byval_arg, OP_LOCAL);
		cfg->fconv_to_r8_x_var->flags |= MONO_INST_VOLATILE; /* FIXME: use the don't-regalloc flag */
	}
	return cfg->fconv_to_r8_x_var;
}
/*
 * Convert all fconv ops that MONO_OPT_SSE2 would get wrong.
 */
void
mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
{
	MonoInst *fconv;
	int dreg, src_opcode;

	if (!(cfg->opt & MONO_OPT_SSE2) || !(cfg->opt & MONO_OPT_SIMD) || COMPILE_LLVM (cfg))
		return;

	switch (src_opcode = ins->opcode) {
	case OP_FCONV_TO_I1:
	case OP_FCONV_TO_U1:
	case OP_FCONV_TO_I2:
	case OP_FCONV_TO_U2:
	case OP_FCONV_TO_I4:
	case OP_FCONV_TO_I:
		break;
	default:
		return;
	}

	/* dreg is the IREG and sreg1 is the FREG */
	MONO_INST_NEW (cfg, fconv, OP_FCONV_TO_R8_X);
	fconv->klass = NULL; /* FIXME: what can I use here, as the Mono.Simd lib might not be loaded yet */
	fconv->sreg1 = ins->sreg1;
	fconv->dreg = mono_alloc_ireg (cfg);
	fconv->type = STACK_VTYPE;
	fconv->backend.spill_var = get_float_to_x_spill_area (cfg);

	mono_bblock_insert_before_ins (cfg->cbb, ins, fconv);

	dreg = ins->dreg;
	NULLIFY_INS (ins);
	ins->opcode = OP_XCONV_R8_TO_I4;

	ins->klass = mono_defaults.int32_class;
	ins->sreg1 = fconv->dreg;
	ins->dreg = dreg;
	ins->type = STACK_I4;
	ins->backend.source_opcode = src_opcode;
}
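/*
 * Sketch of the decomposition above (IR rendering abbreviated): an
 * instruction such as
 *
 *   ireg <- OP_FCONV_TO_I4 freg
 *
 * becomes the pair
 *
 *   xreg <- OP_FCONV_TO_R8_X  freg    ; move the fp value into an XMM reg via the spill area
 *   ireg <- OP_XCONV_R8_TO_I4 xreg    ; SSE2 conversion to int
 *
 * with the original opcode remembered in backend.source_opcode so the code
 * generator can apply the right narrowing (I1/U1/I2/U2) afterwards.
 */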
#endif /* #ifdef MONO_ARCH_SIMD_INTRINSICS */
void
mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins)
{
	MonoInst *ins;
	int vreg;

	if (long_ins->opcode == OP_LNEG) {
		ins = long_ins;
		MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 1, ins->sreg1 + 1);
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ADC_IMM, ins->dreg + 2, ins->sreg1 + 2, 0);
		MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 2, ins->dreg + 2);
		NULLIFY_INS (ins);
		return;
	}
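	/*
	 * Worked example of the OP_LNEG expansion (illustrative): negating the
	 * 64-bit pair (lo, hi) is lo' = -lo, hi' = -(hi + borrow). For x = 1,
	 * i.e. (lo, hi) = (1, 0): neg turns lo into 0xffffffff and sets the
	 * carry, adc adds the carry to hi giving 1, and the final neg yields
	 * 0xffffffff, so the result is 0xffffffffffffffff == -1.
	 */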
#ifdef MONO_ARCH_SIMD_INTRINSICS

	if (!(cfg->opt & MONO_OPT_SIMD))
		return;

	/* TODO: move this to simd-intrinsic.c once we support SSE 4.1 dword extractors, since we need the runtime caps info */
	switch (long_ins->opcode) {
	case OP_EXTRACT_I8:
		vreg = long_ins->sreg1;

		if (long_ins->inst_c0) {
			MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
			ins->klass = long_ins->klass;
			ins->sreg1 = long_ins->sreg1;
			ins->inst_c0 = 2;
			ins->type = STACK_VTYPE;
			ins->dreg = vreg = alloc_ireg (cfg);
			MONO_ADD_INS (cfg->cbb, ins);
		}

		MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
		ins->klass = mono_defaults.int32_class;
		ins->sreg1 = vreg;
		ins->type = STACK_I4;
		ins->dreg = long_ins->dreg + 1;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
		ins->klass = long_ins->klass;
		ins->sreg1 = long_ins->sreg1;
		ins->inst_c0 = long_ins->inst_c0 ? 3 : 1;
		ins->type = STACK_VTYPE;
		ins->dreg = vreg = alloc_ireg (cfg);
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
		ins->klass = mono_defaults.int32_class;
		ins->sreg1 = vreg;
		ins->type = STACK_I4;
		ins->dreg = long_ins->dreg + 2;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
	case OP_INSERTX_I8_SLOW:
		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg2 + 1;
		ins->inst_c0 = long_ins->inst_c0 * 2;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg2 + 2;
		ins->inst_c0 = long_ins->inst_c0 * 2 + 1;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
	case OP_EXPAND_I8:
		MONO_INST_NEW (cfg, ins, OP_ICONV_TO_X);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->sreg1 + 1;
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg1 + 2;
		ins->inst_c0 = 1;
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->inst_c0 = 0x44; /* magic number for swizzling (X,Y,X,Y) */
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
	}
#endif /* MONO_ARCH_SIMD_INTRINSICS */
}
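/*
 * Sketch of the OP_EXPAND_I8 lowering above: the low dword of the long goes
 * into lane 0 of an XMM register (OP_ICONV_TO_X), the high dword is inserted
 * into lane 1 (OP_INSERTX_I4_SLOW with inst_c0 == 1), and the final
 * OP_PSHUFLED with control byte 0x44 (binary 01 00 01 00, selecting source
 * lanes 0,1,0,1) replicates the (lo, hi) pair across the whole register.
 */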
/* MONO_ARCH_HAVE_HANDLER_BLOCK_GUARD */
gpointer
mono_arch_install_handler_block_guard (MonoJitInfo *ji, MonoJitExceptionInfo *clause, MonoContext *ctx, gpointer new_value)
{
	int offset;
	gpointer *sp, old_value;
	char *bp;
	const unsigned char *handler;

	/* Decode the first instruction to figure out where we stored the spvar */
	/*
	 * Our jit MUST generate the following:
	 *   mov %esp, -?(%ebp)
	 * which is encoded as 0x89 mod_rm.
	 * mod_rm (esp, ebp, imm), where imm is never zero, can be:
	 *   mod (reg + imm8):  01 reg(esp): 100 rm(ebp): 101 -> 01100101 (0x65)
	 *   mod (reg + imm32): 10 reg(esp): 100 rm(ebp): 101 -> 10100101 (0xA5)
	 */
	handler = clause->handler_start;

	if (*handler != 0x89)
		return NULL;

	++handler;

	if (*handler == 0x65)
		offset = *(signed char*)(handler + 1);
	else if (*handler == 0xA5)
		offset = *(int*)(handler + 1);
	else
		return NULL;

	/* Load the spvar */
	bp = MONO_CONTEXT_GET_BP (ctx);
	sp = *(gpointer*)(bp + offset);

	old_value = *sp;
	if (old_value < ji->code_start || (char*)old_value > ((char*)ji->code_start + ji->code_size))
		return old_value;

	*sp = new_value;

	return old_value;
}
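/*
 * Worked decoding example (illustrative): for the byte sequence 89 65 f0,
 * 0x89 is "mov r32 to r/m32" and the mod_rm byte 0x65 has mod == 01 (disp8),
 * reg == 100 (%esp) and rm == 101 (%ebp), so the instruction is
 * "mov %esp,-0x10(%ebp)" and the decoded spvar offset is
 * (signed char)0xf0 == -16.
 */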
/*
 * mono_arch_emit_load_got_addr:
 *
 *   Emit code to load the got address.
 * On x86, the result is placed into EBX.
 */
guint8*
mono_arch_emit_load_got_addr (guint8 *start, guint8 *code, MonoCompile *cfg, MonoJumpInfo **ji)
{
	x86_call_imm (code, 0);
	/*
	 * The patch needs to point to the pop, since the GOT offset needs
	 * to be added to that address.
	 */
	if (cfg)
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
	else
		*ji = mono_patch_info_list_prepend (*ji, code - start, MONO_PATCH_INFO_GOT_OFFSET, NULL);
	x86_pop_reg (code, MONO_ARCH_GOT_REG);
	x86_alu_reg_imm (code, X86_ADD, MONO_ARCH_GOT_REG, 0xf0f0f0f0);

	return code;
}
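/*
 * The sequence emitted above is the usual x86 PIC idiom (sketch; 0xf0f0f0f0
 * is a placeholder that gets patched with the real GOT offset):
 *
 *   call .Lnext        ; pushes the address of .Lnext
 * .Lnext:
 *   pop  %ebx          ; %ebx = current instruction pointer
 *   add  $got_offset, %ebx
 */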
/*
 * mono_arch_emit_load_aotconst:
 *
 *   Emit code to load the contents of the GOT slot identified by TRAMP_TYPE and
 * TARGET from the mscorlib GOT in full-aot code.
 * On x86, the GOT address is assumed to be in EBX, and the result is placed into
 * EAX.
 */
guint8*
mono_arch_emit_load_aotconst (guint8 *start, guint8 *code, MonoJumpInfo **ji, int tramp_type, gconstpointer target)
{
	/* Load the mscorlib got address */
	x86_mov_reg_membase (code, X86_EAX, MONO_ARCH_GOT_REG, sizeof (gpointer), 4);
	*ji = mono_patch_info_list_prepend (*ji, code - start, tramp_type, target);
	/* arch_emit_got_access () patches this */
	x86_mov_reg_membase (code, X86_EAX, X86_EAX, 0xf0f0f0f0, 4);

	return code;
}
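/*
 * Sketch of the emitted code (the 0xf0f0f0f0 displacement is patched later
 * with the real slot offset):
 *
 *   mov 0x4(%ebx), %eax           ; load the mscorlib GOT address
 *   mov slot_offset(%eax), %eax   ; load the GOT slot contents
 */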
#if __APPLE__
#define DBG_SIGNAL SIGBUS
#else
#define DBG_SIGNAL SIGSEGV
#endif
/* Soft Debug support */
#ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED

/*
 * mono_arch_set_breakpoint:
 *
 *   Set a breakpoint at the native code corresponding to JI at NATIVE_OFFSET.
 * The location should contain code emitted by OP_SEQ_POINT.
 */
void
mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
{
	guint8 *code = ip;

	/*
	 * In production, we will use int3 (which requires fixing the size in the md
	 * file). But that could confuse gdb, so during development, we emit a SIGSEGV
	 * instead.
	 */
	g_assert (code [0] == 0x90);
	x86_alu_reg_mem (code, X86_CMP, X86_EAX, (guint32)bp_trigger_page);
}
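/*
 * Descriptive note (added for clarity, assuming the trigger page is kept
 * unreadable as set up at arch init): the breakpoint overwrites the nop
 * window reserved by OP_SEQ_POINT with a 6-byte "cmp bp_trigger_page, %eax"
 * (hence BREAKPOINT_SIZE below). Executing the cmp reads the protected page
 * and faults, and the fault handler classifies the access as a breakpoint
 * event via mono_arch_is_breakpoint_event ().
 */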
/*
 * mono_arch_clear_breakpoint:
 *
 *   Clear the breakpoint at IP.
 */
void
mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
{
	guint8 *code = ip;
	int i;

	for (i = 0; i < 6; ++i)
		x86_nop (code);
}
/*
 * mono_arch_start_single_stepping:
 *
 *   Start single stepping.
 */
void
mono_arch_start_single_stepping (void)
{
	mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
}

/*
 * mono_arch_stop_single_stepping:
 *
 *   Stop single stepping.
 */
void
mono_arch_stop_single_stepping (void)
{
	mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
}
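/*
 * Descriptive note (added for clarity): code emitted for sequence points
 * reads from ss_trigger_page, so revoking its read permission above makes
 * every sequence point fault while single stepping is active, and restoring
 * MONO_MMAP_READ turns the reads back into harmless loads.
 */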
/*
 * mono_arch_is_single_step_event:
 *
 *   Return whether the machine state in SIGCTX corresponds to a single
 * step event.
 */
gboolean
mono_arch_is_single_step_event (void *info, void *sigctx)
{
#ifdef TARGET_WIN32
	EXCEPTION_RECORD* einfo = (EXCEPTION_RECORD*)info;

	/* Sometimes the address is off by 4 */
	if ((guint8*)einfo->ExceptionInformation [1] >= (guint8*)ss_trigger_page && (guint8*)einfo->ExceptionInformation [1] <= (guint8*)ss_trigger_page + 128)
		return TRUE;
	else
		return FALSE;
#else
	siginfo_t* sinfo = (siginfo_t*)info;

	/* Sometimes the address is off by 4 */
	if (sinfo->si_signo == DBG_SIGNAL && sinfo->si_addr >= ss_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)ss_trigger_page + 128)
		return TRUE;
	else
		return FALSE;
#endif
}
gboolean
mono_arch_is_breakpoint_event (void *info, void *sigctx)
{
#ifdef TARGET_WIN32
	EXCEPTION_RECORD* einfo = (EXCEPTION_RECORD*)info;

	/* Sometimes the address is off by 4 */
	if ((guint8*)einfo->ExceptionInformation [1] >= (guint8*)bp_trigger_page && (guint8*)einfo->ExceptionInformation [1] <= (guint8*)bp_trigger_page + 128)
		return TRUE;
	else
		return FALSE;
#else
	siginfo_t* sinfo = (siginfo_t*)info;

	/* Sometimes the address is off by 4 */
	if (sinfo->si_signo == DBG_SIGNAL && sinfo->si_addr >= bp_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)bp_trigger_page + 128)
		return TRUE;
	else
		return FALSE;
#endif
}
/*
 * mono_arch_get_ip_for_breakpoint:
 *
 *   See mini-amd64.c for docs.
 */
guint8*
mono_arch_get_ip_for_breakpoint (MonoJitInfo *ji, MonoContext *ctx)
{
	guint8 *ip = MONO_CONTEXT_GET_IP (ctx);

	return ip;
}

#define BREAKPOINT_SIZE 6

/*
 * mono_arch_get_ip_for_single_step:
 *
 *   See mini-amd64.c for docs.
 */
guint8*
mono_arch_get_ip_for_single_step (MonoJitInfo *ji, MonoContext *ctx)
{
	guint8 *ip = MONO_CONTEXT_GET_IP (ctx);

	/* Size of x86_alu_reg_imm */
	ip += 6;

	return ip;
}
/*
 * mono_arch_skip_breakpoint:
 *
 *   See mini-amd64.c for docs.
 */
void
mono_arch_skip_breakpoint (MonoContext *ctx)
{
	MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + BREAKPOINT_SIZE);
}

/*
 * mono_arch_skip_single_step:
 *
 *   See mini-amd64.c for docs.
 */
void
mono_arch_skip_single_step (MonoContext *ctx)
{
	MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 6);
}

/*
 * mono_arch_get_seq_point_info:
 *
 *   See mini-amd64.c for docs.
 */
gpointer
mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
{
	NOT_IMPLEMENTED;
	return NULL;
}

#endif