2010-01-07 Rodrigo Kumpera <rkumpera@novell.com>
[mono.git] / mono / mini / mini-x86.c
/*
 * mini-x86.c: x86 backend for the Mono code generator
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/utils/mono-math.h>
#include <mono/utils/mono-counters.h>
#include <mono/utils/mono-mmap.h>

#include "trace.h"
#include "mini-x86.h"
#include "cpu-x86.h"
#include "ir-emit.h"
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#ifdef TARGET_WIN32
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif
/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
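/*
 * Example: ALIGN_TO (13, 8) == 16 and ALIGN_TO (16, 8) == 16. The mask trick
 * rounds val up to the next multiple of align, which must be a power of two;
 * the guint64 cast keeps the intermediate addition from overflowing.
 */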
#define ARGS_OFFSET 8
#ifdef TARGET_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif
MonoBreakpointInfo
mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];

/*
 * The code generated for sequence points reads from this location, which is
 * made read-only when single stepping is enabled.
 */
static gpointer ss_trigger_page;

/* Enabled breakpoints read from this trigger page */
static gpointer bp_trigger_page;
const char*
mono_arch_regname (int reg)
{
	switch (reg) {
	case X86_EAX: return "%eax";
	case X86_EBX: return "%ebx";
	case X86_ECX: return "%ecx";
	case X86_EDX: return "%edx";
	case X86_ESP: return "%esp";
	case X86_EBP: return "%ebp";
	case X86_EDI: return "%edi";
	case X86_ESI: return "%esi";
	}
	return "unknown";
}
const char*
mono_arch_fregname (int reg)
{
	switch (reg) {
	case 0:
		return "%fr0";
	case 1:
		return "%fr1";
	case 2:
		return "%fr2";
	case 3:
		return "%fr3";
	case 4:
		return "%fr4";
	case 5:
		return "%fr5";
	case 6:
		return "%fr6";
	case 7:
		return "%fr7";
	default:
		return "unknown";
	}
}
const char *
mono_arch_xregname (int reg)
{
	switch (reg) {
	case 0:
		return "%xmm0";
	case 1:
		return "%xmm1";
	case 2:
		return "%xmm2";
	case 3:
		return "%xmm3";
	case 4:
		return "%xmm4";
	case 5:
		return "%xmm5";
	case 6:
		return "%xmm6";
	case 7:
		return "%xmm7";
	default:
		return "unknown";
	}
}
typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgOnFloatFpStack,
	ArgOnDoubleFpStack,
	ArgNone
} ArgStorage;

typedef struct {
	gint16 offset;
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;
typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	guint32 stack_align_amount;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;
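/*
 * CallInfo ends with a one-element args [] array: callers overallocate the
 * struct so an ArgInfo for every argument is stored inline. A minimal sketch
 * of the idiom, as used by get_call_info () below:
 *
 *   int n = sig->hasthis + sig->param_count;
 *   CallInfo *cinfo = g_malloc0 (sizeof (CallInfo) + sizeof (ArgInfo) * n);
 */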
#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

#if defined(TARGET_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
static void inline
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}
static void inline
add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	g_assert (PARAM_REGS == 0);

	ainfo->storage = ArgOnStack;
	(*stack_size) += sizeof (gpointer) * 2;
}
static void inline
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += is_double ? 8 : 4;
	}
	else {
		/* A double register */
		if (is_double)
			ainfo->storage = ArgInDoubleSSEReg;
		else
			ainfo->storage = ArgInFloatSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}
static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	size = mini_type_stack_size_full (gsctx, &klass->byval_arg, NULL, sig->pinvoke);

#ifdef SMALL_STRUCTS_IN_REGS
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * The exact rules are not very well documented; the code below seems to work
		 * with the code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
/*
 * get_call_info:
 *
 * Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 */
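/*
 * Worked example (hypothetical signature): since PARAM_REGS is 0, every
 * argument of "int f (int a, double b)" is passed on the stack, so a gets
 * offset 0 (4 bytes), b gets offset 4 (8 bytes) and stack_usage is 12.
 * If MONO_ARCH_FRAME_ALIGNMENT were 16, stack_align_amount would be 4.
 */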
static CallInfo*
get_call_info_internal (MonoGenericSharingContext *gsctx, CallInfo *cinfo, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mini_type_get_underlying_type (gsctx, sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (ret_type)) {
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = X86_EAX;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);

			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mini_type_get_underlying_type (gsctx, sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (ptype)) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	if (mono_do_x86_stack_align && (stack_size % MONO_ARCH_FRAME_ALIGNMENT) != 0) {
		cinfo->need_stack_align = TRUE;
		cinfo->stack_align_amount = MONO_ARCH_FRAME_ALIGNMENT - (stack_size % MONO_ARCH_FRAME_ALIGNMENT);
		stack_size += cinfo->stack_align_amount;
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
static CallInfo*
get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	int n = sig->hasthis + sig->param_count;
	CallInfo *cinfo;

	if (mp)
		cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
	else
		cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	return get_call_info_internal (gsctx, cinfo, sig, is_pinvoke);
}
/*
 * mono_arch_get_argument_info:
 * @csig: a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 * This should be signal safe, since it is called from
 * mono_arch_find_jit_info_ext ().
 * FIXME: The metadata calls might not be signal safe.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, args_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;
	CallInfo *cinfo;

	/* Avoid g_malloc as it is not signal safe */
	cinfo = (CallInfo*)g_newa (guint8*, sizeof (CallInfo) + (sizeof (ArgInfo) * (csig->param_count + 1)));

	cinfo = get_call_info_internal (NULL, cinfo, csig, FALSE);

	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].size = args_size;

	for (k = 0; k < param_count; k++) {
		size = mini_type_stack_size_full (NULL, csig->params [k], &align, csig->pinvoke);

		/* ignore alignment for now */
		align = 1;

		args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
		arg_info [k].pad = pad;
		args_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	if (mono_do_x86_stack_align && !CALLCONV_IS_STDCALL (csig))
		align = MONO_ARCH_FRAME_ALIGNMENT;
	else
		align = 4;
	args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	return args_size;
}
static const guchar cpuid_impl [] = {
	0x55,			/* push %ebp */
	0x89, 0xe5,		/* mov %esp,%ebp */
	0x53,			/* push %ebx */
	0x8b, 0x45, 0x08,	/* mov 0x8(%ebp),%eax */
	0x0f, 0xa2,		/* cpuid */
	0x50,			/* push %eax */
	0x8b, 0x45, 0x10,	/* mov 0x10(%ebp),%eax */
	0x89, 0x18,		/* mov %ebx,(%eax) */
	0x8b, 0x45, 0x14,	/* mov 0x14(%ebp),%eax */
	0x89, 0x08,		/* mov %ecx,(%eax) */
	0x8b, 0x45, 0x18,	/* mov 0x18(%ebp),%eax */
	0x89, 0x10,		/* mov %edx,(%eax) */
	0x58,			/* pop %eax */
	0x8b, 0x55, 0x0c,	/* mov 0xc(%ebp),%edx */
	0x89, 0x02,		/* mov %eax,(%edx) */
	0x5b,			/* pop %ebx */
	0xc9,			/* leave */
	0xc3,			/* ret */
};

typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		static CpuidFunc func = NULL;
		void *ptr;
		if (!func) {
			ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
			memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
			func = (CpuidFunc)ptr;
		}
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
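/*
 * Typical use, as in mono_arch_cpu_optimizazions () below: call
 * cpuid (1, &eax, &ebx, &ecx, &edx) and test individual feature bits of the
 * result, e.g. EDX bit 26 for SSE2. The function returns 0 on processors
 * which cannot toggle the EFLAGS ID bit (0x200000), i.e. CPUs without the
 * cpuid instruction.
 */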
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	_control87 (_PC_53, MCW_PC);
#endif
}
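/*
 * The precision-control field (bits 8-9) of the x87 control word selects the
 * significand width: 00b = single (24 bits), 10b = double (53 bits) and
 * 11b = extended (64 bits). Forcing 53 bits makes x87 arithmetic round like
 * the CLR double type; _control87 (_PC_53, MCW_PC) is the MSVC equivalent.
 */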
/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
	InitializeCriticalSection (&mini_arch_mutex);

	ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ);
	bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
	mono_mprotect (bp_trigger_page, mono_pagesize (), 0);
}
/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
	DeleteCriticalSection (&mini_arch_mutex);
}
/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
	int eax, ebx, ecx, edx;
	guint32 opts = 0;

	*exclude_mask = 0;
	/* Feature Flags function, flags returned in EDX. */
	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 15)) {
			opts |= MONO_OPT_CMOV;
			if (edx & 1)
				opts |= MONO_OPT_FCMOV;
			else
				*exclude_mask |= MONO_OPT_FCMOV;
		} else
			*exclude_mask |= MONO_OPT_CMOV;
		if (edx & (1 << 26))
			opts |= MONO_OPT_SSE2;
		else
			*exclude_mask |= MONO_OPT_SSE2;

#ifdef MONO_ARCH_SIMD_INTRINSICS
		/* SIMD intrinsics require at least SSE2. */
		if (!(opts & MONO_OPT_SSE2))
			*exclude_mask |= MONO_OPT_SIMD;
#endif
	}
	return opts;
}
/*
 * This function tests for all supported SSE versions.
 *
 * Returns a bitmask corresponding to all supported versions.
 */
guint32
mono_arch_cpu_enumerate_simd_versions (void)
{
	int eax, ebx, ecx, edx;
	guint32 sse_opts = 0;

	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 25))
			sse_opts |= 1 << SIMD_VERSION_SSE1;
		if (edx & (1 << 26))
			sse_opts |= 1 << SIMD_VERSION_SSE2;
		if (ecx & (1 << 0))
			sse_opts |= 1 << SIMD_VERSION_SSE3;
		if (ecx & (1 << 9))
			sse_opts |= 1 << SIMD_VERSION_SSSE3;
		if (ecx & (1 << 19))
			sse_opts |= 1 << SIMD_VERSION_SSE41;
		if (ecx & (1 << 20))
			sse_opts |= 1 << SIMD_VERSION_SSE42;
	}

	/* Yes, all this needs to be done to check for sse4a.
	   See: "Amd: CPUID Specification"
	 */
	if (cpuid (0x80000000, &eax, &ebx, &ecx, &edx)) {
		/* eax greater than or equal to 0x80000001, ebx = 'htuA', ecx = 'DMAc', edx = 'itne' */
		if ((((unsigned int) eax) >= 0x80000001) && (ebx == 0x68747541) && (ecx == 0x444D4163) && (edx == 0x69746E65)) {
			cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
			if (ecx & (1 << 6))
				sse_opts |= 1 << SIMD_VERSION_SSE4a;
		}
	}

	return sse_opts;
}
/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		if (reg == -1)
			return TRUE;
	}

	return FALSE;
}
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we don't allocate I1 to registers because there is no simple way to sign extend
		 * 8bit quantities in caller saved registers on x86 */
		if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	/* we can use 3 registers for global allocation */
	regs = g_list_prepend (regs, (gpointer)X86_EBX);
	regs = g_list_prepend (regs, (gpointer)X86_ESI);
	regs = g_list_prepend (regs, (gpointer)X86_EDI);

	return regs;
}
/*
 * mono_arch_regalloc_cost:
 *
 * Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		return (ins->opcode == OP_ARG) ? 1 : 0;
	else
		/* push+pop+possible load if it is an argument */
		return (ins->opcode == OP_ARG) ? 3 : 2;
}
static void
set_needs_stack_frame (MonoCompile *cfg, gboolean flag)
{
	static int inited = FALSE;
	static int count = 0;

	if (cfg->arch.need_stack_frame_inited) {
		g_assert (cfg->arch.need_stack_frame == flag);
		return;
	}

	cfg->arch.need_stack_frame = flag;
	cfg->arch.need_stack_frame_inited = TRUE;

	if (flag)
		return;

	if (!inited) {
		mono_counters_register ("Could eliminate stack frame", MONO_COUNTER_INT|MONO_COUNTER_JIT, &count);
		inited = TRUE;
	}
	++count;

	//g_print ("will eliminate %s.%s.%s\n", cfg->method->klass->name_space, cfg->method->klass->name, cfg->method->name);
}
static gboolean
needs_stack_frame (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	gboolean result = FALSE;

#if defined(__APPLE__)
	/* OSX requires stack frame code to have the correct alignment. */
	return TRUE;
#endif

	if (cfg->arch.need_stack_frame_inited)
		return cfg->arch.need_stack_frame;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	if (cfg->disable_omit_fp)
		result = TRUE;
	else if (cfg->flags & MONO_CFG_HAS_ALLOCA)
		result = TRUE;
	else if (cfg->method->save_lmf)
		result = TRUE;
	else if (cfg->stack_offset)
		result = TRUE;
	else if (cfg->param_area)
		result = TRUE;
	else if (cfg->flags & (MONO_CFG_HAS_CALLS | MONO_CFG_HAS_ALLOCA | MONO_CFG_HAS_TAIL))
		result = TRUE;
	else if (header->num_clauses)
		result = TRUE;
	else if (sig->param_count + sig->hasthis)
		result = TRUE;
	else if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		result = TRUE;
	else if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
		 (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
		result = TRUE;

	set_needs_stack_frame (cfg, result);

	return cfg->arch.need_stack_frame;
}
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	cfg->frame_reg = X86_EBP;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_size > MONO_ARCH_MAX_FRAME_SIZE) {
		char *mname = mono_method_full_name (cfg->method, TRUE);
		cfg->exception_type = MONO_EXCEPTION_INVALID_PROGRAM;
		cfg->exception_message = g_strdup_printf ("Method %s stack is too big.", mname);
		g_free (mname);
		return;
	}
	if (locals_stack_align) {
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	/*
	 * EBP is at alignment 8 % MONO_ARCH_FRAME_ALIGNMENT, so if we
	 * have locals larger than 8 bytes we need to make sure that
	 * they have the appropriate offset.
	 */
	if (MONO_ARCH_FRAME_ALIGNMENT > 8 && locals_stack_align > 8)
		offset += MONO_ARCH_FRAME_ALIGNMENT - sizeof (gpointer) * 2;
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		if (MONO_TYPE_ISSTRUCT (sig->ret)) {
			/*
			 * In the new IR, the cfg->vret_addr variable represents the
			 * vtype return value.
			 */
			cfg->vret_addr->opcode = OP_REGOFFSET;
			cfg->vret_addr->inst_basereg = cfg->frame_reg;
			cfg->vret_addr->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
			if (G_UNLIKELY (cfg->verbose_level > 1)) {
				printf ("vret_addr =");
				mono_print_ins (cfg->vret_addr);
			}
		} else {
			cfg->ret->opcode = OP_REGOFFSET;
			cfg->ret->inst_basereg = X86_EBP;
			cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		}
		break;
	case ArgValuetypeInReg:
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		cfg->ret->dreg = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->args [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	cfg->stack_offset = offset;
}
void
mono_arch_create_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	CallInfo *cinfo;

	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	if (cinfo->ret.storage == ArgValuetypeInReg)
		cfg->ret_var_is_local = TRUE;
	if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig->ret)) {
		cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
	}
}
/*
 * It is expensive to adjust esp for each individual fp argument pushed on the stack
 * so we try to do it just once when we have multiple fp arguments in a row.
 * We don't use this mechanism generally because for int arguments the generated code
 * is slightly bigger and new generation cpus optimize away the dependency chains
 * created by push instructions on the esp value.
 * fp_arg_setup is the first argument in the execution sequence where the esp register
 * is modified.
 */
static G_GNUC_UNUSED int
collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
{
	int fp_space = 0;
	MonoType *t;

	for (; start_arg < sig->param_count; ++start_arg) {
		t = mini_type_get_underlying_type (NULL, sig->params [start_arg]);
		if (!t->byref && t->type == MONO_TYPE_R8) {
			fp_space += sizeof (double);
			*fp_arg_setup = start_arg;
		} else {
			break;
		}
	}
	return fp_space;
}
static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
	MonoMethodSignature *tmp_sig;

	/* FIXME: Add support for signature tokens to AOT */
	cfg->disable_aot = TRUE;

	/*
	 * mono_ArgIterator_Setup assumes the signature cookie is
	 * passed first and all the arguments which were before it are
	 * passed on the stack after the signature. So compensate by
	 * passing a different signature.
	 */
	tmp_sig = mono_metadata_signature_dup (call->signature);
	tmp_sig->param_count -= call->signature->sentinelpos;
	tmp_sig->sentinelpos = 0;
	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

	MONO_EMIT_NEW_BIALU_IMM (cfg, OP_X86_PUSH_IMM, -1, -1, tmp_sig);
}
#ifdef ENABLE_LLVM
LLVMCallInfo*
mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
{
	int i, n;
	CallInfo *cinfo;
	ArgInfo *ainfo;
	int j;
	LLVMCallInfo *linfo;

	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);

	linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));

	/*
	 * LLVM always uses the native ABI while we use our own ABI, the
	 * only difference is the handling of vtypes:
	 * - we only pass/receive them in registers in some cases, and only
	 *   in 1 or 2 integer registers.
	 */
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		if (sig->pinvoke) {
			cfg->exception_message = g_strdup ("pinvoke + vtypes");
			cfg->disable_llvm = TRUE;
			return linfo;
		}

		cfg->exception_message = g_strdup ("vtype ret in call");
		cfg->disable_llvm = TRUE;

		linfo->ret.storage = LLVMArgVtypeInReg;
		for (j = 0; j < 2; ++j)
			linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, cinfo->ret.pair_storage [j]);
	}

	if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage == ArgInIReg) {
		/* Vtype returned using a hidden argument */
		linfo->ret.storage = LLVMArgVtypeRetAddr;
	}

	if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage != ArgInIReg) {
		// FIXME:
		cfg->exception_message = g_strdup ("vtype ret in call");
		cfg->disable_llvm = TRUE;
	}

	for (i = 0; i < n; ++i) {
		ainfo = cinfo->args + i;

		linfo->args [i].storage = LLVMArgNone;

		switch (ainfo->storage) {
		case ArgInIReg:
			linfo->args [i].storage = LLVMArgInIReg;
			break;
		case ArgInDoubleSSEReg:
		case ArgInFloatSSEReg:
			linfo->args [i].storage = LLVMArgInFPReg;
			break;
		case ArgOnStack:
			if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) {
				linfo->args [i].storage = LLVMArgVtypeByVal;
			} else {
				linfo->args [i].storage = LLVMArgInIReg;
				if (!sig->params [i - sig->hasthis]->byref) {
					if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4) {
						linfo->args [i].storage = LLVMArgInFPReg;
					} else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8) {
						linfo->args [i].storage = LLVMArgInFPReg;
					}
				}
			}
			break;
		case ArgValuetypeInReg:
			if (sig->pinvoke) {
				cfg->exception_message = g_strdup ("pinvoke + vtypes");
				cfg->disable_llvm = TRUE;
				return linfo;
			}

			cfg->exception_message = g_strdup ("vtype arg");
			cfg->disable_llvm = TRUE;

			linfo->args [i].storage = LLVMArgVtypeInReg;
			for (j = 0; j < 2; ++j)
				linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);

			break;
		default:
			cfg->exception_message = g_strdup ("ainfo->storage");
			cfg->disable_llvm = TRUE;
			break;
		}
	}

	return linfo;
}
#endif
void
mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n;
	CallInfo *cinfo;
	int sentinelpos = 0;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		sentinelpos = sig->sentinelpos + (sig->hasthis ? 1 : 0);

	if (cinfo->need_stack_align) {
		MONO_INST_NEW (cfg, arg, OP_SUB_IMM);
		arg->dreg = X86_ESP;
		arg->sreg1 = X86_ESP;
		arg->inst_imm = cinfo->stack_align_amount;
		MONO_ADD_INS (cfg->cbb, arg);
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/*
			 * Tell the JIT to use a more efficient calling convention: call using
			 * OP_CALL, compute the result location after the call, and save the
			 * result there.
			 */
			call->vret_in_reg = TRUE;
			if (call->vret_var)
				NULLIFY_INS (call->vret_var);
		}
	}

	/* Handle the case where there are no implicit arguments */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
		emit_sig_cookie (cfg, call, cinfo);
	}

	/* Arguments are pushed in the reverse order */
	for (i = n - 1; i >= 0; i --) {
		ArgInfo *ainfo = cinfo->args + i;
		MonoType *t;

		if (i >= sig->hasthis)
			t = sig->params [i - sig->hasthis];
		else
			t = &mono_defaults.int_class->byval_arg;
		t = mini_type_get_underlying_type (cfg->generic_sharing_context, t);

		MONO_INST_NEW (cfg, arg, OP_X86_PUSH);

		in = call->args [i];
		arg->cil_code = in->cil_code;
		arg->sreg1 = in->dreg;
		arg->type = in->type;

		g_assert (in->dreg != -1);

		if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
			guint32 align;
			guint32 size;

			g_assert (in->klass);

			if (t->type == MONO_TYPE_TYPEDBYREF) {
				size = sizeof (MonoTypedRef);
				align = sizeof (gpointer);
			}
			else {
				size = mini_type_stack_size_full (cfg->generic_sharing_context, &in->klass->byval_arg, &align, sig->pinvoke);
			}

			if (size > 0) {
				arg->opcode = OP_OUTARG_VT;
				arg->sreg1 = in->dreg;
				arg->klass = in->klass;
				arg->backend.size = size;

				MONO_ADD_INS (cfg->cbb, arg);
			}
		}
		else {
			switch (ainfo->storage) {
			case ArgOnStack:
				arg->opcode = OP_X86_PUSH;
				if (!t->byref) {
					if (t->type == MONO_TYPE_R4) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 4);
						arg->opcode = OP_STORER4_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_R8) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
						arg->opcode = OP_STORER8_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_I8 || t->type == MONO_TYPE_U8) {
						arg->sreg1 ++;
						MONO_EMIT_NEW_UNALU (cfg, OP_X86_PUSH, -1, in->dreg + 2);
					}
				}
				break;
			default:
				g_assert_not_reached ();
			}

			MONO_ADD_INS (cfg->cbb, arg);
		}

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
			/* Emit the signature cookie just before the implicit arguments */
			emit_sig_cookie (cfg, call, cinfo);
		}
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		MonoInst *vtarg;

		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/* Already done */
		}
		else if (cinfo->ret.storage == ArgInIReg) {
			NOT_IMPLEMENTED;
			/* The return address is passed in a register */
			MONO_INST_NEW (cfg, vtarg, OP_MOVE);
			vtarg->sreg1 = call->inst.dreg;
			vtarg->dreg = mono_alloc_ireg (cfg);
			MONO_ADD_INS (cfg->cbb, vtarg);

			mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
		} else {
			MonoInst *vtarg;
			MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
			vtarg->type = STACK_MP;
			vtarg->sreg1 = call->vret_var->dreg;
			MONO_ADD_INS (cfg->cbb, vtarg);
		}

		/* if the function returns a struct on stack, the called method already does a ret $0x4 */
		if (cinfo->ret.storage != ArgValuetypeInReg)
			cinfo->stack_usage -= 4;
	}

	call->stack_usage = cinfo->stack_usage;
}
void
mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
{
	MonoInst *arg;
	int size = ins->backend.size;

	if (size <= 4) {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
		arg->sreg1 = src->dreg;

		MONO_ADD_INS (cfg->cbb, arg);
	} else if (size <= 20) {
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 4));
		mini_emit_memcpy (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
	} else {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
		arg->inst_basereg = src->dreg;
		arg->inst_offset = 0;
		arg->inst_imm = size;

		MONO_ADD_INS (cfg->cbb, arg);
	}
}
void
mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
{
	MonoType *ret = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);

	if (!ret->byref) {
		if (ret->type == MONO_TYPE_R4) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_R8) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_LMOVE, cfg->ret->dreg, val->dreg);
			else {
				MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EAX, val->dreg + 1);
				MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EDX, val->dreg + 2);
			}
			return;
		}
	}

	MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
}
/*
 * Allow tracing to work with this interface (with an optional argument)
 */
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;

	g_assert (MONO_ARCH_FRAME_ALIGNMENT >= 8);
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 8);

	/* if some args are passed in registers, we need to save them here */
	x86_push_reg (code, X86_EBP);

	if (cfg->compile_aot) {
		x86_push_imm (code, cfg->method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
		x86_push_imm (code, cfg->method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT);

	return code;
}
enum {
	SAVE_NONE,
	SAVE_STRUCT,
	SAVE_EAX,
	SAVE_EAX_EDX,
	SAVE_FP
};
void*
mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
{
	guchar *code = p;
	int arg_size = 0, stack_usage = 0, save_mode = SAVE_NONE;
	MonoMethod *method = cfg->method;
	MonoType *ret_type = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);

	switch (ret_type->type) {
	case MONO_TYPE_VOID:
		/* special case string .ctor icall */
		if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class) {
			save_mode = SAVE_EAX;
			stack_usage = enable_arguments ? 8 : 4;
		} else
			save_mode = SAVE_NONE;
		break;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		save_mode = SAVE_EAX_EDX;
		stack_usage = enable_arguments ? 16 : 8;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		save_mode = SAVE_FP;
		stack_usage = enable_arguments ? 16 : 8;
		break;
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (ret_type)) {
			save_mode = SAVE_EAX;
			stack_usage = enable_arguments ? 8 : 4;
			break;
		}
		/* Fall through */
	case MONO_TYPE_VALUETYPE:
		// FIXME: Handle SMALL_STRUCT_IN_REG here for proper alignment on darwin-x86
		save_mode = SAVE_STRUCT;
		stack_usage = enable_arguments ? 4 : 0;
		break;
	default:
		save_mode = SAVE_EAX;
		stack_usage = enable_arguments ? 8 : 4;
		break;
	}

	x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage - 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_push_reg (code, X86_EDX);
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EDX);
			x86_push_reg (code, X86_EAX);
			arg_size = 8;
		}
		break;
	case SAVE_EAX:
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EAX);
			arg_size = 4;
		}
		break;
	case SAVE_FP:
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		if (enable_arguments) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
			arg_size = 8;
		}
		break;
	case SAVE_STRUCT:
		if (enable_arguments) {
			x86_push_membase (code, X86_EBP, 8);
			arg_size = 4;
		}
		break;
	case SAVE_NONE:
	default:
		break;
	}

	if (cfg->compile_aot) {
		x86_push_imm (code, method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
		x86_push_imm (code, method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}

	x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_pop_reg (code, X86_EAX);
		x86_pop_reg (code, X86_EDX);
		break;
	case SAVE_EAX:
		x86_pop_reg (code, X86_EAX);
		break;
	case SAVE_FP:
		x86_fld_membase (code, X86_ESP, 0, TRUE);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		break;
	case SAVE_NONE:
	default:
		break;
	}

	x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage);

	return code;
}
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->inst_true_bb->native_offset) { \
	x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
} else { \
	mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
	if ((cfg->opt & MONO_OPT_BRANCH) && \
	    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
		x86_branch8 (code, cond, 0, sign); \
	else \
		x86_branch32 (code, cond, 0, sign); \
}
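/*
 * When the target block has not been emitted yet, the macro guesses the
 * branch width from bb->max_offset: the rel8 form (2 bytes) if the worst-case
 * distance fits in a signed byte, otherwise the rel32 form (6 bytes). The
 * patch info recorded above fills in the real displacement later.
 */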
/*
 * Emit an exception if the condition fails, and if possible branch
 * directly to the target.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
	do { \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, signed); \
		} else { \
			EMIT_COND_BRANCH (tins, cond, signed); \
		} \
	} while (0);
#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0);
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}
#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
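/*
 * ADC/SBB (and their _IMM variants) consume the carry flag left by a previous
 * instruction, so peephole transforms which clobber EFLAGS - such as turning
 * "mov reg, 0" into "xor reg, reg" in pass 2 below - must be skipped when the
 * next instruction is one of them.
 */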
/*
 * mono_peephole_pass_1:
 *
 *   Perform peephole opts which should/can be performed before local regalloc
 */
void
mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		MonoInst *last_ins = ins->prev;

		switch (ins->opcode) {
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				/*
				 * X86_LEA is like ADD, but doesn't have the
				 * sreg1==dreg restriction.
				 */
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_SUB_IMM:
		case OP_ISUB_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
				ins->inst_imm = -ins->inst_imm;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		case OP_COMPARE_IMM:
		case OP_ICOMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0)
			 * -->
			 * OP_X86_TEST_NULL (reg)
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/*
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_COMPARE_IMM;
				ins->sreg1 = last_ins->sreg1;

				/* check if we can remove cmp reg,0 with test null */
				if (!ins->inst_imm)
					ins->opcode = OP_X86_TEST_NULL;
			}

			break;
		case OP_X86_PUSH_MEMBASE:
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_X86_PUSH;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
void
mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we can't always do it */
			if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
				MonoInst *ins2;

				ins->opcode = OP_IXOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;

				/*
				 * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG
				 * since it takes 3 bytes instead of 7.
				 */
				for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STORE_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STOREI4_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
						/* Continue iteration */
					}
					else
						break;
				}
			}
			break;
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_ISUB_IMM:
		case OP_SUB_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
/*
 * mono_arch_lowering_pass:
 *
 *  Converts complex opcodes into simpler ones so that each IR instruction
 *  corresponds to one machine instruction.
 */
void
mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *next;

	/*
	 * FIXME: Need to add more instructions, but the current machine
	 * description can't model some parts of the composite instructions like
	 * cdq.
	 */
	MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) {
		switch (ins->opcode) {
		case OP_IREM_IMM:
		case OP_IDIV_IMM:
		case OP_IDIV_UN_IMM:
		case OP_IREM_UN_IMM:
			/*
			 * Keep the cases where we could generate optimized code, otherwise convert
			 * to the non-imm variant.
			 */
			if ((ins->opcode == OP_IREM_IMM) && mono_is_power_of_two (ins->inst_imm) >= 0)
				break;
			mono_decompose_op_imm (cfg, bb, ins);
			break;
		default:
			break;
		}
	}

	bb->max_vreg = cfg->next_vreg;
}
static const int
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Maps CMP_... constants to X86_CC_... constants */
static const int
cc_table [] = {
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};

static const int
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
#define XMM_TEMP_REG 0
	/* This SSE2 optimization must not be done with OPT_SIMD in place as it clobbers xmm0. */
	/* The xmm pass decomposes OP_FCONV_ ops anyway. */
	if (cfg->opt & MONO_OPT_SSE2 && size < 8 && !(cfg->opt & MONO_OPT_SIMD)) {
		/* optimize by assigning a local var for this use so we avoid
		 * the stack manipulations */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
		x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		if (size == 1)
			x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
		else if (size == 2)
			x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
		return code;
	}
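	/*
	 * x87 fallback: fistp rounds according to the FPU control word, while a
	 * C-style conversion truncates. Setting both rounding-control bits
	 * (mask 0xc00, bits 10-11 = 11b) selects round-toward-zero; the original
	 * control word is saved on the stack and restored afterwards.
	 */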
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(TARGET_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];
		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" the stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently committed.
		 * Stack normally grows sequentially, so the OS traps access to the
		 * guard page and commits more pages when needed.
		 */
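		/*
		 * Example: a request of 0x2500 bytes runs the loop below twice,
		 * touching the pages at ESP-0x1000 and ESP-0x2000, and then falls
		 * through to subtract the remaining 0x500 bytes in one step.
		 */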
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/*
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}

		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);

		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);

		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
	/* Move return value to the target register */
	switch (ins->opcode) {
	case OP_CALL:
	case OP_CALL_REG:
	case OP_CALL_MEMBASE:
		if (ins->dreg != X86_EAX)
			x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
		break;
	default:
		break;
	}

	return code;
}
/*
 * mono_x86_emit_tls_get:
 * @code: buffer to store code to
 * @dreg: hard register where to place the result
 * @tls_offset: offset info
 *
 * mono_x86_emit_tls_get emits in @code the native code that puts in
 * the dreg register the item in the thread local storage identified
 * by tls_offset.
 *
 * Returns: a pointer to the end of the stored code
 */
guint8*
mono_x86_emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef TARGET_WIN32
	/*
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
2053 g_assert (tls_offset < 64);
2054 x86_prefix (code, X86_FS_PREFIX);
2055 x86_mov_reg_mem (code, dreg, 0x18, 4);
/* Zero the last-error field at TEB+0x34, as TlsGetValue () does via SetLastError (0) */
2057 x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
2058 x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
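/*
* For reference: fs:[0x18] is the TIB self pointer and 3600 (0xE10) is
* the offset of the TlsSlots array in the 32-bit TEB, so this loads
* teb->TlsSlots [tls_offset].
*/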
2059 #else
2060 if (optimize_for_xen) {
2061 x86_prefix (code, X86_GS_PREFIX);
2062 x86_mov_reg_mem (code, dreg, 0, 4);
2063 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
2064 } else {
2065 x86_prefix (code, X86_GS_PREFIX);
2066 x86_mov_reg_mem (code, dreg, tls_offset, 4);
2068 #endif
2069 return code;
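
/*
* On the non-Windows fast path above this is equivalent to the pseudo-C
*
*     result = *(gpointer*)((guint8*)gs_base + tls_offset);
*
* where gs_base stands in for the thread's %gs segment base. The Xen
* variant first loads the thread pointer from %gs:0 and then indexes it
* with a plain memory access, avoiding a %gs-relative access with a large
* offset.
*/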
2073 * emit_load_volatile_arguments:
2075 * Load volatile arguments from the stack to the original input registers.
2076 * Required before a tail call.
2078 static guint8*
2079 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2081 MonoMethod *method = cfg->method;
2082 MonoMethodSignature *sig;
2083 MonoInst *inst;
2084 CallInfo *cinfo;
2085 guint32 i;
2087 /* FIXME: Generate intermediate code instead */
2089 sig = mono_method_signature (method);
2091 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
2093 /* This is the opposite of the code in emit_prolog */
2095 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2096 ArgInfo *ainfo = cinfo->args + i;
2097 MonoType *arg_type;
2098 inst = cfg->args [i];
2100 if (sig->hasthis && (i == 0))
2101 arg_type = &mono_defaults.object_class->byval_arg;
2102 else
2103 arg_type = sig->params [i - sig->hasthis];
2106 * On x86, the arguments are either in their original stack locations, or in
2107 * global regs.
2109 if (inst->opcode == OP_REGVAR) {
2110 g_assert (ainfo->storage == ArgOnStack);
2112 x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
2116 return code;
2119 #define REAL_PRINT_REG(text,reg) \
2120 mono_assert (reg >= 0); \
2121 x86_push_reg (code, X86_EAX); \
2122 x86_push_reg (code, X86_EDX); \
2123 x86_push_reg (code, X86_ECX); \
2124 x86_push_reg (code, reg); \
2125 x86_push_imm (code, reg); \
2126 x86_push_imm (code, text " %d %p\n"); \
2127 x86_mov_reg_imm (code, X86_EAX, printf); \
2128 x86_call_reg (code, X86_EAX); \
2129 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
2130 x86_pop_reg (code, X86_ECX); \
2131 x86_pop_reg (code, X86_EDX); \
2132 x86_pop_reg (code, X86_EAX);
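
/*
* Debugging aid only. A typical (hypothetical) use while bringing up the
* backend:
*
*     REAL_PRINT_REG ("vtable", X86_EAX);
*
* emits code that calls printf at run time with the register's index and
* value, preserving EAX/ECX/EDX around the call.
*/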
2134 /* benchmark and set based on cpu */
2135 #define LOOP_ALIGNMENT 8
2136 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2138 #ifndef DISABLE_JIT
2140 void
2141 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2143 MonoInst *ins;
2144 MonoCallInst *call;
2145 guint offset;
2146 guint8 *code = cfg->native_code + cfg->code_len;
2147 int max_len, cpos;
2149 if (cfg->opt & MONO_OPT_LOOP) {
2150 int pad, align = LOOP_ALIGNMENT;
2151 /* set alignment depending on cpu */
2152 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2153 pad = align - pad;
2154 /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2155 x86_padding (code, pad);
2156 cfg->code_len += pad;
2157 bb->native_offset = cfg->code_len;
2161 if (cfg->verbose_level > 2)
2162 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2164 cpos = bb->max_offset;
2166 if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2167 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2168 g_assert (!cfg->compile_aot);
2169 cpos += 6;
2171 cov->data [bb->dfn].cil_code = bb->cil_code;
/* this is not thread safe, but good enough */
2173 x86_inc_mem (code, &cov->data [bb->dfn].count);
2176 offset = code - cfg->native_code;
2178 mono_debug_open_block (cfg, bb, offset);
2180 MONO_BB_FOR_EACH_INS (bb, ins) {
2181 offset = code - cfg->native_code;
2183 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2185 if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
2186 cfg->code_size *= 2;
2187 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2188 code = cfg->native_code + offset;
2189 mono_jit_stats.code_reallocs++;
2192 if (cfg->debug_info)
2193 mono_debug_record_line_number (cfg, ins, offset);
2195 switch (ins->opcode) {
2196 case OP_BIGMUL:
2197 x86_mul_reg (code, ins->sreg2, TRUE);
2198 break;
2199 case OP_BIGMUL_UN:
2200 x86_mul_reg (code, ins->sreg2, FALSE);
2201 break;
2202 case OP_X86_SETEQ_MEMBASE:
2203 case OP_X86_SETNE_MEMBASE:
2204 x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2205 ins->inst_basereg, ins->inst_offset, TRUE);
2206 break;
2207 case OP_STOREI1_MEMBASE_IMM:
2208 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2209 break;
2210 case OP_STOREI2_MEMBASE_IMM:
2211 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2212 break;
2213 case OP_STORE_MEMBASE_IMM:
2214 case OP_STOREI4_MEMBASE_IMM:
2215 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2216 break;
2217 case OP_STOREI1_MEMBASE_REG:
2218 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2219 break;
2220 case OP_STOREI2_MEMBASE_REG:
2221 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2222 break;
2223 case OP_STORE_MEMBASE_REG:
2224 case OP_STOREI4_MEMBASE_REG:
2225 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2226 break;
2227 case OP_STORE_MEM_IMM:
2228 x86_mov_mem_imm (code, ins->inst_p0, ins->inst_c0, 4);
2229 break;
2230 case OP_LOADU4_MEM:
2231 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2232 break;
2233 case OP_LOAD_MEM:
2234 case OP_LOADI4_MEM:
2235 /* These are created by the cprop pass so they use inst_imm as the source */
2236 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2237 break;
2238 case OP_LOADU1_MEM:
2239 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, FALSE);
2240 break;
2241 case OP_LOADU2_MEM:
2242 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, TRUE);
2243 break;
2244 case OP_LOAD_MEMBASE:
2245 case OP_LOADI4_MEMBASE:
2246 case OP_LOADU4_MEMBASE:
2247 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2248 break;
2249 case OP_LOADU1_MEMBASE:
2250 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2251 break;
2252 case OP_LOADI1_MEMBASE:
2253 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2254 break;
2255 case OP_LOADU2_MEMBASE:
2256 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2257 break;
2258 case OP_LOADI2_MEMBASE:
2259 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2260 break;
2261 case OP_ICONV_TO_I1:
2262 case OP_SEXT_I1:
2263 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2264 break;
2265 case OP_ICONV_TO_I2:
2266 case OP_SEXT_I2:
2267 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2268 break;
2269 case OP_ICONV_TO_U1:
2270 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2271 break;
2272 case OP_ICONV_TO_U2:
2273 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2274 break;
2275 case OP_COMPARE:
2276 case OP_ICOMPARE:
2277 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2278 break;
2279 case OP_COMPARE_IMM:
2280 case OP_ICOMPARE_IMM:
2281 x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2282 break;
2283 case OP_X86_COMPARE_MEMBASE_REG:
2284 x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2285 break;
2286 case OP_X86_COMPARE_MEMBASE_IMM:
2287 x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2288 break;
2289 case OP_X86_COMPARE_MEMBASE8_IMM:
2290 x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2291 break;
2292 case OP_X86_COMPARE_REG_MEMBASE:
2293 x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2294 break;
2295 case OP_X86_COMPARE_MEM_IMM:
2296 x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2297 break;
2298 case OP_X86_TEST_NULL:
2299 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2300 break;
2301 case OP_X86_ADD_MEMBASE_IMM:
2302 x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2303 break;
2304 case OP_X86_ADD_REG_MEMBASE:
2305 x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2306 break;
2307 case OP_X86_SUB_MEMBASE_IMM:
2308 x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2309 break;
2310 case OP_X86_SUB_REG_MEMBASE:
2311 x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2312 break;
2313 case OP_X86_AND_MEMBASE_IMM:
2314 x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2315 break;
2316 case OP_X86_OR_MEMBASE_IMM:
2317 x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2318 break;
2319 case OP_X86_XOR_MEMBASE_IMM:
2320 x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2321 break;
2322 case OP_X86_ADD_MEMBASE_REG:
2323 x86_alu_membase_reg (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2324 break;
2325 case OP_X86_SUB_MEMBASE_REG:
2326 x86_alu_membase_reg (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2327 break;
2328 case OP_X86_AND_MEMBASE_REG:
2329 x86_alu_membase_reg (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2330 break;
2331 case OP_X86_OR_MEMBASE_REG:
2332 x86_alu_membase_reg (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2333 break;
2334 case OP_X86_XOR_MEMBASE_REG:
2335 x86_alu_membase_reg (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2336 break;
2337 case OP_X86_INC_MEMBASE:
2338 x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2339 break;
2340 case OP_X86_INC_REG:
2341 x86_inc_reg (code, ins->dreg);
2342 break;
2343 case OP_X86_DEC_MEMBASE:
2344 x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2345 break;
2346 case OP_X86_DEC_REG:
2347 x86_dec_reg (code, ins->dreg);
2348 break;
2349 case OP_X86_MUL_REG_MEMBASE:
2350 x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2351 break;
2352 case OP_X86_AND_REG_MEMBASE:
2353 x86_alu_reg_membase (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset);
2354 break;
2355 case OP_X86_OR_REG_MEMBASE:
2356 x86_alu_reg_membase (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset);
2357 break;
2358 case OP_X86_XOR_REG_MEMBASE:
2359 x86_alu_reg_membase (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset);
2360 break;
2361 case OP_BREAK:
2362 x86_breakpoint (code);
2363 break;
2364 case OP_RELAXED_NOP:
2365 x86_prefix (code, X86_REP_PREFIX);
2366 x86_nop (code);
2367 break;
2368 case OP_HARD_NOP:
2369 x86_nop (code);
2370 break;
2371 case OP_NOP:
2372 case OP_DUMMY_USE:
2373 case OP_DUMMY_STORE:
2374 case OP_NOT_REACHED:
2375 case OP_NOT_NULL:
2376 break;
2377 case OP_SEQ_POINT: {
2378 int i;
2380 if (cfg->compile_aot)
2381 NOT_IMPLEMENTED;
2384 * Read from the single stepping trigger page. This will cause a
2385 * SIGSEGV when single stepping is enabled.
2386 * We do this _before_ the breakpoint, so single stepping after
2387 * a breakpoint is hit will step to the next IL offset.
2389 if (ins->flags & MONO_INST_SINGLE_STEP_LOC)
2390 x86_alu_reg_mem (code, X86_CMP, X86_EAX, (guint32)ss_trigger_page);
2392 mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
2395 * A placeholder for a possible breakpoint inserted by
2396 * mono_arch_set_breakpoint ().
2398 for (i = 0; i < 6; ++i)
2399 x86_nop (code);
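/* mono_arch_set_breakpoint () later overwrites these nops in place
* (presumably with a read of bp_trigger_page), so enabling a breakpoint
* never changes the code size */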
2400 break;
2402 case OP_ADDCC:
2403 case OP_IADDCC:
2404 case OP_IADD:
2405 x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2406 break;
2407 case OP_ADC:
2408 case OP_IADC:
2409 x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2410 break;
2411 case OP_ADDCC_IMM:
2412 case OP_ADD_IMM:
2413 case OP_IADD_IMM:
2414 x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2415 break;
2416 case OP_ADC_IMM:
2417 case OP_IADC_IMM:
2418 x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2419 break;
2420 case OP_SUBCC:
2421 case OP_ISUBCC:
2422 case OP_ISUB:
2423 x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2424 break;
2425 case OP_SBB:
2426 case OP_ISBB:
2427 x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2428 break;
2429 case OP_SUBCC_IMM:
2430 case OP_SUB_IMM:
2431 case OP_ISUB_IMM:
2432 x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2433 break;
2434 case OP_SBB_IMM:
2435 case OP_ISBB_IMM:
2436 x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2437 break;
2438 case OP_IAND:
2439 x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2440 break;
2441 case OP_AND_IMM:
2442 case OP_IAND_IMM:
2443 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2444 break;
2445 case OP_IDIV:
2446 case OP_IREM:
/*
* The code is the same for div/rem, the allocator will allocate dreg
* to EAX/EDX as appropriate.
*/
2451 if (ins->sreg2 == X86_EDX) {
2452 /* cdq clobbers this */
2453 x86_push_reg (code, ins->sreg2);
2454 x86_cdq (code);
2455 x86_div_membase (code, X86_ESP, 0, TRUE);
2456 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2457 } else {
2458 x86_cdq (code);
2459 x86_div_reg (code, ins->sreg2, TRUE);
2461 break;
2462 case OP_IDIV_UN:
2463 case OP_IREM_UN:
2464 if (ins->sreg2 == X86_EDX) {
2465 x86_push_reg (code, ins->sreg2);
2466 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2467 x86_div_membase (code, X86_ESP, 0, FALSE);
2468 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2469 } else {
2470 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2471 x86_div_reg (code, ins->sreg2, FALSE);
2473 break;
2474 case OP_DIV_IMM:
2475 x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2476 x86_cdq (code);
2477 x86_div_reg (code, ins->sreg2, TRUE);
2478 break;
2479 case OP_IREM_IMM: {
2480 int power = mono_is_power_of_two (ins->inst_imm);
2482 g_assert (ins->sreg1 == X86_EAX);
2483 g_assert (ins->dreg == X86_EAX);
2484 g_assert (power >= 0);
2486 if (power == 1) {
2487 /* Based on http://compilers.iecc.com/comparch/article/93-04-079 */
2488 x86_cdq (code);
2489 x86_alu_reg_imm (code, X86_AND, X86_EAX, 1);
/*
* If the dividend is >= 0, this does nothing. If it is negative, it
* transforms %eax=0 into %eax=0, and %eax=1 into %eax=-1.
*/
2494 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EDX);
2495 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
2496 } else if (power == 0) {
2497 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2498 } else {
2499 /* Based on gcc code */
/* Add compensation for negative dividends */
2502 x86_cdq (code);
2503 x86_shift_reg_imm (code, X86_SHR, X86_EDX, 32 - power);
2504 x86_alu_reg_reg (code, X86_ADD, X86_EAX, X86_EDX);
2505 /* Compute remainder */
2506 x86_alu_reg_imm (code, X86_AND, X86_EAX, (1 << power) - 1);
2507 /* Remove compensation */
2508 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
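/*
* Worked example with power == 3 (rem by 8) and %eax == -11:
* cdq              -> %edx = 0xffffffff
* shr %edx, 29     -> %edx = 7
* add %eax, %edx   -> %eax = -4
* and %eax, 7      -> %eax = 4
* sub %eax, %edx   -> %eax = -3, matching C's -11 % 8
*/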
2510 break;
2512 case OP_IOR:
2513 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2514 break;
2515 case OP_OR_IMM:
2516 case OP_IOR_IMM:
2517 x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2518 break;
2519 case OP_IXOR:
2520 x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2521 break;
2522 case OP_XOR_IMM:
2523 case OP_IXOR_IMM:
2524 x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2525 break;
2526 case OP_ISHL:
2527 g_assert (ins->sreg2 == X86_ECX);
2528 x86_shift_reg (code, X86_SHL, ins->dreg);
2529 break;
2530 case OP_ISHR:
2531 g_assert (ins->sreg2 == X86_ECX);
2532 x86_shift_reg (code, X86_SAR, ins->dreg);
2533 break;
2534 case OP_SHR_IMM:
2535 case OP_ISHR_IMM:
2536 x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2537 break;
2538 case OP_SHR_UN_IMM:
2539 case OP_ISHR_UN_IMM:
2540 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2541 break;
2542 case OP_ISHR_UN:
2543 g_assert (ins->sreg2 == X86_ECX);
2544 x86_shift_reg (code, X86_SHR, ins->dreg);
2545 break;
2546 case OP_SHL_IMM:
2547 case OP_ISHL_IMM:
2548 x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2549 break;
2550 case OP_LSHL: {
2551 guint8 *jump_to_end;
2553 /* handle shifts below 32 bits */
2554 x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2555 x86_shift_reg (code, X86_SHL, ins->sreg1);
2557 x86_test_reg_imm (code, X86_ECX, 32);
2558 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2560 /* handle shift over 32 bit */
2561 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2562 x86_clear_reg (code, ins->sreg1);
2564 x86_patch (jump_to_end, code);
2566 break;
2567 case OP_LSHR: {
2568 guint8 *jump_to_end;
2570 /* handle shifts below 32 bits */
2571 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2572 x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2574 x86_test_reg_imm (code, X86_ECX, 32);
2575 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2577 /* handle shifts over 31 bits */
2578 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2579 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2581 x86_patch (jump_to_end, code);
2583 break;
2584 case OP_LSHR_UN: {
2585 guint8 *jump_to_end;
2587 /* handle shifts below 32 bits */
2588 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2589 x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2591 x86_test_reg_imm (code, X86_ECX, 32);
2592 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2594 /* handle shifts over 31 bits */
2595 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2596 x86_clear_reg (code, ins->backend.reg3);
2598 x86_patch (jump_to_end, code);
2600 break;
2601 case OP_LSHL_IMM:
2602 if (ins->inst_imm >= 32) {
2603 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2604 x86_clear_reg (code, ins->sreg1);
2605 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2606 } else {
2607 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2608 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2610 break;
2611 case OP_LSHR_IMM:
2612 if (ins->inst_imm >= 32) {
2613 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2614 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2615 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2616 } else {
2617 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2618 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2620 break;
2621 case OP_LSHR_UN_IMM:
2622 if (ins->inst_imm >= 32) {
2623 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2624 x86_clear_reg (code, ins->backend.reg3);
2625 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2626 } else {
2627 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2628 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2630 break;
2631 case OP_INOT:
2632 x86_not_reg (code, ins->sreg1);
2633 break;
2634 case OP_INEG:
2635 x86_neg_reg (code, ins->sreg1);
2636 break;
2638 case OP_IMUL:
2639 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2640 break;
2641 case OP_MUL_IMM:
2642 case OP_IMUL_IMM:
2643 switch (ins->inst_imm) {
2644 case 2:
2645 /* MOV r1, r2 */
2646 /* ADD r1, r1 */
2647 if (ins->dreg != ins->sreg1)
2648 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2649 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2650 break;
2651 case 3:
2652 /* LEA r1, [r2 + r2*2] */
2653 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2654 break;
2655 case 5:
2656 /* LEA r1, [r2 + r2*4] */
2657 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2658 break;
2659 case 6:
2660 /* LEA r1, [r2 + r2*2] */
2661 /* ADD r1, r1 */
2662 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2663 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2664 break;
2665 case 9:
2666 /* LEA r1, [r2 + r2*8] */
2667 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2668 break;
2669 case 10:
2670 /* LEA r1, [r2 + r2*4] */
2671 /* ADD r1, r1 */
2672 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2673 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2674 break;
2675 case 12:
2676 /* LEA r1, [r2 + r2*2] */
2677 /* SHL r1, 2 */
2678 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2679 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2680 break;
2681 case 25:
2682 /* LEA r1, [r2 + r2*4] */
2683 /* LEA r1, [r1 + r1*4] */
2684 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2685 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2686 break;
2687 case 100:
2688 /* LEA r1, [r2 + r2*4] */
2689 /* SHL r1, 2 */
2690 /* LEA r1, [r1 + r1*4] */
2691 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2692 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2693 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2694 break;
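/*
* These strength reductions all exploit LEA's base + index * 2^scale
* form: any factor of the shape (1 + 2^k) costs a single LEA, and such
* factors chain (e.g. 100 = 5 * 5 * 4 above).
*/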
2695 default:
2696 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2697 break;
2699 break;
2700 case OP_IMUL_OVF:
2701 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2702 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2703 break;
2704 case OP_IMUL_OVF_UN: {
2705 /* the mul operation and the exception check should most likely be split */
2706 int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2707 /*g_assert (ins->sreg2 == X86_EAX);
2708 g_assert (ins->dreg == X86_EAX);*/
2709 if (ins->sreg2 == X86_EAX) {
2710 non_eax_reg = ins->sreg1;
2711 } else if (ins->sreg1 == X86_EAX) {
2712 non_eax_reg = ins->sreg2;
2713 } else {
2714 /* no need to save since we're going to store to it anyway */
2715 if (ins->dreg != X86_EAX) {
2716 saved_eax = TRUE;
2717 x86_push_reg (code, X86_EAX);
2719 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2720 non_eax_reg = ins->sreg2;
2722 if (ins->dreg == X86_EDX) {
2723 if (!saved_eax) {
2724 saved_eax = TRUE;
2725 x86_push_reg (code, X86_EAX);
2727 } else if (ins->dreg != X86_EAX) {
2728 saved_edx = TRUE;
2729 x86_push_reg (code, X86_EDX);
2731 x86_mul_reg (code, non_eax_reg, FALSE);
2732 /* save before the check since pop and mov don't change the flags */
2733 if (ins->dreg != X86_EAX)
2734 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2735 if (saved_edx)
2736 x86_pop_reg (code, X86_EDX);
2737 if (saved_eax)
2738 x86_pop_reg (code, X86_EAX);
2739 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2740 break;
2742 case OP_ICONST:
2743 x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2744 break;
2745 case OP_AOTCONST:
2746 g_assert_not_reached ();
2747 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2748 x86_mov_reg_imm (code, ins->dreg, 0);
2749 break;
2750 case OP_JUMP_TABLE:
2751 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2752 x86_mov_reg_imm (code, ins->dreg, 0);
2753 break;
2754 case OP_LOAD_GOTADDR:
2755 x86_call_imm (code, 0);
2757 * The patch needs to point to the pop, since the GOT offset needs
2758 * to be added to that address.
2760 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2761 x86_pop_reg (code, ins->dreg);
2762 x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2763 break;
2764 case OP_GOT_ENTRY:
2765 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2766 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2767 break;
2768 case OP_X86_PUSH_GOT_ENTRY:
2769 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2770 x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2771 break;
2772 case OP_MOVE:
2773 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2774 break;
2775 case OP_JMP: {
2777 * Note: this 'frame destruction' logic is useful for tail calls, too.
2778 * Keep in sync with the code in emit_epilog.
2780 int pos = 0;
2782 /* FIXME: no tracing support... */
2783 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2784 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2785 /* reset offset to make max_len work */
2786 offset = code - cfg->native_code;
2788 g_assert (!cfg->method->save_lmf);
2790 code = emit_load_volatile_arguments (cfg, code);
2792 if (cfg->used_int_regs & (1 << X86_EBX))
2793 pos -= 4;
2794 if (cfg->used_int_regs & (1 << X86_EDI))
2795 pos -= 4;
2796 if (cfg->used_int_regs & (1 << X86_ESI))
2797 pos -= 4;
2798 if (pos)
2799 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2801 if (cfg->used_int_regs & (1 << X86_ESI))
2802 x86_pop_reg (code, X86_ESI);
2803 if (cfg->used_int_regs & (1 << X86_EDI))
2804 x86_pop_reg (code, X86_EDI);
2805 if (cfg->used_int_regs & (1 << X86_EBX))
2806 x86_pop_reg (code, X86_EBX);
2808 /* restore ESP/EBP */
2809 x86_leave (code);
2810 offset = code - cfg->native_code;
2811 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2812 x86_jump32 (code, 0);
2814 cfg->disable_aot = TRUE;
2815 break;
2817 case OP_CHECK_THIS:
2818 /* ensure ins->sreg1 is not NULL
2819 * note that cmp DWORD PTR [eax], eax is one byte shorter than
2820 * cmp DWORD PTR [eax], 0
2822 x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2823 break;
2824 case OP_ARGLIST: {
2825 int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2826 x86_push_reg (code, hreg);
2827 x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2828 x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2829 x86_pop_reg (code, hreg);
2830 break;
2832 case OP_FCALL:
2833 case OP_LCALL:
2834 case OP_VCALL:
2835 case OP_VCALL2:
2836 case OP_VOIDCALL:
2837 case OP_CALL:
2838 call = (MonoCallInst*)ins;
2839 if (ins->flags & MONO_INST_HAS_METHOD)
2840 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2841 else
2842 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2843 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
/* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
* bytes to pop, we want to use pops. GCC does this (note it won't happen
* for P4 or i686 because gcc will avoid using pop/push at all; we aren't
* smart enough to do that optimization yet).
*
* It turns out that on my P4, doing two pops for 8 bytes on the stack makes
* the mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a
* small speedup (most likely from locality benefits). People with other
* processors should check on theirs to see what happens.
*/
2854 if (call->stack_usage == 4) {
2855 /* we want to use registers that won't get used soon, so use
2856 * ecx, as eax will get allocated first. edx is used by long calls,
2857 * so we can't use that.
2860 x86_pop_reg (code, X86_ECX);
2861 } else {
2862 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2865 code = emit_move_return_value (cfg, ins, code);
2866 break;
2867 case OP_FCALL_REG:
2868 case OP_LCALL_REG:
2869 case OP_VCALL_REG:
2870 case OP_VCALL2_REG:
2871 case OP_VOIDCALL_REG:
2872 case OP_CALL_REG:
2873 call = (MonoCallInst*)ins;
2874 x86_call_reg (code, ins->sreg1);
2875 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2876 if (call->stack_usage == 4)
2877 x86_pop_reg (code, X86_ECX);
2878 else
2879 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2881 code = emit_move_return_value (cfg, ins, code);
2882 break;
2883 case OP_FCALL_MEMBASE:
2884 case OP_LCALL_MEMBASE:
2885 case OP_VCALL_MEMBASE:
2886 case OP_VCALL2_MEMBASE:
2887 case OP_VOIDCALL_MEMBASE:
2888 case OP_CALL_MEMBASE:
2889 call = (MonoCallInst*)ins;
2892 * Emit a few nops to simplify get_vcall_slot ().
2894 x86_nop (code);
2895 x86_nop (code);
2896 x86_nop (code);
2898 x86_call_membase (code, ins->sreg1, ins->inst_offset);
2899 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2900 if (call->stack_usage == 4)
2901 x86_pop_reg (code, X86_ECX);
2902 else
2903 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2905 code = emit_move_return_value (cfg, ins, code);
2906 break;
2907 case OP_X86_PUSH:
2908 x86_push_reg (code, ins->sreg1);
2909 break;
2910 case OP_X86_PUSH_IMM:
2911 x86_push_imm (code, ins->inst_imm);
2912 break;
2913 case OP_X86_PUSH_MEMBASE:
2914 x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2915 break;
2916 case OP_X86_PUSH_OBJ:
2917 x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2918 x86_push_reg (code, X86_EDI);
2919 x86_push_reg (code, X86_ESI);
2920 x86_push_reg (code, X86_ECX);
2921 if (ins->inst_offset)
2922 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2923 else
2924 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2925 x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2926 x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2927 x86_cld (code);
2928 x86_prefix (code, X86_REP_PREFIX);
2929 x86_movsd (code);
2930 x86_pop_reg (code, X86_ECX);
2931 x86_pop_reg (code, X86_ESI);
2932 x86_pop_reg (code, X86_EDI);
2933 break;
2934 case OP_X86_LEA:
2935 x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2936 break;
2937 case OP_X86_LEA_MEMBASE:
2938 x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2939 break;
2940 case OP_X86_XCHG:
2941 x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2942 break;
2943 case OP_LOCALLOC:
2944 /* keep alignment */
2945 x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2946 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
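/* e.g. with a (hypothetical) alignment of 8, a request for 13 bytes
* becomes (13 + 7) & ~7 = 16 */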
2947 code = mono_emit_stack_alloc (code, ins);
2948 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2949 break;
2950 case OP_LOCALLOC_IMM: {
2951 guint32 size = ins->inst_imm;
2952 size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
2954 if (ins->flags & MONO_INST_INIT) {
2955 /* FIXME: Optimize this */
2956 x86_mov_reg_imm (code, ins->dreg, size);
2957 ins->sreg1 = ins->dreg;
2959 code = mono_emit_stack_alloc (code, ins);
2960 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2961 } else {
2962 x86_alu_reg_imm (code, X86_SUB, X86_ESP, size);
2963 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2965 break;
2967 case OP_THROW: {
2968 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
2969 x86_push_reg (code, ins->sreg1);
2970 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
2971 (gpointer)"mono_arch_throw_exception");
2972 break;
2974 case OP_RETHROW: {
2975 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
2976 x86_push_reg (code, ins->sreg1);
2977 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
2978 (gpointer)"mono_arch_rethrow_exception");
2979 break;
2981 case OP_CALL_HANDLER:
2982 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
2983 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2984 x86_call_imm (code, 0);
2985 x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
2986 break;
2987 case OP_START_HANDLER: {
2988 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
2989 x86_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, X86_ESP, 4);
2990 break;
2992 case OP_ENDFINALLY: {
2993 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
2994 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
2995 x86_ret (code);
2996 break;
2998 case OP_ENDFILTER: {
2999 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3000 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
3001 /* The local allocator will put the result into EAX */
3002 x86_ret (code);
3003 break;
3006 case OP_LABEL:
3007 ins->inst_c0 = code - cfg->native_code;
3008 break;
3009 case OP_BR:
3010 if (ins->inst_target_bb->native_offset) {
3011 x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
3012 } else {
3013 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3014 if ((cfg->opt & MONO_OPT_BRANCH) &&
3015 x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3016 x86_jump8 (code, 0);
3017 else
3018 x86_jump32 (code, 0);
3020 break;
3021 case OP_BR_REG:
3022 x86_jump_reg (code, ins->sreg1);
3023 break;
3024 case OP_CEQ:
3025 case OP_CLT:
3026 case OP_CLT_UN:
3027 case OP_CGT:
3028 case OP_CGT_UN:
3029 case OP_CNE:
3030 case OP_ICEQ:
3031 case OP_ICLT:
3032 case OP_ICLT_UN:
3033 case OP_ICGT:
3034 case OP_ICGT_UN:
3035 x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
3036 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3037 break;
3038 case OP_COND_EXC_EQ:
3039 case OP_COND_EXC_NE_UN:
3040 case OP_COND_EXC_LT:
3041 case OP_COND_EXC_LT_UN:
3042 case OP_COND_EXC_GT:
3043 case OP_COND_EXC_GT_UN:
3044 case OP_COND_EXC_GE:
3045 case OP_COND_EXC_GE_UN:
3046 case OP_COND_EXC_LE:
3047 case OP_COND_EXC_LE_UN:
3048 case OP_COND_EXC_IEQ:
3049 case OP_COND_EXC_INE_UN:
3050 case OP_COND_EXC_ILT:
3051 case OP_COND_EXC_ILT_UN:
3052 case OP_COND_EXC_IGT:
3053 case OP_COND_EXC_IGT_UN:
3054 case OP_COND_EXC_IGE:
3055 case OP_COND_EXC_IGE_UN:
3056 case OP_COND_EXC_ILE:
3057 case OP_COND_EXC_ILE_UN:
3058 EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
3059 break;
3060 case OP_COND_EXC_OV:
3061 case OP_COND_EXC_NO:
3062 case OP_COND_EXC_C:
3063 case OP_COND_EXC_NC:
3064 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3065 break;
3066 case OP_COND_EXC_IOV:
3067 case OP_COND_EXC_INO:
3068 case OP_COND_EXC_IC:
3069 case OP_COND_EXC_INC:
3070 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
3071 break;
3072 case OP_IBEQ:
3073 case OP_IBNE_UN:
3074 case OP_IBLT:
3075 case OP_IBLT_UN:
3076 case OP_IBGT:
3077 case OP_IBGT_UN:
3078 case OP_IBGE:
3079 case OP_IBGE_UN:
3080 case OP_IBLE:
3081 case OP_IBLE_UN:
3082 EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
3083 break;
3085 case OP_CMOV_IEQ:
3086 case OP_CMOV_IGE:
3087 case OP_CMOV_IGT:
3088 case OP_CMOV_ILE:
3089 case OP_CMOV_ILT:
3090 case OP_CMOV_INE_UN:
3091 case OP_CMOV_IGE_UN:
3092 case OP_CMOV_IGT_UN:
3093 case OP_CMOV_ILE_UN:
3094 case OP_CMOV_ILT_UN:
3095 g_assert (ins->dreg == ins->sreg1);
3096 x86_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
3097 break;
3099 /* floating point opcodes */
3100 case OP_R8CONST: {
3101 double d = *(double *)ins->inst_p0;
3103 if ((d == 0.0) && (mono_signbit (d) == 0)) {
3104 x86_fldz (code);
3105 } else if (d == 1.0) {
3106 x86_fld1 (code);
3107 } else {
3108 if (cfg->compile_aot) {
3109 guint32 *val = (guint32*)&d;
3110 x86_push_imm (code, val [1]);
3111 x86_push_imm (code, val [0]);
3112 x86_fld_membase (code, X86_ESP, 0, TRUE);
3113 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3115 else {
3116 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
3117 x86_fld (code, NULL, TRUE);
3120 break;
3122 case OP_R4CONST: {
3123 float f = *(float *)ins->inst_p0;
3125 if ((f == 0.0) && (mono_signbit (f) == 0)) {
3126 x86_fldz (code);
3127 } else if (f == 1.0) {
3128 x86_fld1 (code);
3129 } else {
3130 if (cfg->compile_aot) {
3131 guint32 val = *(guint32*)&f;
3132 x86_push_imm (code, val);
3133 x86_fld_membase (code, X86_ESP, 0, FALSE);
3134 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3136 else {
3137 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
3138 x86_fld (code, NULL, FALSE);
3141 break;
3143 case OP_STORER8_MEMBASE_REG:
3144 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3145 break;
3146 case OP_LOADR8_MEMBASE:
3147 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3148 break;
3149 case OP_STORER4_MEMBASE_REG:
3150 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3151 break;
3152 case OP_LOADR4_MEMBASE:
3153 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3154 break;
3155 case OP_ICONV_TO_R4:
3156 x86_push_reg (code, ins->sreg1);
3157 x86_fild_membase (code, X86_ESP, 0, FALSE);
3158 /* Change precision */
3159 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3160 x86_fld_membase (code, X86_ESP, 0, FALSE);
3161 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3162 break;
3163 case OP_ICONV_TO_R8:
3164 x86_push_reg (code, ins->sreg1);
3165 x86_fild_membase (code, X86_ESP, 0, FALSE);
3166 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3167 break;
3168 case OP_ICONV_TO_R_UN:
3169 x86_push_imm (code, 0);
3170 x86_push_reg (code, ins->sreg1);
3171 x86_fild_membase (code, X86_ESP, 0, TRUE);
3172 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3173 break;
3174 case OP_X86_FP_LOAD_I8:
3175 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3176 break;
3177 case OP_X86_FP_LOAD_I4:
3178 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3179 break;
3180 case OP_FCONV_TO_R4:
3181 /* Change precision */
3182 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3183 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3184 x86_fld_membase (code, X86_ESP, 0, FALSE);
3185 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3186 break;
3187 case OP_FCONV_TO_I1:
3188 code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3189 break;
3190 case OP_FCONV_TO_U1:
3191 code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3192 break;
3193 case OP_FCONV_TO_I2:
3194 code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3195 break;
3196 case OP_FCONV_TO_U2:
3197 code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3198 break;
3199 case OP_FCONV_TO_I4:
3200 case OP_FCONV_TO_I:
3201 code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3202 break;
3203 case OP_FCONV_TO_I8:
3204 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3205 x86_fnstcw_membase(code, X86_ESP, 0);
3206 x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
3207 x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
3208 x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
3209 x86_fldcw_membase (code, X86_ESP, 2);
3210 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3211 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
3212 x86_pop_reg (code, ins->dreg);
3213 x86_pop_reg (code, ins->backend.reg3);
3214 x86_fldcw_membase (code, X86_ESP, 0);
3215 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3216 break;
3217 case OP_LCONV_TO_R8_2:
3218 x86_push_reg (code, ins->sreg2);
3219 x86_push_reg (code, ins->sreg1);
3220 x86_fild_membase (code, X86_ESP, 0, TRUE);
3221 /* Change precision */
3222 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
3223 x86_fld_membase (code, X86_ESP, 0, TRUE);
3224 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3225 break;
3226 case OP_LCONV_TO_R4_2:
3227 x86_push_reg (code, ins->sreg2);
3228 x86_push_reg (code, ins->sreg1);
3229 x86_fild_membase (code, X86_ESP, 0, TRUE);
3230 /* Change precision */
3231 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3232 x86_fld_membase (code, X86_ESP, 0, FALSE);
3233 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3234 break;
3235 case OP_LCONV_TO_R_UN_2: {
3236 static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
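/*
* mn is 2^64 as a little-endian 80-bit extended double (exponent word
* 0x403f, mantissa 0x8000000000000000): adding it to a negative fild
* result yields the intended unsigned value.
*/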
3237 guint8 *br;
3239 /* load 64bit integer to FP stack */
3240 x86_push_reg (code, ins->sreg2);
3241 x86_push_reg (code, ins->sreg1);
3242 x86_fild_membase (code, X86_ESP, 0, TRUE);
3244 /* test if lreg is negative */
3245 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3246 br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3248 /* add correction constant mn */
3249 x86_fld80_mem (code, mn);
3250 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3252 x86_patch (br, code);
3254 /* Change precision */
3255 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
3256 x86_fld_membase (code, X86_ESP, 0, TRUE);
3258 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3260 break;
3262 case OP_LCONV_TO_OVF_I:
3263 case OP_LCONV_TO_OVF_I4_2: {
3264 guint8 *br [3], *label [1];
3265 MonoInst *tins;
/*
* Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
*/
3270 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3272 /* If the low word top bit is set, see if we are negative */
3273 br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
/* We are not negative (no top bit set), check for our top word to be zero */
3275 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3276 br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3277 label [0] = code;
3279 /* throw exception */
3280 tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
3281 if (tins) {
3282 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
3283 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
3284 x86_jump8 (code, 0);
3285 else
3286 x86_jump32 (code, 0);
3287 } else {
3288 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3289 x86_jump32 (code, 0);
3293 x86_patch (br [0], code);
/* our top bit is set, check that the top word is 0xffffffff */
3295 x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3297 x86_patch (br [1], code);
3298 /* nope, emit exception */
3299 br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3300 x86_patch (br [2], label [0]);
3302 if (ins->dreg != ins->sreg1)
3303 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3304 break;
3306 case OP_FMOVE:
3307 /* Not needed on the fp stack */
3308 break;
3309 case OP_FADD:
3310 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3311 break;
3312 case OP_FSUB:
3313 x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3314 break;
3315 case OP_FMUL:
3316 x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3317 break;
3318 case OP_FDIV:
3319 x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3320 break;
3321 case OP_FNEG:
3322 x86_fchs (code);
3323 break;
3324 case OP_SIN:
3325 x86_fsin (code);
3326 x86_fldz (code);
3327 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3328 break;
3329 case OP_COS:
3330 x86_fcos (code);
3331 x86_fldz (code);
3332 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3333 break;
3334 case OP_ABS:
3335 x86_fabs (code);
3336 break;
3337 case OP_TAN: {
3339 * it really doesn't make sense to inline all this code,
3340 * it's here just to show that things may not be as simple
3341 * as they appear.
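*
* fptan only handles operands with |x| < 2^63 and sets C2 when the
* argument is out of range; the slow path below reduces the argument
* modulo 2*pi (fldpi doubled by the fadd, then fprem1) and retries.
*/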
3343 guchar *check_pos, *end_tan, *pop_jump;
3344 x86_push_reg (code, X86_EAX);
3345 x86_fptan (code);
3346 x86_fnstsw (code);
3347 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3348 check_pos = code;
3349 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3350 x86_fstp (code, 0); /* pop the 1.0 */
3351 end_tan = code;
3352 x86_jump8 (code, 0);
3353 x86_fldpi (code);
3354 x86_fp_op (code, X86_FADD, 0);
3355 x86_fxch (code, 1);
3356 x86_fprem1 (code);
3357 x86_fstsw (code);
3358 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3359 pop_jump = code;
3360 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3361 x86_fstp (code, 1);
3362 x86_fptan (code);
3363 x86_patch (pop_jump, code);
3364 x86_fstp (code, 0); /* pop the 1.0 */
3365 x86_patch (check_pos, code);
3366 x86_patch (end_tan, code);
3367 x86_fldz (code);
3368 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3369 x86_pop_reg (code, X86_EAX);
3370 break;
3372 case OP_ATAN:
3373 x86_fld1 (code);
3374 x86_fpatan (code);
3375 x86_fldz (code);
3376 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3377 break;
3378 case OP_SQRT:
3379 x86_fsqrt (code);
3380 break;
3381 case OP_ROUND:
3382 x86_frndint (code);
3383 break;
3384 case OP_IMIN:
3385 g_assert (cfg->opt & MONO_OPT_CMOV);
3386 g_assert (ins->dreg == ins->sreg1);
3387 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3388 x86_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
3389 break;
3390 case OP_IMIN_UN:
3391 g_assert (cfg->opt & MONO_OPT_CMOV);
3392 g_assert (ins->dreg == ins->sreg1);
3393 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3394 x86_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
3395 break;
3396 case OP_IMAX:
3397 g_assert (cfg->opt & MONO_OPT_CMOV);
3398 g_assert (ins->dreg == ins->sreg1);
3399 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3400 x86_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
3401 break;
3402 case OP_IMAX_UN:
3403 g_assert (cfg->opt & MONO_OPT_CMOV);
3404 g_assert (ins->dreg == ins->sreg1);
3405 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3406 x86_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
3407 break;
3408 case OP_X86_FPOP:
3409 x86_fstp (code, 0);
3410 break;
3411 case OP_X86_FXCH:
3412 x86_fxch (code, ins->inst_imm);
3413 break;
3414 case OP_FREM: {
3415 guint8 *l1, *l2;
3417 x86_push_reg (code, X86_EAX);
3418 /* we need to exchange ST(0) with ST(1) */
3419 x86_fxch (code, 1);
/* this requires a loop, because fprem sometimes
* returns a partial remainder */
3423 l1 = code;
3424 /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3425 /* x86_fprem1 (code); */
3426 x86_fprem (code);
3427 x86_fnstsw (code);
3428 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
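/* C2 set in the status word means fprem only did a partial reduction,
* so loop until it comes back clear */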
3429 l2 = code + 2;
3430 x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3432 /* pop result */
3433 x86_fstp (code, 1);
3435 x86_pop_reg (code, X86_EAX);
3436 break;
3438 case OP_FCOMPARE:
3439 if (cfg->opt & MONO_OPT_FCMOV) {
3440 x86_fcomip (code, 1);
3441 x86_fstp (code, 0);
3442 break;
3444 /* this overwrites EAX */
3445 EMIT_FPCOMPARE(code);
3446 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3447 break;
3448 case OP_FCEQ:
3449 if (cfg->opt & MONO_OPT_FCMOV) {
3450 /* zeroing the register at the start results in
3451 * shorter and faster code (we can also remove the widening op)
3453 guchar *unordered_check;
3454 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3455 x86_fcomip (code, 1);
3456 x86_fstp (code, 0);
3457 unordered_check = code;
3458 x86_branch8 (code, X86_CC_P, 0, FALSE);
3459 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3460 x86_patch (unordered_check, code);
3461 break;
3463 if (ins->dreg != X86_EAX)
3464 x86_push_reg (code, X86_EAX);
3466 EMIT_FPCOMPARE(code);
3467 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3468 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3469 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3470 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3472 if (ins->dreg != X86_EAX)
3473 x86_pop_reg (code, X86_EAX);
3474 break;
3475 case OP_FCLT:
3476 case OP_FCLT_UN:
3477 if (cfg->opt & MONO_OPT_FCMOV) {
3478 /* zeroing the register at the start results in
3479 * shorter and faster code (we can also remove the widening op)
3481 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3482 x86_fcomip (code, 1);
3483 x86_fstp (code, 0);
3484 if (ins->opcode == OP_FCLT_UN) {
3485 guchar *unordered_check = code;
3486 guchar *jump_to_end;
3487 x86_branch8 (code, X86_CC_P, 0, FALSE);
3488 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3489 jump_to_end = code;
3490 x86_jump8 (code, 0);
3491 x86_patch (unordered_check, code);
3492 x86_inc_reg (code, ins->dreg);
3493 x86_patch (jump_to_end, code);
3494 } else {
3495 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3497 break;
3499 if (ins->dreg != X86_EAX)
3500 x86_push_reg (code, X86_EAX);
3502 EMIT_FPCOMPARE(code);
3503 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3504 if (ins->opcode == OP_FCLT_UN) {
3505 guchar *is_not_zero_check, *end_jump;
3506 is_not_zero_check = code;
3507 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3508 end_jump = code;
3509 x86_jump8 (code, 0);
3510 x86_patch (is_not_zero_check, code);
3511 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3513 x86_patch (end_jump, code);
3515 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3516 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3518 if (ins->dreg != X86_EAX)
3519 x86_pop_reg (code, X86_EAX);
3520 break;
3521 case OP_FCGT:
3522 case OP_FCGT_UN:
3523 if (cfg->opt & MONO_OPT_FCMOV) {
3524 /* zeroing the register at the start results in
3525 * shorter and faster code (we can also remove the widening op)
3527 guchar *unordered_check;
3528 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3529 x86_fcomip (code, 1);
3530 x86_fstp (code, 0);
3531 if (ins->opcode == OP_FCGT) {
3532 unordered_check = code;
3533 x86_branch8 (code, X86_CC_P, 0, FALSE);
3534 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3535 x86_patch (unordered_check, code);
3536 } else {
3537 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3539 break;
3541 if (ins->dreg != X86_EAX)
3542 x86_push_reg (code, X86_EAX);
3544 EMIT_FPCOMPARE(code);
3545 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3546 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3547 if (ins->opcode == OP_FCGT_UN) {
3548 guchar *is_not_zero_check, *end_jump;
3549 is_not_zero_check = code;
3550 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3551 end_jump = code;
3552 x86_jump8 (code, 0);
3553 x86_patch (is_not_zero_check, code);
3554 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3556 x86_patch (end_jump, code);
3558 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3559 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3561 if (ins->dreg != X86_EAX)
3562 x86_pop_reg (code, X86_EAX);
3563 break;
3564 case OP_FBEQ:
3565 if (cfg->opt & MONO_OPT_FCMOV) {
3566 guchar *jump = code;
3567 x86_branch8 (code, X86_CC_P, 0, TRUE);
3568 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3569 x86_patch (jump, code);
3570 break;
3572 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3573 EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3574 break;
3575 case OP_FBNE_UN:
3576 /* Branch if C013 != 100 */
3577 if (cfg->opt & MONO_OPT_FCMOV) {
3578 /* branch if !ZF or (PF|CF) */
3579 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3580 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3581 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3582 break;
3584 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3585 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3586 break;
3587 case OP_FBLT:
3588 if (cfg->opt & MONO_OPT_FCMOV) {
3589 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3590 break;
3592 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3593 break;
3594 case OP_FBLT_UN:
3595 if (cfg->opt & MONO_OPT_FCMOV) {
3596 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3597 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3598 break;
3600 if (ins->opcode == OP_FBLT_UN) {
3601 guchar *is_not_zero_check, *end_jump;
3602 is_not_zero_check = code;
3603 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3604 end_jump = code;
3605 x86_jump8 (code, 0);
3606 x86_patch (is_not_zero_check, code);
3607 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3609 x86_patch (end_jump, code);
3611 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3612 break;
3613 case OP_FBGT:
3614 case OP_FBGT_UN:
3615 if (cfg->opt & MONO_OPT_FCMOV) {
3616 if (ins->opcode == OP_FBGT) {
3617 guchar *br1;
3619 /* skip branch if C1=1 */
3620 br1 = code;
3621 x86_branch8 (code, X86_CC_P, 0, FALSE);
3622 /* branch if (C0 | C3) = 1 */
3623 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3624 x86_patch (br1, code);
3625 } else {
3626 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3628 break;
3630 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3631 if (ins->opcode == OP_FBGT_UN) {
3632 guchar *is_not_zero_check, *end_jump;
3633 is_not_zero_check = code;
3634 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3635 end_jump = code;
3636 x86_jump8 (code, 0);
3637 x86_patch (is_not_zero_check, code);
3638 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3640 x86_patch (end_jump, code);
3642 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3643 break;
3644 case OP_FBGE:
3645 /* Branch if C013 == 100 or 001 */
3646 if (cfg->opt & MONO_OPT_FCMOV) {
3647 guchar *br1;
3649 /* skip branch if C1=1 */
3650 br1 = code;
3651 x86_branch8 (code, X86_CC_P, 0, FALSE);
3652 /* branch if (C0 | C3) = 1 */
3653 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3654 x86_patch (br1, code);
3655 break;
3657 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3658 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3659 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3660 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3661 break;
3662 case OP_FBGE_UN:
3663 /* Branch if C013 == 000 */
3664 if (cfg->opt & MONO_OPT_FCMOV) {
3665 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3666 break;
3668 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3669 break;
3670 case OP_FBLE:
3671 /* Branch if C013=000 or 100 */
3672 if (cfg->opt & MONO_OPT_FCMOV) {
3673 guchar *br1;
3675 /* skip branch if C1=1 */
3676 br1 = code;
3677 x86_branch8 (code, X86_CC_P, 0, FALSE);
3678 /* branch if C0=0 */
3679 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3680 x86_patch (br1, code);
3681 break;
3683 x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3684 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3685 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3686 break;
3687 case OP_FBLE_UN:
3688 /* Branch if C013 != 001 */
3689 if (cfg->opt & MONO_OPT_FCMOV) {
3690 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3691 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3692 break;
3694 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3695 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3696 break;
3697 case OP_CKFINITE: {
3698 guchar *br1;
3699 x86_push_reg (code, X86_EAX);
3700 x86_fxam (code);
3701 x86_fnstsw (code);
3702 x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3703 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3704 x86_pop_reg (code, X86_EAX);
3706 /* Have to clean up the fp stack before throwing the exception */
3707 br1 = code;
3708 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3710 x86_fstp (code, 0);
3711 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3713 x86_patch (br1, code);
3714 break;
3716 case OP_TLS_GET: {
3717 code = mono_x86_emit_tls_get (code, ins->dreg, ins->inst_offset);
3718 break;
3720 case OP_MEMORY_BARRIER: {
3721 /* Not needed on x86 */
3722 break;
3724 case OP_ATOMIC_ADD_I4: {
3725 int dreg = ins->dreg;
3727 if (dreg == ins->inst_basereg) {
3728 x86_push_reg (code, ins->sreg2);
3729 dreg = ins->sreg2;
3732 if (dreg != ins->sreg2)
3733 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3735 x86_prefix (code, X86_LOCK_PREFIX);
3736 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3738 if (dreg != ins->dreg) {
3739 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3740 x86_pop_reg (code, dreg);
3743 break;
3745 case OP_ATOMIC_ADD_NEW_I4: {
3746 int dreg = ins->dreg;
3748 /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3749 if (ins->sreg2 == dreg) {
3750 if (dreg == X86_EBX) {
3751 dreg = X86_EDI;
3752 if (ins->inst_basereg == X86_EDI)
3753 dreg = X86_ESI;
3754 } else {
3755 dreg = X86_EBX;
3756 if (ins->inst_basereg == X86_EBX)
3757 dreg = X86_EDI;
3759 } else if (ins->inst_basereg == dreg) {
3760 if (dreg == X86_EBX) {
3761 dreg = X86_EDI;
3762 if (ins->sreg2 == X86_EDI)
3763 dreg = X86_ESI;
3764 } else {
3765 dreg = X86_EBX;
3766 if (ins->sreg2 == X86_EBX)
3767 dreg = X86_EDI;
3771 if (dreg != ins->dreg) {
3772 x86_push_reg (code, dreg);
3775 x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3776 x86_prefix (code, X86_LOCK_PREFIX);
3777 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3778 /* dreg contains the old value, add with sreg2 value */
3779 x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
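/* (xadd stored old + sreg2 back to memory and left the old value in
* dreg, so the add above recomputes the stored result) */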
3781 if (ins->dreg != dreg) {
3782 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3783 x86_pop_reg (code, dreg);
3786 break;
3788 case OP_ATOMIC_EXCHANGE_I4: {
3789 guchar *br[2];
3790 int sreg2 = ins->sreg2;
3791 int breg = ins->inst_basereg;
/* cmpxchg uses eax as comparand, need to make sure we can use it
* hack to overcome limits in x86 reg allocator
* (req: dreg == eax and sreg2 != eax and breg != eax)
*/
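
/*
* A sketch of the emitted sequence (an exchange built from cmpxchg):
*
*     mov  eax, [breg + offset]
* retry:
*     lock cmpxchg [breg + offset], sreg2
*     jne  retry    ; on failure cmpxchg reloaded eax with [mem]
*/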
3797 g_assert (ins->dreg == X86_EAX);
3799 /* We need the EAX reg for the cmpxchg */
3800 if (ins->sreg2 == X86_EAX) {
3801 sreg2 = (breg == X86_EDX) ? X86_EBX : X86_EDX;
3802 x86_push_reg (code, sreg2);
3803 x86_mov_reg_reg (code, sreg2, X86_EAX, 4);
3806 if (breg == X86_EAX) {
3807 breg = (sreg2 == X86_ESI) ? X86_EDI : X86_ESI;
3808 x86_push_reg (code, breg);
3809 x86_mov_reg_reg (code, breg, X86_EAX, 4);
3812 x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3814 br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3815 x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3816 br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3817 x86_patch (br [1], br [0]);
3819 if (breg != ins->inst_basereg)
3820 x86_pop_reg (code, breg);
3822 if (ins->sreg2 != sreg2)
3823 x86_pop_reg (code, sreg2);
3825 break;
3827 case OP_ATOMIC_CAS_I4: {
3828 g_assert (ins->sreg3 == X86_EAX);
3829 g_assert (ins->sreg1 != X86_EAX);
3830 g_assert (ins->sreg1 != ins->sreg2);
3832 x86_prefix (code, X86_LOCK_PREFIX);
3833 x86_cmpxchg_membase_reg (code, ins->sreg1, ins->inst_offset, ins->sreg2);
3835 if (ins->dreg != X86_EAX)
3836 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3837 break;
3839 #ifdef MONO_ARCH_SIMD_INTRINSICS
3840 case OP_ADDPS:
3841 x86_sse_alu_ps_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
3842 break;
3843 case OP_DIVPS:
3844 x86_sse_alu_ps_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
3845 break;
3846 case OP_MULPS:
3847 x86_sse_alu_ps_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
3848 break;
3849 case OP_SUBPS:
3850 x86_sse_alu_ps_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
3851 break;
3852 case OP_MAXPS:
3853 x86_sse_alu_ps_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
3854 break;
3855 case OP_MINPS:
3856 x86_sse_alu_ps_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
3857 break;
3858 case OP_COMPPS:
3859 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
3860 x86_sse_alu_ps_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
3861 break;
3862 case OP_ANDPS:
3863 x86_sse_alu_ps_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
3864 break;
3865 case OP_ANDNPS:
3866 x86_sse_alu_ps_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
3867 break;
3868 case OP_ORPS:
3869 x86_sse_alu_ps_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
3870 break;
3871 case OP_XORPS:
3872 x86_sse_alu_ps_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
3873 break;
3874 case OP_SQRTPS:
3875 x86_sse_alu_ps_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
3876 break;
3877 case OP_RSQRTPS:
3878 x86_sse_alu_ps_reg_reg (code, X86_SSE_RSQRT, ins->dreg, ins->sreg1);
3879 break;
3880 case OP_RCPPS:
3881 x86_sse_alu_ps_reg_reg (code, X86_SSE_RCP, ins->dreg, ins->sreg1);
3882 break;
3883 case OP_ADDSUBPS:
3884 x86_sse_alu_sd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
3885 break;
3886 case OP_HADDPS:
3887 x86_sse_alu_sd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
3888 break;
3889 case OP_HSUBPS:
3890 x86_sse_alu_sd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
3891 break;
3892 case OP_DUPPS_HIGH:
3893 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSHDUP, ins->dreg, ins->sreg1);
3894 break;
3895 case OP_DUPPS_LOW:
3896 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSLDUP, ins->dreg, ins->sreg1);
3897 break;
3899 case OP_PSHUFLEW_HIGH:
3900 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3901 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 1);
3902 break;
3903 case OP_PSHUFLEW_LOW:
3904 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3905 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 0);
3906 break;
3907 case OP_PSHUFLED:
3908 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3909 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->sreg1, ins->inst_c0);
3910 break;
3912 case OP_ADDPD:
3913 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
3914 break;
3915 case OP_DIVPD:
3916 x86_sse_alu_pd_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
3917 break;
3918 case OP_MULPD:
3919 x86_sse_alu_pd_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
3920 break;
3921 case OP_SUBPD:
3922 x86_sse_alu_pd_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
3923 break;
3924 case OP_MAXPD:
3925 x86_sse_alu_pd_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
3926 break;
3927 case OP_MINPD:
3928 x86_sse_alu_pd_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
3929 break;
3930 case OP_COMPPD:
3931 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
3932 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
3933 break;
3934 case OP_ANDPD:
3935 x86_sse_alu_pd_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
3936 break;
3937 case OP_ANDNPD:
3938 x86_sse_alu_pd_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
3939 break;
3940 case OP_ORPD:
3941 x86_sse_alu_pd_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
3942 break;
3943 case OP_XORPD:
3944 x86_sse_alu_pd_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
3945 break;
3946 case OP_SQRTPD:
3947 x86_sse_alu_pd_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
3948 break;
3949 case OP_ADDSUBPD:
3950 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
3951 break;
3952 case OP_HADDPD:
3953 x86_sse_alu_pd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
3954 break;
3955 case OP_HSUBPD:
3956 x86_sse_alu_pd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
3957 break;
3958 case OP_DUPPD:
3959 x86_sse_alu_sd_reg_reg (code, X86_SSE_MOVDDUP, ins->dreg, ins->sreg1);
3960 break;
3962 case OP_EXTRACT_MASK:
3963 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMOVMSKB, ins->dreg, ins->sreg1);
3964 break;
3966 case OP_PAND:
3967 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAND, ins->sreg1, ins->sreg2);
3968 break;
3969 case OP_POR:
3970 x86_sse_alu_pd_reg_reg (code, X86_SSE_POR, ins->sreg1, ins->sreg2);
3971 break;
3972 case OP_PXOR:
3973 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->sreg1, ins->sreg2);
3974 break;
3976 case OP_PADDB:
3977 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDB, ins->sreg1, ins->sreg2);
3978 break;
3979 case OP_PADDW:
3980 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDW, ins->sreg1, ins->sreg2);
3981 break;
3982 case OP_PADDD:
3983 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDD, ins->sreg1, ins->sreg2);
3984 break;
3985 case OP_PADDQ:
3986 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDQ, ins->sreg1, ins->sreg2);
3987 break;
3989 case OP_PSUBB:
3990 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBB, ins->sreg1, ins->sreg2);
3991 break;
3992 case OP_PSUBW:
3993 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBW, ins->sreg1, ins->sreg2);
3994 break;
3995 case OP_PSUBD:
3996 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBD, ins->sreg1, ins->sreg2);
3997 break;
3998 case OP_PSUBQ:
3999 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBQ, ins->sreg1, ins->sreg2);
4000 break;
4002 case OP_PMAXB_UN:
4003 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXUB, ins->sreg1, ins->sreg2);
4004 break;
4005 case OP_PMAXW_UN:
4006 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUW, ins->sreg1, ins->sreg2);
4007 break;
4008 case OP_PMAXD_UN:
4009 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUD, ins->sreg1, ins->sreg2);
4010 break;
4012 case OP_PMAXB:
4013 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSB, ins->sreg1, ins->sreg2);
4014 break;
4015 case OP_PMAXW:
4016 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXSW, ins->sreg1, ins->sreg2);
4017 break;
4018 case OP_PMAXD:
4019 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSD, ins->sreg1, ins->sreg2);
4020 break;
4022 case OP_PAVGB_UN:
4023 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGB, ins->sreg1, ins->sreg2);
4024 break;
4025 case OP_PAVGW_UN:
4026 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGW, ins->sreg1, ins->sreg2);
4027 break;
4029 case OP_PMINB_UN:
4030 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINUB, ins->sreg1, ins->sreg2);
4031 break;
4032 case OP_PMINW_UN:
4033 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUW, ins->sreg1, ins->sreg2);
4034 break;
4035 case OP_PMIND_UN:
4036 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUD, ins->sreg1, ins->sreg2);
4037 break;
4039 case OP_PMINB:
4040 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSB, ins->sreg1, ins->sreg2);
4041 break;
4042 case OP_PMINW:
4043 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINSW, ins->sreg1, ins->sreg2);
4044 break;
4045 case OP_PMIND:
4046 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSD, ins->sreg1, ins->sreg2);
4047 break;
4049 case OP_PCMPEQB:
4050 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQB, ins->sreg1, ins->sreg2);
4051 break;
4052 case OP_PCMPEQW:
4053 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQW, ins->sreg1, ins->sreg2);
4054 break;
4055 case OP_PCMPEQD:
4056 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2);
4057 break;
4058 case OP_PCMPEQQ:
4059 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPEQQ, ins->sreg1, ins->sreg2);
4060 break;
4062 case OP_PCMPGTB:
4063 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2);
4064 break;
4065 case OP_PCMPGTW:
4066 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTW, ins->sreg1, ins->sreg2);
4067 break;
4068 case OP_PCMPGTD:
4069 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2);
4070 break;
4071 case OP_PCMPGTQ:
4072 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPGTQ, ins->sreg1, ins->sreg2);
4073 break;
4075 case OP_PSUM_ABS_DIFF:
4076 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2);
4077 break;
4079 case OP_UNPACK_LOWB:
4080 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLBW, ins->sreg1, ins->sreg2);
4081 break;
4082 case OP_UNPACK_LOWW:
4083 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLWD, ins->sreg1, ins->sreg2);
4084 break;
4085 case OP_UNPACK_LOWD:
4086 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLDQ, ins->sreg1, ins->sreg2);
4087 break;
4088 case OP_UNPACK_LOWQ:
4089 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLQDQ, ins->sreg1, ins->sreg2);
4090 break;
4091 case OP_UNPACK_LOWPS:
4092 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4093 break;
4094 case OP_UNPACK_LOWPD:
4095 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4096 break;
4098 case OP_UNPACK_HIGHB:
4099 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHBW, ins->sreg1, ins->sreg2);
4100 break;
4101 case OP_UNPACK_HIGHW:
4102 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHWD, ins->sreg1, ins->sreg2);
4103 break;
4104 case OP_UNPACK_HIGHD:
4105 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHDQ, ins->sreg1, ins->sreg2);
4106 break;
4107 case OP_UNPACK_HIGHQ:
4108 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHQDQ, ins->sreg1, ins->sreg2);
4109 break;
4110 case OP_UNPACK_HIGHPS:
4111 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4112 break;
4113 case OP_UNPACK_HIGHPD:
4114 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4115 break;
4117 case OP_PACKW:
4118 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSWB, ins->sreg1, ins->sreg2);
4119 break;
4120 case OP_PACKD:
4121 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSDW, ins->sreg1, ins->sreg2);
4122 break;
4123 case OP_PACKW_UN:
4124 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKUSWB, ins->sreg1, ins->sreg2);
4125 break;
4126 case OP_PACKD_UN:
4127 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PACKUSDW, ins->sreg1, ins->sreg2);
4128 break;
4130 case OP_PADDB_SAT_UN:
4131 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSB, ins->sreg1, ins->sreg2);
4132 break;
4133 case OP_PSUBB_SAT_UN:
4134 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSB, ins->sreg1, ins->sreg2);
4135 break;
4136 case OP_PADDW_SAT_UN:
4137 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSW, ins->sreg1, ins->sreg2);
4138 break;
4139 case OP_PSUBW_SAT_UN:
4140 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSW, ins->sreg1, ins->sreg2);
4141 break;
4143 case OP_PADDB_SAT:
4144 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSB, ins->sreg1, ins->sreg2);
4145 break;
4146 case OP_PSUBB_SAT:
4147 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSB, ins->sreg1, ins->sreg2);
4148 break;
4149 case OP_PADDW_SAT:
4150 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSW, ins->sreg1, ins->sreg2);
4151 break;
4152 case OP_PSUBW_SAT:
4153 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSW, ins->sreg1, ins->sreg2);
4154 break;
4156 case OP_PMULW:
4157 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULLW, ins->sreg1, ins->sreg2);
4158 break;
4159 case OP_PMULD:
4160 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMULLD, ins->sreg1, ins->sreg2);
4161 break;
4162 case OP_PMULQ:
4163 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULUDQ, ins->sreg1, ins->sreg2);
4164 break;
4165 case OP_PMULW_HIGH_UN:
4166 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHUW, ins->sreg1, ins->sreg2);
4167 break;
4168 case OP_PMULW_HIGH:
4169 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHW, ins->sreg1, ins->sreg2);
4170 break;
4172 case OP_PSHRW:
4173 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4174 break;
4175 case OP_PSHRW_REG:
4176 x86_sse_shift_reg_reg (code, X86_SSE_PSRLW_REG, ins->dreg, ins->sreg2);
4177 break;
4179 case OP_PSARW:
4180 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4181 break;
4182 case OP_PSARW_REG:
4183 x86_sse_shift_reg_reg (code, X86_SSE_PSRAW_REG, ins->dreg, ins->sreg2);
4184 break;
4186 case OP_PSHLW:
4187 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4188 break;
4189 case OP_PSHLW_REG:
4190 x86_sse_shift_reg_reg (code, X86_SSE_PSLLW_REG, ins->dreg, ins->sreg2);
4191 break;
4193 case OP_PSHRD:
4194 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4195 break;
4196 case OP_PSHRD_REG:
4197 x86_sse_shift_reg_reg (code, X86_SSE_PSRLD_REG, ins->dreg, ins->sreg2);
4198 break;
4200 case OP_PSARD:
4201 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4202 break;
4203 case OP_PSARD_REG:
4204 x86_sse_shift_reg_reg (code, X86_SSE_PSRAD_REG, ins->dreg, ins->sreg2);
4205 break;
4207 case OP_PSHLD:
4208 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4209 break;
4210 case OP_PSHLD_REG:
4211 x86_sse_shift_reg_reg (code, X86_SSE_PSLLD_REG, ins->dreg, ins->sreg2);
4212 break;
4214 case OP_PSHRQ:
4215 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4216 break;
4217 case OP_PSHRQ_REG:
4218 x86_sse_shift_reg_reg (code, X86_SSE_PSRLQ_REG, ins->dreg, ins->sreg2);
4219 break;
4221 case OP_PSHLQ:
4222 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4223 break;
4224 case OP_PSHLQ_REG:
4225 x86_sse_shift_reg_reg (code, X86_SSE_PSLLQ_REG, ins->dreg, ins->sreg2);
4226 break;
4228 case OP_ICONV_TO_X:
4229 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4230 break;
4231 case OP_EXTRACT_I4:
4232 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4233 break;
4234 case OP_EXTRACT_I1:
4235 case OP_EXTRACT_U1:
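/* movd copies the low 32 bits of the xmm register; shift the selected
 * byte into place, then sign- or zero-extend it depending on the opcode. */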
4236 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4237 if (ins->inst_c0)
4238 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
4239 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
4240 break;
4241 case OP_EXTRACT_I2:
4242 case OP_EXTRACT_U2:
4243 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4244 if (ins->inst_c0)
4245 x86_shift_reg_imm (code, X86_SHR, ins->dreg, 16);
4246 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE);
4247 break;
4248 case OP_EXTRACT_R8:
4249 if (ins->inst_c0)
4250 x86_sse_alu_pd_membase_reg (code, X86_SSE_MOVHPD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4251 else
4252 x86_sse_alu_sd_membase_reg (code, X86_SSE_MOVSD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4253 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE);
4254 break;
4256 case OP_INSERT_I2:
4257 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->sreg1, ins->sreg2, ins->inst_c0);
4258 break;
4259 case OP_EXTRACTX_U2:
4260 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PEXTRW, ins->dreg, ins->sreg1, ins->inst_c0);
4261 break;
4262 case OP_INSERTX_U1_SLOW:
/* sreg1 is the extracted ireg (scratch)
 * sreg2 is the ireg to be inserted (scratch)
 * dreg is the xreg to receive the value */
4267 /*clear the bits from the extracted word*/
4268 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
4269 /*shift the value to insert if needed*/
4270 if (ins->inst_c0 & 1)
4271 x86_shift_reg_imm (code, X86_SHL, ins->sreg2, 8);
4272 /*join them together*/
4273 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
4274 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
4275 break;
4276 case OP_INSERTX_I4_SLOW:
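/* No pinsrd before SSE4.1: insert the 32 bit value as two 16 bit words. */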
4277 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
4278 x86_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
4279 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
4280 break;
4282 case OP_INSERTX_R4_SLOW:
4283 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4284 /*TODO if inst_c0 == 0 use movss*/
4285 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 0, ins->inst_c0 * 2);
4286 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 2, ins->inst_c0 * 2 + 1);
4287 break;
4288 case OP_INSERTX_R8_SLOW:
4289 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4290 if (ins->inst_c0)
4291 x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVHPD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4292 else
4293 x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVSD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4294 break;
4296 case OP_STOREX_MEMBASE_REG:
4297 case OP_STOREX_MEMBASE:
4298 x86_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4299 break;
4300 case OP_LOADX_MEMBASE:
4301 x86_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4302 break;
4303 case OP_LOADX_ALIGNED_MEMBASE:
4304 x86_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4305 break;
4306 case OP_STOREX_ALIGNED_MEMBASE_REG:
4307 x86_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4308 break;
4309 case OP_STOREX_NTA_MEMBASE_REG:
4310 x86_sse_alu_reg_membase (code, X86_SSE_MOVNTPS, ins->dreg, ins->sreg1, ins->inst_offset);
4311 break;
4312 case OP_PREFETCH_MEMBASE:
4313 x86_sse_alu_reg_membase (code, X86_SSE_PREFETCH, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
4315 break;
4316 case OP_XMOVE:
4317 /*FIXME the peephole pass should have killed this*/
4318 if (ins->dreg != ins->sreg1)
4319 x86_movaps_reg_reg (code, ins->dreg, ins->sreg1);
4320 break;
4321 case OP_XZERO:
4322 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->dreg, ins->dreg);
4323 break;
4324 case OP_ICONV_TO_R8_RAW:
4325 x86_mov_membase_reg (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1, 4);
4326 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE);
4327 break;
4329 case OP_FCONV_TO_R8_X:
4330 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4331 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4332 break;
4334 case OP_XCONV_R8_TO_I4:
4335 x86_cvttsd2si (code, ins->dreg, ins->sreg1);
4336 switch (ins->backend.source_opcode) {
4337 case OP_FCONV_TO_I1:
4338 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
4339 break;
4340 case OP_FCONV_TO_U1:
4341 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4342 break;
4343 case OP_FCONV_TO_I2:
4344 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
4345 break;
4346 case OP_FCONV_TO_U2:
4347 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
4348 break;
4350 break;
4352 case OP_EXPAND_I1:
4353 /*FIXME this causes a partial register stall, maybe it would not be that bad to use shift + mask + or*/
4354 /*The +4 is to get a mov ?h, ?l over the same reg.*/
4355 x86_mov_reg_reg (code, ins->dreg + 4, ins->dreg, 1);
4356 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4357 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4358 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4359 break;
4360 case OP_EXPAND_I2:
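/* Replicate the 16 bit value into both halves of the low dword with two
 * pinsrw ops, then broadcast that dword to all lanes with pshufd 0. */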
4361 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4362 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4363 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4364 break;
4365 case OP_EXPAND_I4:
4366 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4367 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4368 break;
4369 case OP_EXPAND_R4:
4370 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4371 x86_movd_xreg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4372 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4373 break;
4374 case OP_EXPAND_R8:
4375 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4376 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4377 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0x44);
4378 break;
4379 #endif
4380 case OP_LIVERANGE_START: {
4381 if (cfg->verbose_level > 1)
4382 printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4383 MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
4384 break;
4386 case OP_LIVERANGE_END: {
4387 if (cfg->verbose_level > 1)
4388 printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4389 MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
4390 break;
4392 default:
4393 g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
4394 g_assert_not_reached ();
4397 if (G_UNLIKELY ((code - cfg->native_code - offset) > max_len)) {
4398 g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4399 mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4400 g_assert_not_reached ();
4403 cpos += max_len;
4406 cfg->code_len = code - cfg->native_code;
4409 #endif /* DISABLE_JIT */
4411 void
4412 mono_arch_register_lowlevel_calls (void)
4416 void
4417 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4419 MonoJumpInfo *patch_info;
4420 gboolean compile_aot = !run_cctors;
4422 for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4423 unsigned char *ip = patch_info->ip.i + code;
4424 const unsigned char *target;
4426 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4428 if (compile_aot) {
4429 switch (patch_info->type) {
4430 case MONO_PATCH_INFO_BB:
4431 case MONO_PATCH_INFO_LABEL:
4432 break;
4433 default:
4434 /* No need to patch these */
4435 continue;
4439 switch (patch_info->type) {
4440 case MONO_PATCH_INFO_IP:
4441 *((gconstpointer *)(ip)) = target;
4442 break;
4443 case MONO_PATCH_INFO_CLASS_INIT: {
4444 guint8 *code = ip;
/* Might already have been changed to a nop */
4446 x86_call_code (code, 0);
4447 x86_patch (ip, target);
4448 break;
4450 case MONO_PATCH_INFO_ABS:
4451 case MONO_PATCH_INFO_METHOD:
4452 case MONO_PATCH_INFO_METHOD_JUMP:
4453 case MONO_PATCH_INFO_INTERNAL_METHOD:
4454 case MONO_PATCH_INFO_BB:
4455 case MONO_PATCH_INFO_LABEL:
4456 case MONO_PATCH_INFO_RGCTX_FETCH:
4457 case MONO_PATCH_INFO_GENERIC_CLASS_INIT:
4458 case MONO_PATCH_INFO_MONITOR_ENTER:
4459 case MONO_PATCH_INFO_MONITOR_EXIT:
4460 x86_patch (ip, target);
4461 break;
4462 case MONO_PATCH_INFO_NONE:
4463 break;
4464 default: {
4465 guint32 offset = mono_arch_get_patch_offset (ip);
4466 *((gconstpointer *)(ip + offset)) = target;
4467 break;
4473 guint8 *
4474 mono_arch_emit_prolog (MonoCompile *cfg)
4476 MonoMethod *method = cfg->method;
4477 MonoBasicBlock *bb;
4478 MonoMethodSignature *sig;
4479 MonoInst *inst;
4480 int alloc_size, pos, max_offset, i, cfa_offset;
4481 guint8 *code;
4482 gboolean need_stack_frame;
4484 cfg->code_size = MAX (mono_method_get_header (method)->code_size * 4, 10240);
4486 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4487 cfg->code_size += 512;
4489 code = cfg->native_code = g_malloc (cfg->code_size);
/* Offset between ESP and the CFA */
4492 cfa_offset = 0;
4494 // CFA = sp + 4
4495 cfa_offset = sizeof (gpointer);
4496 mono_emit_unwind_op_def_cfa (cfg, code, X86_ESP, sizeof (gpointer));
4497 // IP saved at CFA - 4
4498 /* There is no IP reg on x86 */
4499 mono_emit_unwind_op_offset (cfg, code, X86_NREG, -cfa_offset);
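/* At this point the unwind info describes the frame as it looks right
 * after the caller's call instruction:
 *   CFA = %esp + 4, return address stored at CFA - 4
 */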
4501 need_stack_frame = needs_stack_frame (cfg);
4503 if (need_stack_frame) {
4504 x86_push_reg (code, X86_EBP);
4505 cfa_offset += sizeof (gpointer);
4506 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
4507 mono_emit_unwind_op_offset (cfg, code, X86_EBP, - cfa_offset);
4508 x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
4509 mono_emit_unwind_op_def_cfa_reg (cfg, code, X86_EBP);
4512 alloc_size = cfg->stack_offset;
4513 pos = 0;
4515 if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
4516 /* Might need to attach the thread to the JIT or change the domain for the callback */
4517 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
4518 guint8 *buf, *no_domain_branch;
4520 code = mono_x86_emit_tls_get (code, X86_EAX, appdomain_tls_offset);
4521 x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
4522 no_domain_branch = code;
4523 x86_branch8 (code, X86_CC_NE, 0, 0);
4524 code = mono_x86_emit_tls_get ( code, X86_EAX, lmf_tls_offset);
4525 x86_test_reg_reg (code, X86_EAX, X86_EAX);
4526 buf = code;
4527 x86_branch8 (code, X86_CC_NE, 0, 0);
4528 x86_patch (no_domain_branch, code);
4529 x86_push_imm (code, cfg->domain);
4530 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4531 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4532 x86_patch (buf, code);
4533 #ifdef TARGET_WIN32
4534 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4535 /* FIXME: Add a separate key for LMF to avoid this */
4536 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4537 #endif
4539 else {
4540 g_assert (!cfg->compile_aot);
4541 x86_push_imm (code, cfg->domain);
4542 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4543 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4547 if (method->save_lmf) {
4548 pos += sizeof (MonoLMF);
4550 if (cfg->compile_aot)
4551 cfg->disable_aot = TRUE;
4553 /* save the current IP */
4554 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
4555 x86_push_imm_template (code);
4556 cfa_offset += sizeof (gpointer);
4558 /* save all caller saved regs */
4559 x86_push_reg (code, X86_EBP);
4560 cfa_offset += sizeof (gpointer);
4561 x86_push_reg (code, X86_ESI);
4562 cfa_offset += sizeof (gpointer);
4563 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
4564 x86_push_reg (code, X86_EDI);
4565 cfa_offset += sizeof (gpointer);
4566 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
4567 x86_push_reg (code, X86_EBX);
4568 cfa_offset += sizeof (gpointer);
4569 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
4571 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
/*
 * Optimized version which uses the mono_lmf TLS variable instead of indirection
 * through the mono_lmf_addr TLS variable.
 */
4576 /* %eax = previous_lmf */
4577 x86_prefix (code, X86_GS_PREFIX);
4578 x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
4579 /* skip esp + method_info + lmf */
4580 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
4581 /* push previous_lmf */
4582 x86_push_reg (code, X86_EAX);
4583 /* new lmf = ESP */
4584 x86_prefix (code, X86_GS_PREFIX);
4585 x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
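/* The stack now holds a MonoLMF: previous_lmf sits at the lowest address
 * (the new %esp), the skipped esp/method_info/lmf slots are above it, then
 * the saved ebx/edi/esi/ebp and the ip pushed earlier. */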
4586 } else {
4587 /* get the address of lmf for the current thread */
/*
 * This is performance critical, so we try to use some tricks to make
 * it fast.
 */
4593 if (lmf_addr_tls_offset != -1) {
/* Load the lmf quickly using the GS register */
4595 code = mono_x86_emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
4596 #ifdef TARGET_WIN32
4597 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4598 /* FIXME: Add a separate key for LMF to avoid this */
4599 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4600 #endif
4601 } else {
4602 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
4605 /* Skip esp + method info */
4606 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
4608 /* push lmf */
4609 x86_push_reg (code, X86_EAX);
/* push *lmf (previous_lmf) */
4611 x86_push_membase (code, X86_EAX, 0);
4612 /* *(lmf) = ESP */
4613 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
4615 } else {
4617 if (cfg->used_int_regs & (1 << X86_EBX)) {
4618 x86_push_reg (code, X86_EBX);
4619 pos += 4;
4620 cfa_offset += sizeof (gpointer);
4621 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
4624 if (cfg->used_int_regs & (1 << X86_EDI)) {
4625 x86_push_reg (code, X86_EDI);
4626 pos += 4;
4627 cfa_offset += sizeof (gpointer);
4628 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
4631 if (cfg->used_int_regs & (1 << X86_ESI)) {
4632 x86_push_reg (code, X86_ESI);
4633 pos += 4;
4634 cfa_offset += sizeof (gpointer);
4635 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
4639 alloc_size -= pos;
/* the original alloc_size is already aligned: %ebp and the return ip are pushed, so realign */
4642 if (mono_do_x86_stack_align && need_stack_frame) {
4643 int tot = alloc_size + pos + 4; /* ret ip */
4644 if (need_stack_frame)
4645 tot += 4; /* ebp */
4646 tot &= MONO_ARCH_FRAME_ALIGNMENT - 1;
4647 if (tot)
4648 alloc_size += MONO_ARCH_FRAME_ALIGNMENT - tot;
4651 if (alloc_size) {
4652 /* See mono_emit_stack_alloc */
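/* Large frames are committed a page (0x1000 bytes) at a time: each
 * iteration subtracts a page from %esp and touches it with a test, so the
 * OS guard page is hit in order and the stack can grow safely. */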
4653 #if defined(TARGET_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
4654 guint32 remaining_size = alloc_size;
/*FIXME handle unbounded code expansion, we should use a loop in case of more than X iterations*/
4656 guint32 required_code_size = ((remaining_size / 0x1000) + 1) * 8; /*8 is the max size of x86_alu_reg_imm + x86_test_membase_reg*/
4657 guint32 offset = code - cfg->native_code;
4658 if (G_UNLIKELY (required_code_size >= (cfg->code_size - offset))) {
4659 while (required_code_size >= (cfg->code_size - offset))
4660 cfg->code_size *= 2;
4661 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4662 code = cfg->native_code + offset;
4663 mono_jit_stats.code_reallocs++;
4665 while (remaining_size >= 0x1000) {
4666 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
4667 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
4668 remaining_size -= 0x1000;
4670 if (remaining_size)
4671 x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
4672 #else
4673 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
4674 #endif
4676 g_assert (need_stack_frame);
4679 if (cfg->method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED ||
4680 cfg->method->wrapper_type == MONO_WRAPPER_RUNTIME_INVOKE) {
4681 x86_alu_reg_imm (code, X86_AND, X86_ESP, -MONO_ARCH_FRAME_ALIGNMENT);
4684 #if DEBUG_STACK_ALIGNMENT
4685 /* check the stack is aligned */
4686 if (need_stack_frame && method->wrapper_type == MONO_WRAPPER_NONE) {
4687 x86_mov_reg_reg (code, X86_ECX, X86_ESP, 4);
4688 x86_alu_reg_imm (code, X86_AND, X86_ECX, MONO_ARCH_FRAME_ALIGNMENT - 1);
4689 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
4690 x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
4691 x86_breakpoint (code);
4693 #endif
4695 /* compute max_offset in order to use short forward jumps */
4696 max_offset = 0;
4697 if (cfg->opt & MONO_OPT_BRANCH) {
4698 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4699 MonoInst *ins;
4700 bb->max_offset = max_offset;
4702 if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4703 max_offset += 6;
4704 /* max alignment for loops */
4705 if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4706 max_offset += LOOP_ALIGNMENT;
4708 MONO_BB_FOR_EACH_INS (bb, ins) {
4709 if (ins->opcode == OP_LABEL)
4710 ins->inst_c1 = max_offset;
4712 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
4717 /* store runtime generic context */
4718 if (cfg->rgctx_var) {
4719 g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET && cfg->rgctx_var->inst_basereg == X86_EBP);
4721 x86_mov_membase_reg (code, X86_EBP, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 4);
4724 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4725 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4727 /* load arguments allocated to register from the stack */
4728 sig = mono_method_signature (method);
4729 pos = 0;
4731 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4732 inst = cfg->args [pos];
4733 if (inst->opcode == OP_REGVAR) {
4734 g_assert (need_stack_frame);
4735 x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
4736 if (cfg->verbose_level > 2)
4737 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
4739 pos++;
4742 cfg->code_len = code - cfg->native_code;
4744 g_assert (cfg->code_len < cfg->code_size);
4746 return code;
4749 void
4750 mono_arch_emit_epilog (MonoCompile *cfg)
4752 MonoMethod *method = cfg->method;
4753 MonoMethodSignature *sig = mono_method_signature (method);
4754 int quad, pos;
4755 guint32 stack_to_pop;
4756 guint8 *code;
4757 int max_epilog_size = 16;
4758 CallInfo *cinfo;
4759 gboolean need_stack_frame = needs_stack_frame (cfg);
4761 if (cfg->method->save_lmf)
4762 max_epilog_size += 128;
4764 while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
4765 cfg->code_size *= 2;
4766 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4767 mono_jit_stats.code_reallocs++;
4770 code = cfg->native_code + cfg->code_len;
4772 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4773 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4775 /* the code restoring the registers must be kept in sync with OP_JMP */
4776 pos = 0;
4778 if (method->save_lmf) {
4779 gint32 prev_lmf_reg;
4780 gint32 lmf_offset = -sizeof (MonoLMF);
4782 /* check if we need to restore protection of the stack after a stack overflow */
4783 if (mono_get_jit_tls_offset () != -1) {
4784 guint8 *patch;
4785 code = mono_x86_emit_tls_get (code, X86_ECX, mono_get_jit_tls_offset ());
/* we load the value in a separate instruction: this mechanism may be
 * used later as a safer way to do thread interruption
 */
4789 x86_mov_reg_membase (code, X86_ECX, X86_ECX, G_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 4);
4790 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
4791 patch = code;
4792 x86_branch8 (code, X86_CC_Z, 0, FALSE);
4793 /* note that the call trampoline will preserve eax/edx */
4794 x86_call_reg (code, X86_ECX);
4795 x86_patch (patch, code);
4796 } else {
4797 /* FIXME: maybe save the jit tls in the prolog */
4799 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
/*
 * Optimized version which uses the mono_lmf TLS variable instead of indirection
 * through the mono_lmf_addr TLS variable.
 */
4804 /* reg = previous_lmf */
4805 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
4807 /* lmf = previous_lmf */
4808 x86_prefix (code, X86_GS_PREFIX);
4809 x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
4810 } else {
4811 /* Find a spare register */
4812 switch (mini_type_get_underlying_type (cfg->generic_sharing_context, sig->ret)->type) {
4813 case MONO_TYPE_I8:
4814 case MONO_TYPE_U8:
4815 prev_lmf_reg = X86_EDI;
4816 cfg->used_int_regs |= (1 << X86_EDI);
4817 break;
4818 default:
4819 prev_lmf_reg = X86_EDX;
4820 break;
4823 /* reg = previous_lmf */
4824 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
4826 /* ecx = lmf */
4827 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
4829 /* *(lmf) = previous_lmf */
4830 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
4833 /* restore caller saved regs */
4834 if (cfg->used_int_regs & (1 << X86_EBX)) {
4835 x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
4838 if (cfg->used_int_regs & (1 << X86_EDI)) {
4839 x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
4841 if (cfg->used_int_regs & (1 << X86_ESI)) {
4842 x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
4845 /* EBP is restored by LEAVE */
4846 } else {
4847 if (cfg->used_int_regs & (1 << X86_EBX)) {
4848 pos -= 4;
4850 if (cfg->used_int_regs & (1 << X86_EDI)) {
4851 pos -= 4;
4853 if (cfg->used_int_regs & (1 << X86_ESI)) {
4854 pos -= 4;
4857 if (pos) {
4858 g_assert (need_stack_frame);
4859 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
4862 if (cfg->used_int_regs & (1 << X86_ESI)) {
4863 x86_pop_reg (code, X86_ESI);
4865 if (cfg->used_int_regs & (1 << X86_EDI)) {
4866 x86_pop_reg (code, X86_EDI);
4868 if (cfg->used_int_regs & (1 << X86_EBX)) {
4869 x86_pop_reg (code, X86_EBX);
4873 /* Load returned vtypes into registers if needed */
4874 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
4875 if (cinfo->ret.storage == ArgValuetypeInReg) {
4876 for (quad = 0; quad < 2; quad ++) {
4877 switch (cinfo->ret.pair_storage [quad]) {
4878 case ArgInIReg:
4879 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
4880 break;
4881 case ArgOnFloatFpStack:
4882 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
4883 break;
4884 case ArgOnDoubleFpStack:
4885 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
4886 break;
4887 case ArgNone:
4888 break;
4889 default:
4890 g_assert_not_reached ();
4895 if (need_stack_frame)
4896 x86_leave (code);
4898 if (CALLCONV_IS_STDCALL (sig)) {
4899 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
4901 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
4902 } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
4903 stack_to_pop = 4;
4904 else
4905 stack_to_pop = 0;
4907 if (stack_to_pop) {
4908 g_assert (need_stack_frame);
4909 x86_ret_imm (code, stack_to_pop);
4910 } else {
4911 x86_ret (code);
4914 cfg->code_len = code - cfg->native_code;
4916 g_assert (cfg->code_len < cfg->code_size);
4919 void
4920 mono_arch_emit_exceptions (MonoCompile *cfg)
4922 MonoJumpInfo *patch_info;
4923 int nthrows, i;
4924 guint8 *code;
4925 MonoClass *exc_classes [16];
4926 guint8 *exc_throw_start [16], *exc_throw_end [16];
4927 guint32 code_size;
4928 int exc_count = 0;
4930 /* Compute needed space */
4931 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4932 if (patch_info->type == MONO_PATCH_INFO_EXC)
4933 exc_count++;
}

/*
 * make sure we have enough space for exceptions
 * 16 is the size of two push_imm instructions and a call
 */
4940 if (cfg->compile_aot)
4941 code_size = exc_count * 32;
4942 else
4943 code_size = exc_count * 16;
4945 while (cfg->code_len + code_size > (cfg->code_size - 16)) {
4946 cfg->code_size *= 2;
4947 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4948 mono_jit_stats.code_reallocs++;
4951 code = cfg->native_code + cfg->code_len;
4953 nthrows = 0;
4954 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4955 switch (patch_info->type) {
4956 case MONO_PATCH_INFO_EXC: {
4957 MonoClass *exc_class;
4958 guint8 *buf, *buf2;
4959 guint32 throw_ip;
4961 x86_patch (patch_info->ip.i + cfg->native_code, code);
4963 exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4964 g_assert (exc_class);
4965 throw_ip = patch_info->ip.i;
4967 /* Find a throw sequence for the same exception class */
4968 for (i = 0; i < nthrows; ++i)
4969 if (exc_classes [i] == exc_class)
4970 break;
4971 if (i < nthrows) {
4972 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4973 x86_jump_code (code, exc_throw_start [i]);
4974 patch_info->type = MONO_PATCH_INFO_NONE;
4976 else {
4977 guint32 size;
4979 /* Compute size of code following the push <OFFSET> */
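/* a push of the type token (5 bytes) plus the call (5 bytes) */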
4980 size = 5 + 5;
4982 /*This is aligned to 16 bytes by the callee. This way we save a few bytes here.*/
4984 if ((code - cfg->native_code) - throw_ip < 126 - size) {
4985 /* Use the shorter form */
4986 buf = buf2 = code;
4987 x86_push_imm (code, 0);
4989 else {
4990 buf = code;
4991 x86_push_imm (code, 0xf0f0f0f0);
4992 buf2 = code;
4995 if (nthrows < 16) {
4996 exc_classes [nthrows] = exc_class;
4997 exc_throw_start [nthrows] = code;
5000 x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
5001 patch_info->data.name = "mono_arch_throw_corlib_exception";
5002 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
5003 patch_info->ip.i = code - cfg->native_code;
5004 x86_call_code (code, 0);
5005 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
5006 while (buf < buf2)
5007 x86_nop (buf);
5009 if (nthrows < 16) {
5010 exc_throw_end [nthrows] = code;
5011 nthrows ++;
5014 break;
5016 default:
5017 /* do nothing */
5018 break;
5022 cfg->code_len = code - cfg->native_code;
5024 g_assert (cfg->code_len < cfg->code_size);
5027 void
5028 mono_arch_flush_icache (guint8 *code, gint size)
5030 /* not needed */
5033 void
5034 mono_arch_flush_register_windows (void)
5038 gboolean
5039 mono_arch_is_inst_imm (gint64 imm)
5041 return TRUE;
}

/*
 * Support for fast access to the thread-local lmf structure using the GS
 * segment register on NPTL + kernel 2.6.x.
 */
5049 static gboolean tls_offset_inited = FALSE;
5051 void
5052 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
5054 if (!tls_offset_inited) {
5055 if (!getenv ("MONO_NO_TLS")) {
5056 #ifdef TARGET_WIN32
5058 * We need to init this multiple times, since when we are first called, the key might not
5059 * be initialized yet.
5061 appdomain_tls_offset = mono_domain_get_tls_key ();
5062 lmf_tls_offset = mono_get_jit_tls_key ();
5064 /* Only 64 tls entries can be accessed using inline code */
5065 if (appdomain_tls_offset >= 64)
5066 appdomain_tls_offset = -1;
5067 if (lmf_tls_offset >= 64)
5068 lmf_tls_offset = -1;
5069 #else
5070 #if MONO_XEN_OPT
5071 optimize_for_xen = access ("/proc/xen", F_OK) == 0;
5072 #endif
5073 tls_offset_inited = TRUE;
5074 appdomain_tls_offset = mono_domain_get_tls_offset ();
5075 lmf_tls_offset = mono_get_lmf_tls_offset ();
5076 lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
5077 #endif
5082 void
5083 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
5087 #ifdef MONO_ARCH_HAVE_IMT
5089 // Linear handler, the bsearch head compare is shorter
5090 //[2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
5091 //[1 + 1] x86_branch8(inst,cond,imm,is_signed)
5092 // x86_patch(ins,target)
5093 //[1 + 5] x86_jump_mem(inst,mem)
5095 #define CMP_SIZE 6
5096 #define BR_SMALL_SIZE 2
5097 #define BR_LARGE_SIZE 5
5098 #define JUMP_IMM_SIZE 6
5099 #define ENABLE_WRONG_METHOD_CHECK 0
5100 #define DEBUG_IMT 0
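/* With the sizes above, each IMT entry compiles to at most:
 *   cmp $key, %MONO_ARCH_IMT_REG
 *   jne <next check>           (8 or 32 bit form)
 *   jmp *<vtable slot>
 */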
5102 static int
5103 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
5105 int i, distance = 0;
5106 for (i = start; i < target; ++i)
5107 distance += imt_entries [i]->chunk_size;
5108 return distance;
}

/*
 * LOCKING: called with the domain lock held
 */
5114 gpointer
5115 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
5116 gpointer fail_tramp)
5118 int i;
5119 int size = 0;
5120 guint8 *code, *start;
5122 for (i = 0; i < count; ++i) {
5123 MonoIMTCheckItem *item = imt_entries [i];
5124 if (item->is_equals) {
5125 if (item->check_target_idx) {
5126 if (!item->compare_done)
5127 item->chunk_size += CMP_SIZE;
5128 item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
5129 } else {
5130 if (fail_tramp) {
5131 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + JUMP_IMM_SIZE * 2;
5132 } else {
5133 item->chunk_size += JUMP_IMM_SIZE;
5134 #if ENABLE_WRONG_METHOD_CHECK
5135 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
5136 #endif
5139 } else {
5140 item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
5141 imt_entries [item->check_target_idx]->compare_done = TRUE;
5143 size += item->chunk_size;
5145 if (fail_tramp)
5146 code = mono_method_alloc_generic_virtual_thunk (domain, size);
5147 else
5148 code = mono_domain_code_reserve (domain, size);
5149 start = code;
5150 for (i = 0; i < count; ++i) {
5151 MonoIMTCheckItem *item = imt_entries [i];
5152 item->code_target = code;
5153 if (item->is_equals) {
5154 if (item->check_target_idx) {
5155 if (!item->compare_done)
5156 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5157 item->jmp_code = code;
5158 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5159 if (item->has_target_code)
5160 x86_jump_code (code, item->value.target_code);
5161 else
5162 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5163 } else {
5164 if (fail_tramp) {
5165 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5166 item->jmp_code = code;
5167 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5168 if (item->has_target_code)
5169 x86_jump_code (code, item->value.target_code);
5170 else
5171 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5172 x86_patch (item->jmp_code, code);
5173 x86_jump_code (code, fail_tramp);
5174 item->jmp_code = NULL;
5175 } else {
5176 /* enable the commented code to assert on wrong method */
5177 #if ENABLE_WRONG_METHOD_CHECK
5178 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5179 item->jmp_code = code;
5180 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5181 #endif
5182 if (item->has_target_code)
5183 x86_jump_code (code, item->value.target_code);
5184 else
5185 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5186 #if ENABLE_WRONG_METHOD_CHECK
5187 x86_patch (item->jmp_code, code);
5188 x86_breakpoint (code);
5189 item->jmp_code = NULL;
5190 #endif
5193 } else {
5194 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5195 item->jmp_code = code;
5196 if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
5197 x86_branch8 (code, X86_CC_GE, 0, FALSE);
5198 else
5199 x86_branch32 (code, X86_CC_GE, 0, FALSE);
5202 /* patch the branches to get to the target items */
5203 for (i = 0; i < count; ++i) {
5204 MonoIMTCheckItem *item = imt_entries [i];
5205 if (item->jmp_code) {
5206 if (item->check_target_idx) {
5207 x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
5212 if (!fail_tramp)
5213 mono_stats.imt_thunks_size += code - start;
5214 g_assert (code - start <= size);
5216 #if DEBUG_IMT
5218 char *buff = g_strdup_printf ("thunk_for_class_%s_%s_entries_%d", vtable->klass->name_space, vtable->klass->name, count);
5219 mono_disassemble_code (NULL, (guint8*)start, code - start, buff);
5220 g_free (buff);
5222 #endif
5224 return start;
5227 MonoMethod*
5228 mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
5230 return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
5232 #endif
5234 MonoVTable*
5235 mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
5237 return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
5240 MonoInst*
5241 mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
5243 MonoInst *ins = NULL;
5244 int opcode = 0;
5246 if (cmethod->klass == mono_defaults.math_class) {
5247 if (strcmp (cmethod->name, "Sin") == 0) {
5248 opcode = OP_SIN;
5249 } else if (strcmp (cmethod->name, "Cos") == 0) {
5250 opcode = OP_COS;
5251 } else if (strcmp (cmethod->name, "Tan") == 0) {
5252 opcode = OP_TAN;
5253 } else if (strcmp (cmethod->name, "Atan") == 0) {
5254 opcode = OP_ATAN;
5255 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
5256 opcode = OP_SQRT;
5257 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
5258 opcode = OP_ABS;
5259 } else if (strcmp (cmethod->name, "Round") == 0 && fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R8) {
5260 opcode = OP_ROUND;
5263 if (opcode) {
5264 MONO_INST_NEW (cfg, ins, opcode);
5265 ins->type = STACK_R8;
5266 ins->dreg = mono_alloc_freg (cfg);
5267 ins->sreg1 = args [0]->dreg;
5268 MONO_ADD_INS (cfg->cbb, ins);
5271 if (cfg->opt & MONO_OPT_CMOV) {
5272 int opcode = 0;
5274 if (strcmp (cmethod->name, "Min") == 0) {
5275 if (fsig->params [0]->type == MONO_TYPE_I4)
5276 opcode = OP_IMIN;
5277 } else if (strcmp (cmethod->name, "Max") == 0) {
5278 if (fsig->params [0]->type == MONO_TYPE_I4)
5279 opcode = OP_IMAX;
5282 if (opcode) {
5283 MONO_INST_NEW (cfg, ins, opcode);
5284 ins->type = STACK_I4;
5285 ins->dreg = mono_alloc_ireg (cfg);
5286 ins->sreg1 = args [0]->dreg;
5287 ins->sreg2 = args [1]->dreg;
5288 MONO_ADD_INS (cfg->cbb, ins);
5292 #if 0
5293 /* OP_FREM is not IEEE compatible */
5294 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
5295 MONO_INST_NEW (cfg, ins, OP_FREM);
5296 ins->inst_i0 = args [0];
5297 ins->inst_i1 = args [1];
5299 #endif
5302 return ins;
5305 gboolean
5306 mono_arch_print_tree (MonoInst *tree, int arity)
5308 return 0;
5311 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
5313 MonoInst* ins;
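/* The TLS fast path below is currently disabled: the early return always
 * forces the generic code path. */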
5315 return NULL;
5317 if (appdomain_tls_offset == -1)
5318 return NULL;
5320 MONO_INST_NEW (cfg, ins, OP_TLS_GET);
5321 ins->inst_offset = appdomain_tls_offset;
5322 return ins;
5325 guint32
5326 mono_arch_get_patch_offset (guint8 *code)
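/* Decode just enough of the instruction at CODE to locate its 32 bit
 * immediate/displacement, which is the part that gets patched. */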
5328 if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
5329 return 2;
5330 else if ((code [0] == 0xba))
5331 return 1;
5332 else if ((code [0] == 0x68))
5333 /* push IMM */
5334 return 1;
5335 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
5336 /* push <OFFSET>(<REG>) */
5337 return 2;
5338 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
5339 /* call *<OFFSET>(<REG>) */
5340 return 2;
5341 else if ((code [0] == 0xdd) || (code [0] == 0xd9))
5342 /* fldl <ADDR> */
5343 return 2;
5344 else if ((code [0] == 0x58) && (code [1] == 0x05))
5345 /* pop %eax; add <OFFSET>, %eax */
5346 return 2;
5347 else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
5348 /* pop <REG>; add <OFFSET>, <REG> */
5349 return 3;
5350 else if ((code [0] >= 0xb8) && (code [0] < 0xb8 + 8))
5351 /* mov <REG>, imm */
5352 return 1;
5353 else {
5354 g_assert_not_reached ();
5355 return -1;
}
}

/*
 * mono_breakpoint_clean_code:
 *
 * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
 * breakpoints in the original code, they are removed in the copy.
 *
 * Returns TRUE if no sw breakpoint was present.
 */
5367 gboolean
5368 mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
5370 int i;
5371 gboolean can_write = TRUE;
/*
 * If method_start is non-NULL we need to perform bounds checks, since
 * accessing memory at code - offset could take us before the start of the
 * method and into a different page that is not mapped, or make us read
 * incorrect data anyway. We zero-fill those bytes instead.
 */
5378 if (!method_start || code - offset >= method_start) {
5379 memcpy (buf, code - offset, size);
5380 } else {
5381 int diff = code - method_start;
5382 memset (buf, 0, size);
5383 memcpy (buf + offset - diff, method_start, diff + size - offset);
5385 code -= offset;
5386 for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
5387 int idx = mono_breakpoint_info_index [i];
5388 guint8 *ptr;
5389 if (idx < 1)
5390 continue;
5391 ptr = mono_breakpoint_info [idx].address;
5392 if (ptr >= code && ptr < code + size) {
5393 guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
5394 can_write = FALSE;
5395 /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
5396 buf [ptr - code] = saved_byte;
5399 return can_write;
5402 gpointer
5403 mono_arch_get_vcall_slot (guint8 *code, mgreg_t *regs, int *displacement)
5405 guint8 buf [8];
5406 guint8 reg = 0;
5407 gint32 disp = 0;
5409 mono_breakpoint_clean_code (NULL, code, 8, buf, sizeof (buf));
5410 code = buf + 8;
5412 *displacement = 0;
5414 code -= 6;
/*
 * A given byte sequence can match more than one case here, so we have to be
 * really careful about the ordering of the cases. Longer sequences
 * come first.
 * There are two types of calls:
 * - direct calls: 0xff address_byte 8/32 bits displacement
 * - indirect calls: nop nop nop <call>
 * The nops make sure we don't confuse the instruction preceding an indirect
 * call with a direct call.
 */
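/* For example (illustrative encodings):
 *   ff 50 0c             call   *0xc(%eax)      (8 bit displacement)
 *   ff 90 2c 01 00 00    call   *0x12c(%eax)    (32 bit displacement)
 */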
5426 if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
5427 reg = code [4] & 0x07;
5428 disp = (signed char)code [5];
5429 } else if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
5430 reg = code [1] & 0x07;
5431 disp = *((gint32*)(code + 2));
5432 } else if ((code [1] == 0xe8)) {
5433 return NULL;
5434 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
/*
 * This is an interface call
 * 8b 40 30   mov    0x30(%eax),%eax
 * ff 10      call   *(%eax)
 */
5440 disp = 0;
5441 reg = code [5] & 0x07;
5443 else
5444 return NULL;
5446 *displacement = disp;
5447 return (gpointer)regs [reg];
}

/*
 * mono_x86_get_this_arg_offset:
 *
 * Return the offset of the stack location where this is passed during a virtual
 * call.
 */
5456 guint32
5457 mono_x86_get_this_arg_offset (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig)
5459 CallInfo *cinfo = NULL;
5460 int offset;
5462 if (MONO_TYPE_ISSTRUCT (sig->ret)) {
5463 cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5465 offset = cinfo->args [0].offset;
5466 } else {
5467 offset = 0;
5470 return offset;
5473 gpointer
5474 mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig,
5475 mgreg_t *regs, guint8 *code)
5477 guint32 esp = regs [X86_ESP];
5478 CallInfo *cinfo = NULL;
5479 gpointer res;
5480 int offset;
/*
 * Avoid expensive calls to get_generic_context_from_code () + get_call_info
 * if possible.
 */
5486 if (MONO_TYPE_ISSTRUCT (sig->ret)) {
5487 if (!gsctx && code)
5488 gsctx = mono_get_generic_context_from_code (code);
5489 cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5491 offset = cinfo->args [0].offset;
5492 } else {
5493 offset = 0;
}

/*
 * The stack looks like:
 * <other args>
 * <this=delegate>
 * <possible vtype return address>
 * <return addr>
 * <4 pointers pushed by mono_arch_create_trampoline_code ()>
 */
5504 res = (((MonoObject**)esp) [5 + (offset / 4)]);
5505 if (cinfo)
5506 g_free (cinfo);
5507 return res;
5510 #define MAX_ARCH_DELEGATE_PARAMS 10
5512 gpointer
5513 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
5515 guint8 *code, *start;
5517 if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
5518 return NULL;
5520 /* FIXME: Support more cases */
5521 if (MONO_TYPE_ISSTRUCT (sig->ret))
5522 return NULL;
/*
 * The stack contains:
 * <delegate>
 * <return addr>
 */
5530 if (has_target) {
5531 static guint8* cached = NULL;
5532 if (cached)
5533 return cached;
5535 start = code = mono_global_codeman_reserve (64);
5537 /* Replace the this argument with the target */
5538 x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
5539 x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
5540 x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
5541 x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
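/* i.e. roughly:
 *   mov  0x4(%esp),%eax        ; load the delegate
 *   mov  <target>(%eax),%ecx
 *   mov  %ecx,0x4(%esp)        ; this = delegate->target
 *   jmp  *<method_ptr>(%eax)
 */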
5543 g_assert ((code - start) < 64);
5545 mono_debug_add_delegate_trampoline (start, code - start);
5547 mono_memory_barrier ();
5549 cached = start;
5550 } else {
5551 static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
5552 int i = 0;
5553 /* 8 for mov_reg and jump, plus 8 for each parameter */
5554 int code_reserve = 8 + (sig->param_count * 8);
5556 for (i = 0; i < sig->param_count; ++i)
5557 if (!mono_is_regsize_var (sig->params [i]))
5558 return NULL;
5560 code = cache [sig->param_count];
5561 if (code)
5562 return code;
/*
 * The stack contains:
 * <args in reverse order>
 * <delegate>
 * <return addr>
 *
 * and we need:
 * <args in reverse order>
 * <return addr>
 *
 * without unbalancing the stack.
 * So we move each arg up a spot in the stack (overwriting the un-needed
 * 'this' arg) and leave the original spot of the first arg as a placeholder
 * on the stack, so everything still works when the callee pops its arguments.
 */
5580 start = code = mono_global_codeman_reserve (code_reserve);
5582 /* store delegate for access to method_ptr */
5583 x86_mov_reg_membase (code, X86_ECX, X86_ESP, 4, 4);
5585 /* move args up */
5586 for (i = 0; i < sig->param_count; ++i) {
5587 x86_mov_reg_membase (code, X86_EAX, X86_ESP, (i+2)*4, 4);
5588 x86_mov_membase_reg (code, X86_ESP, (i+1)*4, X86_EAX, 4);
5591 x86_jump_membase (code, X86_ECX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
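
		/*
		 * Per parameter, the shuffle emits roughly (illustrative):
		 *
		 *   mov 0x8(%esp),%eax        ; arg 0, one slot above 'this'
		 *   mov %eax,0x4(%esp)        ; overwrite the 'this' slot
		 *   ...                       ; repeat for the remaining args
		 *   jmp *method_ptr(%ecx)     ; tail call through the delegate
		 */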
		g_assert ((code - start) < code_reserve);

		mono_debug_add_delegate_trampoline (start, code - start);

		mono_memory_barrier ();

		cache [sig->param_count] = start;
	}

	return start;
}
gpointer
mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
{
	switch (reg) {
	case X86_EAX: return (gpointer)ctx->eax;
	case X86_EBX: return (gpointer)ctx->ebx;
	case X86_ECX: return (gpointer)ctx->ecx;
	case X86_EDX: return (gpointer)ctx->edx;
	case X86_ESP: return (gpointer)ctx->esp;
	case X86_EBP: return (gpointer)ctx->ebp;
	case X86_ESI: return (gpointer)ctx->esi;
	case X86_EDI: return (gpointer)ctx->edi;
	default: g_assert_not_reached ();
	}
	/* Not reached; silences missing-return warnings when assertions are disabled */
	return NULL;
}
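
/*
 * Usage sketch (explanatory): exception handling code uses this to read a
 * saved register out of a context, e.g.:
 *
 *   guint8 *sp = (guint8*)mono_arch_context_get_int_reg (ctx, X86_ESP);
 */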
#ifdef MONO_ARCH_SIMD_INTRINSICS

static MonoInst*
get_float_to_x_spill_area (MonoCompile *cfg)
{
	if (!cfg->fconv_to_r8_x_var) {
		cfg->fconv_to_r8_x_var = mono_compile_create_var (cfg, &mono_defaults.double_class->byval_arg, OP_LOCAL);
		cfg->fconv_to_r8_x_var->flags |= MONO_INST_VOLATILE; /* FIXME: use the don't-regalloc flag */
	}
	return cfg->fconv_to_r8_x_var;
}

/*
 * Convert all fconv opcodes that MONO_OPT_SSE2 would get wrong.
 */
void
mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
{
	MonoInst *fconv;
	int dreg, src_opcode;

	if (!(cfg->opt & MONO_OPT_SSE2) || !(cfg->opt & MONO_OPT_SIMD) || COMPILE_LLVM (cfg))
		return;

	switch (src_opcode = ins->opcode) {
	case OP_FCONV_TO_I1:
	case OP_FCONV_TO_U1:
	case OP_FCONV_TO_I2:
	case OP_FCONV_TO_U2:
	case OP_FCONV_TO_I4:
	case OP_FCONV_TO_I:
		break;
	default:
		return;
	}

	/* dreg is the IREG and sreg1 is the FREG */
	MONO_INST_NEW (cfg, fconv, OP_FCONV_TO_R8_X);
	fconv->klass = NULL; /* FIXME: what can we use here, as the Mono.Simd lib might not be loaded yet? */
	fconv->sreg1 = ins->sreg1;
	fconv->dreg = mono_alloc_ireg (cfg);
	fconv->type = STACK_VTYPE;
	fconv->backend.spill_var = get_float_to_x_spill_area (cfg);

	mono_bblock_insert_before_ins (cfg->cbb, ins, fconv);

	dreg = ins->dreg;
	NULLIFY_INS (ins);
	ins->opcode = OP_XCONV_R8_TO_I4;

	ins->klass = mono_defaults.int32_class;
	ins->sreg1 = fconv->dreg;
	ins->dreg = dreg;
	ins->type = STACK_I4;
	ins->backend.source_opcode = src_opcode;
}
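
/*
 * Explanatory sketch: the rewrite above turns
 *
 *   OP_FCONV_TO_I4    ireg <- freg
 *
 * into
 *
 *   OP_FCONV_TO_R8_X  xreg <- freg    ; move the value into an SSE register
 *   OP_XCONV_R8_TO_I4 ireg <- xreg    ; truncating SSE conversion
 *
 * so the conversion is performed with SSE2 instead of the x87 unit.
 */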
#endif /* #ifdef MONO_ARCH_SIMD_INTRINSICS */
void
mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins)
{
	MonoInst *ins;
	int vreg;

	if (long_ins->opcode == OP_LNEG) {
		ins = long_ins;
		MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 1, ins->sreg1 + 1);
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ADC_IMM, ins->dreg + 2, ins->sreg1 + 2, 0);
		MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 2, ins->dreg + 2);
		NULLIFY_INS (ins);
		return;
	}
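
	/*
	 * Worked example (explanatory): the OP_LNEG expansion above computes the
	 * two's complement of a 64 bit value from its 32 bit halves:
	 *
	 *   neg low          ; low = -low, sets carry if low != 0
	 *   adc high, 0      ; propagate the borrow into the high word
	 *   neg high         ; high = -high
	 */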
#ifdef MONO_ARCH_SIMD_INTRINSICS

	if (!(cfg->opt & MONO_OPT_SIMD))
		return;

	/* TODO: move this to simd-intrinsic.c once we support SSE 4.1 dword extractors, since we need the runtime caps info */
	switch (long_ins->opcode) {
	case OP_EXTRACT_I8:
		vreg = long_ins->sreg1;

		if (long_ins->inst_c0) {
			MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
			ins->klass = long_ins->klass;
			ins->sreg1 = long_ins->sreg1;
			ins->inst_c0 = 2;
			ins->type = STACK_VTYPE;
			ins->dreg = vreg = alloc_ireg (cfg);
			MONO_ADD_INS (cfg->cbb, ins);
		}

		MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
		ins->klass = mono_defaults.int32_class;
		ins->sreg1 = vreg;
		ins->type = STACK_I4;
		ins->dreg = long_ins->dreg + 1;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
		ins->klass = long_ins->klass;
		ins->sreg1 = long_ins->sreg1;
		ins->inst_c0 = long_ins->inst_c0 ? 3 : 1;
		ins->type = STACK_VTYPE;
		ins->dreg = vreg = alloc_ireg (cfg);
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
		ins->klass = mono_defaults.int32_class;
		ins->sreg1 = vreg;
		ins->type = STACK_I4;
		ins->dreg = long_ins->dreg + 2;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
	case OP_INSERTX_I8_SLOW:
		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg2 + 1;
		ins->inst_c0 = long_ins->inst_c0 * 2;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg2 + 2;
		ins->inst_c0 = long_ins->inst_c0 * 2 + 1;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
	case OP_EXPAND_I8:
		MONO_INST_NEW (cfg, ins, OP_ICONV_TO_X);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->sreg1 + 1;
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg1 + 2;
		ins->inst_c0 = 1;
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->inst_c0 = 0x44; /* Magic number for swizzling (X,Y,X,Y) */
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
	}
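
	/*
	 * Explanatory note: for OP_EXPAND_I8 the sequence above first moves the
	 * low 32 bits into lane X, inserts the high 32 bits into lane Y, then
	 * shuffles with 0x44 to replicate the (X,Y) pair into (X,Y,X,Y), so both
	 * 64 bit halves of the xmm register hold the expanded value.
	 */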
#endif /* MONO_ARCH_SIMD_INTRINSICS */
}
#if __APPLE__
#define DBG_SIGNAL SIGBUS
#else
#define DBG_SIGNAL SIGSEGV
#endif
/* Soft Debug support */
#ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED

/*
 * mono_arch_set_breakpoint:
 *
 *   Set a breakpoint at the native code at IP. The location should contain
 * code emitted by OP_SEQ_POINT.
 */
void
mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
{
	guint8 *code = ip;

	/*
	 * In production we will use int3 (which requires fixing the instruction
	 * size in the md file). But int3 could confuse gdb, so during development
	 * we emit a faulting read (SIGSEGV) instead.
	 */
	g_assert (code [0] == 0x90);
	x86_alu_reg_mem (code, X86_CMP, X86_EAX, (guint32)bp_trigger_page);
}
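
/*
 * Explanatory note: the 6 byte cmp overwrites the nop padding emitted by
 * OP_SEQ_POINT. Executing it reads from bp_trigger_page, which is mapped
 * without read permission, so the thread faults and the runtime reports a
 * breakpoint event (see mono_arch_is_breakpoint_event () below).
 */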
/*
 * mono_arch_clear_breakpoint:
 *
 *   Clear the breakpoint at IP.
 */
void
mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
{
	guint8 *code = ip;
	int i;

	/* Overwrite the BREAKPOINT_SIZE (6) byte cmp with nops again */
	for (i = 0; i < 6; ++i)
		x86_nop (code);
}
/*
 * mono_arch_start_single_stepping:
 *
 *   Start single stepping.
 */
void
mono_arch_start_single_stepping (void)
{
	mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
}

/*
 * mono_arch_stop_single_stepping:
 *
 *   Stop single stepping.
 */
void
mono_arch_stop_single_stepping (void)
{
	mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
}
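
/*
 * Explanatory note: sequence point code emitted by OP_SEQ_POINT reads from
 * ss_trigger_page. Removing read permission above makes every sequence point
 * fault, which the runtime turns into single step events; restoring
 * MONO_MMAP_READ lets the reads succeed again, disabling single stepping.
 */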
/*
 * mono_arch_is_single_step_event:
 *
 *   Return whether the machine state in SIGCTX corresponds to a single
 * step event.
 */
gboolean
mono_arch_is_single_step_event (void *info, void *sigctx)
{
#ifdef TARGET_WIN32
	EXCEPTION_RECORD* einfo = (EXCEPTION_RECORD*)info;

	/* Sometimes the address is off by 4 */
	if ((einfo->ExceptionInformation[1] >= ss_trigger_page && (guint8*)einfo->ExceptionInformation[1] <= (guint8*)ss_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#else
	siginfo_t* sinfo = (siginfo_t*) info;

	/* Sometimes the address is off by 4 */
	if (sinfo->si_signo == DBG_SIGNAL && (sinfo->si_addr >= ss_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)ss_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#endif
}
gboolean
mono_arch_is_breakpoint_event (void *info, void *sigctx)
{
#ifdef TARGET_WIN32
	EXCEPTION_RECORD* einfo = (EXCEPTION_RECORD*)info;

	/* Sometimes the address is off by 4 */
	if ((einfo->ExceptionInformation[1] >= bp_trigger_page && (guint8*)einfo->ExceptionInformation[1] <= (guint8*)bp_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#else
	siginfo_t* sinfo = (siginfo_t*)info;

	/* Sometimes the address is off by 4 */
	if (sinfo->si_signo == DBG_SIGNAL && (sinfo->si_addr >= bp_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)bp_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#endif
}
/*
 * mono_arch_get_ip_for_breakpoint:
 *
 *   See mini-amd64.c for docs.
 */
guint8*
mono_arch_get_ip_for_breakpoint (MonoJitInfo *ji, MonoContext *ctx)
{
	guint8 *ip = MONO_CONTEXT_GET_IP (ctx);

	return ip;
}

#define BREAKPOINT_SIZE 6

/*
 * mono_arch_get_ip_for_single_step:
 *
 *   See mini-amd64.c for docs.
 */
guint8*
mono_arch_get_ip_for_single_step (MonoJitInfo *ji, MonoContext *ctx)
{
	guint8 *ip = MONO_CONTEXT_GET_IP (ctx);

	/* Size of the x86_alu_reg_mem () cmp emitted for sequence points */
	ip += 6;

	return ip;
}

/*
 * mono_arch_skip_breakpoint:
 *
 *   See mini-amd64.c for docs.
 */
void
mono_arch_skip_breakpoint (MonoContext *ctx)
{
	MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + BREAKPOINT_SIZE);
}

/*
 * mono_arch_skip_single_step:
 *
 *   See mini-amd64.c for docs.
 */
void
mono_arch_skip_single_step (MonoContext *ctx)
{
	MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 6);
}

/*
 * mono_arch_get_seq_point_info:
 *
 *   See mini-amd64.c for docs.
 */
gpointer
mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
{
	NOT_IMPLEMENTED;
	return NULL;
}

#endif