2010-04-19  Rodrigo Kumpera  <rkumpera@novell.com>
[mono.git] / mono / mini / mini-x86.c
blob 4f2715a6f4849a95b341b1b28342e3198bc7587c

/*
 * mini-x86.c: x86 backend for the Mono code generator
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/utils/mono-math.h>
#include <mono/utils/mono-counters.h>
#include <mono/utils/mono-mmap.h>

#include "trace.h"
#include "mini-x86.h"
#include "cpu-x86.h"
#include "ir-emit.h"

/* On Windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;

#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#ifdef TARGET_WIN32
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif

/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
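
/*
 * Example: ALIGN_TO (13, 8) == 16 and ALIGN_TO (16, 8) == 16; the macro
 * rounds VAL up to the next multiple of ALIGN, which must be a power of
 * two for the mask trick to work.
 */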

#define ARGS_OFFSET 8

#ifdef TARGET_WIN32
/* Under Windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

MonoBreakpointInfo
mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];

/*
 * The code generated for sequence points reads from this location, which is
 * made read-only when single stepping is enabled.
 */
static gpointer ss_trigger_page;

/* Enabled breakpoints read from this trigger page */
static gpointer bp_trigger_page;

const char*
mono_arch_regname (int reg)
{
	switch (reg) {
	case X86_EAX: return "%eax";
	case X86_EBX: return "%ebx";
	case X86_ECX: return "%ecx";
	case X86_EDX: return "%edx";
	case X86_ESP: return "%esp";
	case X86_EBP: return "%ebp";
	case X86_EDI: return "%edi";
	case X86_ESI: return "%esi";
	}
	return "unknown";
}

const char*
mono_arch_fregname (int reg)
{
	switch (reg) {
	case 0:
		return "%fr0";
	case 1:
		return "%fr1";
	case 2:
		return "%fr2";
	case 3:
		return "%fr3";
	case 4:
		return "%fr4";
	case 5:
		return "%fr5";
	case 6:
		return "%fr6";
	case 7:
		return "%fr7";
	default:
		return "unknown";
	}
}

const char *
mono_arch_xregname (int reg)
{
	switch (reg) {
	case 0:
		return "%xmm0";
	case 1:
		return "%xmm1";
	case 2:
		return "%xmm2";
	case 3:
		return "%xmm3";
	case 4:
		return "%xmm4";
	case 5:
		return "%xmm5";
	case 6:
		return "%xmm6";
	case 7:
		return "%xmm7";
	default:
		return "unknown";
	}
}

typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgOnFloatFpStack,
	ArgOnDoubleFpStack,
	ArgNone
} ArgStorage;

typedef struct {
	gint16 offset;
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	guint32 stack_align_amount;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;

#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

#if defined(TARGET_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
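
/*
 * With PARAM_REGS == 0 every argument is passed on the stack; return_regs
 * only matters on the platforms above, where small structs come back in
 * EAX (low word) and EDX (high word), see add_valuetype () below.
 */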

static void inline
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}

static void inline
add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	g_assert (PARAM_REGS == 0);

	ainfo->storage = ArgOnStack;
	(*stack_size) += sizeof (gpointer) * 2;
}

static void inline
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += is_double ? 8 : 4;
	}
	else {
		/* A double register */
		if (is_double)
			ainfo->storage = ArgInDoubleSSEReg;
		else
			ainfo->storage = ArgInFloatSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}

static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	size = mini_type_stack_size_full (gsctx, &klass->byval_arg, NULL, sig->pinvoke);

#ifdef SMALL_STRUCTS_IN_REGS
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
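
/*
 * Examples of the rules above (pinvoke return values only):
 *   struct { double d; }      -> returned on the fp stack (ArgOnDoubleFpStack)
 *   struct { float f; }       -> returned on the fp stack (ArgOnFloatFpStack)
 *   struct { int a; }         -> returned in EAX
 *   struct { int a; int b; }  -> returned in EAX:EDX
 *   any other native size     -> returned through a hidden stack argument
 */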

/*
 * get_call_info:
 *
 * Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 */
static CallInfo*
get_call_info_internal (MonoGenericSharingContext *gsctx, CallInfo *cinfo, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mini_type_get_underlying_type (gsctx, sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (ret_type)) {
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = X86_EAX;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);

			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mini_type_get_underlying_type (gsctx, sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (ptype)) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	if (mono_do_x86_stack_align && (stack_size % MONO_ARCH_FRAME_ALIGNMENT) != 0) {
		cinfo->need_stack_align = TRUE;
		cinfo->stack_align_amount = MONO_ARCH_FRAME_ALIGNMENT - (stack_size % MONO_ARCH_FRAME_ALIGNMENT);
		stack_size += cinfo->stack_align_amount;
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
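
/*
 * Worked example (illustrative): for the managed signature
 *     int Foo (object this, long l, double d)
 * PARAM_REGS is 0, so every argument is ArgOnStack: 'this' lands at offset 0,
 * 'l' at offset 4 (two 4-byte slots via add_general_pair) and 'd' at offset 12,
 * giving stack_usage == 20 before any frame alignment padding is added.
 */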

static CallInfo*
get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	int n = sig->hasthis + sig->param_count;
	CallInfo *cinfo;

	if (mp)
		cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
	else
		cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	return get_call_info_internal (gsctx, cinfo, sig, is_pinvoke);
}

/*
 * mono_arch_get_argument_info:
 * @csig: a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 * This should be signal safe, since it is called from
 * mono_arch_find_jit_info_ext ().
 * FIXME: The metadata calls might not be signal safe.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, args_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;
	CallInfo *cinfo;

	/* Avoid g_malloc as it is not signal safe */
	cinfo = (CallInfo*)g_newa (guint8, sizeof (CallInfo) + (sizeof (ArgInfo) * (csig->param_count + 1)));

	cinfo = get_call_info_internal (NULL, cinfo, csig, FALSE);

	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].size = args_size;

	for (k = 0; k < param_count; k++) {
		size = mini_type_stack_size_full (NULL, csig->params [k], &align, csig->pinvoke);

		/* ignore alignment for now */
		align = 1;

		args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
		arg_info [k].pad = pad;
		args_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	if (mono_do_x86_stack_align && !CALLCONV_IS_STDCALL (csig))
		align = MONO_ARCH_FRAME_ALIGNMENT;
	else
		align = 4;
	args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	return args_size;
}

static const guchar cpuid_impl [] = {
	0x55,             /* push %ebp */
	0x89, 0xe5,       /* mov  %esp,%ebp */
	0x53,             /* push %ebx */
	0x8b, 0x45, 0x08, /* mov  0x8(%ebp),%eax */
	0x0f, 0xa2,       /* cpuid */
	0x50,             /* push %eax */
	0x8b, 0x45, 0x10, /* mov  0x10(%ebp),%eax */
	0x89, 0x18,       /* mov  %ebx,(%eax) */
	0x8b, 0x45, 0x14, /* mov  0x14(%ebp),%eax */
	0x89, 0x08,       /* mov  %ecx,(%eax) */
	0x8b, 0x45, 0x18, /* mov  0x18(%ebp),%eax */
	0x89, 0x10,       /* mov  %edx,(%eax) */
	0x58,             /* pop  %eax */
	0x8b, 0x55, 0x0c, /* mov  0xc(%ebp),%edx */
	0x89, 0x02,       /* mov  %eax,(%edx) */
	0x5b,             /* pop  %ebx */
	0xc9,             /* leave */
	0xc3,             /* ret */
};

typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);

static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	__asm__ __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		static CpuidFunc func = NULL;
		void *ptr;
		if (!func) {
			ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
			memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
			func = (CpuidFunc)ptr;
		}
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
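
/*
 * Typical use: check a feature bit after calling cpuid (), e.g.
 *
 *     int eax, ebx, ecx, edx;
 *     if (cpuid (1, &eax, &ebx, &ecx, &edx) && (edx & (1 << 26)))
 *         ; // SSE2 is available
 *
 * as done in mono_arch_cpu_optimizazions () below.
 */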

/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__ __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	_control87 (_PC_53, MCW_PC);
#endif
}
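
/*
 * Note: clearing X86_FPCW_PRECC_MASK and setting X86_FPCW_PREC_DOUBLE selects
 * a 53 bit mantissa, so x87 arithmetic rounds like IEEE double, which is what
 * the spec requires; the MSVC _control87 (_PC_53, MCW_PC) call does the same.
 */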

/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
	InitializeCriticalSection (&mini_arch_mutex);

	ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ);
	bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
	mono_mprotect (bp_trigger_page, mono_pagesize (), 0);
}

/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
	DeleteCriticalSection (&mini_arch_mutex);
}

/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
	int eax, ebx, ecx, edx;
	guint32 opts = 0;

	*exclude_mask = 0;
	/* Feature Flags function, flags returned in EDX. */
	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 15)) {
			opts |= MONO_OPT_CMOV;
			if (edx & 1)
				opts |= MONO_OPT_FCMOV;
			else
				*exclude_mask |= MONO_OPT_FCMOV;
		} else
			*exclude_mask |= MONO_OPT_CMOV;
		if (edx & (1 << 26))
			opts |= MONO_OPT_SSE2;
		else
			*exclude_mask |= MONO_OPT_SSE2;
	}
#ifdef MONO_ARCH_SIMD_INTRINSICS
	/*SIMD intrinsics require at least SSE2.*/
	if (!(opts & MONO_OPT_SSE2))
		*exclude_mask |= MONO_OPT_SIMD;
#endif

	return opts;
}

/*
 * This function tests for all SSE functions supported.
 *
 * Returns a bitmask corresponding to all supported versions.
 */
guint32
mono_arch_cpu_enumerate_simd_versions (void)
{
	int eax, ebx, ecx, edx;
	guint32 sse_opts = 0;

	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 25))
			sse_opts |= SIMD_VERSION_SSE1;
		if (edx & (1 << 26))
			sse_opts |= SIMD_VERSION_SSE2;
		if (ecx & (1 << 0))
			sse_opts |= SIMD_VERSION_SSE3;
		if (ecx & (1 << 9))
			sse_opts |= SIMD_VERSION_SSSE3;
		if (ecx & (1 << 19))
			sse_opts |= SIMD_VERSION_SSE41;
		if (ecx & (1 << 20))
			sse_opts |= SIMD_VERSION_SSE42;
	}

	/* Yes, all this needs to be done to check for sse4a.
	   See: "AMD: CPUID Specification"
	 */
	if (cpuid (0x80000000, &eax, &ebx, &ecx, &edx)) {
		/* eax greater or equal than 0x80000001, ebx = 'htuA', ecx = 'DMAc', edx = 'itne' */
		if ((((unsigned int) eax) >= 0x80000001) && (ebx == 0x68747541) && (ecx == 0x444D4163) && (edx == 0x69746E65)) {
			cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
			if (ecx & (1 << 6))
				sse_opts |= SIMD_VERSION_SSE4a;
		}
	}

	return sse_opts;
}

/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		if (reg == -1)
			return TRUE;
	}

	return FALSE;
}
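
/*
 * Example: 'idiv %ecx' with EDX:EAX == INT32_MIN and ECX == -1 raises the
 * same trap as a division by zero; the check above inspects the divisor
 * register, so a divisor of -1 lets the runtime throw OverflowException
 * instead of DivideByZeroException.
 */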

GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we don't allocate I1 to registers because there is no simple way to sign extend
		 * 8bit quantities in caller saved registers on x86 */
		if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}

GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	/* we can use 3 registers for global allocation */
	regs = g_list_prepend (regs, (gpointer)X86_EBX);
	regs = g_list_prepend (regs, (gpointer)X86_ESI);
	regs = g_list_prepend (regs, (gpointer)X86_EDI);

	return regs;
}

/*
 * mono_arch_regalloc_cost:
 *
 * Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		return (ins->opcode == OP_ARG) ? 1 : 0;
	else
		/* push+pop+possible load if it is an argument */
		return (ins->opcode == OP_ARG) ? 3 : 2;
}

static void
set_needs_stack_frame (MonoCompile *cfg, gboolean flag)
{
	static int inited = FALSE;
	static int count = 0;

	if (cfg->arch.need_stack_frame_inited) {
		g_assert (cfg->arch.need_stack_frame == flag);
		return;
	}

	cfg->arch.need_stack_frame = flag;
	cfg->arch.need_stack_frame_inited = TRUE;

	if (flag)
		return;

	if (!inited) {
		mono_counters_register ("Could eliminate stack frame", MONO_COUNTER_INT|MONO_COUNTER_JIT, &count);
		inited = TRUE;
	}
	++count;

	//g_print ("will eliminate %s.%s.%s\n", cfg->method->klass->name_space, cfg->method->klass->name, cfg->method->name);
}

static gboolean
needs_stack_frame (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	gboolean result = FALSE;

#if defined(__APPLE__)
	/* OSX requires stack frame code to have the correct alignment. */
	return TRUE;
#endif

	if (cfg->arch.need_stack_frame_inited)
		return cfg->arch.need_stack_frame;

	header = cfg->header;
	sig = mono_method_signature (cfg->method);

	if (cfg->disable_omit_fp)
		result = TRUE;
	else if (cfg->flags & MONO_CFG_HAS_ALLOCA)
		result = TRUE;
	else if (cfg->method->save_lmf)
		result = TRUE;
	else if (cfg->stack_offset)
		result = TRUE;
	else if (cfg->param_area)
		result = TRUE;
	else if (cfg->flags & (MONO_CFG_HAS_CALLS | MONO_CFG_HAS_ALLOCA | MONO_CFG_HAS_TAIL))
		result = TRUE;
	else if (header->num_clauses)
		result = TRUE;
	else if (sig->param_count + sig->hasthis)
		result = TRUE;
	else if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		result = TRUE;
	else if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
		 (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
		result = TRUE;

	set_needs_stack_frame (cfg, result);

	return cfg->arch.need_stack_frame;
}

/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = cfg->header;
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	cfg->frame_reg = X86_EBP;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_size > MONO_ARCH_MAX_FRAME_SIZE) {
		char *mname = mono_method_full_name (cfg->method, TRUE);
		cfg->exception_type = MONO_EXCEPTION_INVALID_PROGRAM;
		cfg->exception_message = g_strdup_printf ("Method %s stack is too big.", mname);
		g_free (mname);
		return;
	}
	if (locals_stack_align) {
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	/*
	 * EBP is at alignment 8 % MONO_ARCH_FRAME_ALIGNMENT, so if we
	 * have locals larger than 8 bytes we need to make sure that
	 * they have the appropriate offset.
	 */
	if (MONO_ARCH_FRAME_ALIGNMENT > 8 && locals_stack_align > 8)
		offset += MONO_ARCH_FRAME_ALIGNMENT - sizeof (gpointer) * 2;
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		if (MONO_TYPE_ISSTRUCT (sig->ret)) {
			/*
			 * In the new IR, the cfg->vret_addr variable represents the
			 * vtype return value.
			 */
			cfg->vret_addr->opcode = OP_REGOFFSET;
			cfg->vret_addr->inst_basereg = cfg->frame_reg;
			cfg->vret_addr->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
			if (G_UNLIKELY (cfg->verbose_level > 1)) {
				printf ("vret_addr =");
				mono_print_ins (cfg->vret_addr);
			}
		} else {
			cfg->ret->opcode = OP_REGOFFSET;
			cfg->ret->inst_basereg = X86_EBP;
			cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		}
		break;
	case ArgValuetypeInReg:
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		cfg->ret->dreg = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->args [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	cfg->stack_offset = offset;
}

void
mono_arch_create_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	CallInfo *cinfo;

	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	if (cinfo->ret.storage == ArgValuetypeInReg)
		cfg->ret_var_is_local = TRUE;
	if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig->ret)) {
		cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
	}
}

/*
 * It is expensive to adjust esp for each individual fp argument pushed on the stack
 * so we try to do it just once when we have multiple fp arguments in a row.
 * We don't use this mechanism generally because for int arguments the generated code
 * is slightly bigger and new generation cpus optimize away the dependency chains
 * created by push instructions on the esp value.
 * fp_arg_setup is the first argument in the execution sequence where the esp register
 * is modified.
 */
static G_GNUC_UNUSED int
collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
{
	int fp_space = 0;
	MonoType *t;

	for (; start_arg < sig->param_count; ++start_arg) {
		t = mini_type_get_underlying_type (NULL, sig->params [start_arg]);
		if (!t->byref && t->type == MONO_TYPE_R8) {
			fp_space += sizeof (double);
			*fp_arg_setup = start_arg;
		} else {
			break;
		}
	}
	return fp_space;
}

static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
	MonoMethodSignature *tmp_sig;

	/* FIXME: Add support for signature tokens to AOT */
	cfg->disable_aot = TRUE;

	/*
	 * mono_ArgIterator_Setup assumes the signature cookie is
	 * passed first and all the arguments which were before it are
	 * passed on the stack after the signature. So compensate by
	 * passing a different signature.
	 */
	tmp_sig = mono_metadata_signature_dup (call->signature);
	tmp_sig->param_count -= call->signature->sentinelpos;
	tmp_sig->sentinelpos = 0;
	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

	MONO_EMIT_NEW_BIALU_IMM (cfg, OP_X86_PUSH_IMM, -1, -1, tmp_sig);
}
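
/*
 * Example (illustrative): for a vararg method
 *     void m (int fixed, __arglist)   invoked as   m (1, __arglist (2.0))
 * sentinelpos is 1, so the cookie pushed here describes only the variable
 * part (the double), and mono_ArgIterator_Setup () finds that part right
 * below the cookie on the stack.
 */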

#ifdef ENABLE_LLVM
LLVMCallInfo*
mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
{
	int i, j, n;
	CallInfo *cinfo;
	ArgInfo *ainfo;
	LLVMCallInfo *linfo;
	MonoType *t;

	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);

	linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));

	/*
	 * LLVM always uses the native ABI while we use our own ABI, the
	 * only difference is the handling of vtypes:
	 * - we only pass/receive them in registers in some cases, and only
	 *   in 1 or 2 integer registers.
	 */
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		if (sig->pinvoke) {
			cfg->exception_message = g_strdup ("pinvoke + vtypes");
			cfg->disable_llvm = TRUE;
			return linfo;
		}

		cfg->exception_message = g_strdup ("vtype ret in call");
		cfg->disable_llvm = TRUE;

		linfo->ret.storage = LLVMArgVtypeInReg;
		for (j = 0; j < 2; ++j)
			linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, cinfo->ret.pair_storage [j]);
	}

	if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage == ArgInIReg) {
		/* Vtype returned using a hidden argument */
		linfo->ret.storage = LLVMArgVtypeRetAddr;
	}

	if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage != ArgInIReg) {
		// FIXME:
		cfg->exception_message = g_strdup ("vtype ret in call");
		cfg->disable_llvm = TRUE;
	}

	for (i = 0; i < n; ++i) {
		ainfo = cinfo->args + i;

		if (i >= sig->hasthis)
			t = sig->params [i - sig->hasthis];
		else
			t = &mono_defaults.int_class->byval_arg;

		linfo->args [i].storage = LLVMArgNone;

		switch (ainfo->storage) {
		case ArgInIReg:
			linfo->args [i].storage = LLVMArgInIReg;
			break;
		case ArgInDoubleSSEReg:
		case ArgInFloatSSEReg:
			linfo->args [i].storage = LLVMArgInFPReg;
			break;
		case ArgOnStack:
			if (MONO_TYPE_ISSTRUCT (t)) {
				if (mono_class_value_size (mono_class_from_mono_type (t), NULL) == 0)
					/* LLVM seems to allocate argument space for empty structures too */
					linfo->args [i].storage = LLVMArgNone;
				else
					linfo->args [i].storage = LLVMArgVtypeByVal;
			} else {
				linfo->args [i].storage = LLVMArgInIReg;
				if (t->byref) {
					if (t->type == MONO_TYPE_R4)
						linfo->args [i].storage = LLVMArgInFPReg;
					else if (t->type == MONO_TYPE_R8)
						linfo->args [i].storage = LLVMArgInFPReg;
				}
			}
			break;
		case ArgValuetypeInReg:
			if (sig->pinvoke) {
				cfg->exception_message = g_strdup ("pinvoke + vtypes");
				cfg->disable_llvm = TRUE;
				return linfo;
			}

			cfg->exception_message = g_strdup ("vtype arg");
			cfg->disable_llvm = TRUE;

			linfo->args [i].storage = LLVMArgVtypeInReg;
			for (j = 0; j < 2; ++j)
				linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);

			break;
		default:
			cfg->exception_message = g_strdup ("ainfo->storage");
			cfg->disable_llvm = TRUE;
			break;
		}
	}

	return linfo;
}
#endif

void
mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n;
	CallInfo *cinfo;
	int sentinelpos = 0;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		sentinelpos = sig->sentinelpos + (sig->hasthis ? 1 : 0);

	if (cinfo->need_stack_align) {
		MONO_INST_NEW (cfg, arg, OP_SUB_IMM);
		arg->dreg = X86_ESP;
		arg->sreg1 = X86_ESP;
		arg->inst_imm = cinfo->stack_align_amount;
		MONO_ADD_INS (cfg->cbb, arg);
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/*
			 * Tell the JIT to use a more efficient calling convention: call using
			 * OP_CALL, compute the result location after the call, and save the
			 * result there.
			 */
			call->vret_in_reg = TRUE;
			if (call->vret_var)
				NULLIFY_INS (call->vret_var);
		}
	}

	/* Handle the case where there are no implicit arguments */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
		emit_sig_cookie (cfg, call, cinfo);
	}

	/* Arguments are pushed in the reverse order */
	for (i = n - 1; i >= 0; i --) {
		ArgInfo *ainfo = cinfo->args + i;
		MonoType *t;

		if (i >= sig->hasthis)
			t = sig->params [i - sig->hasthis];
		else
			t = &mono_defaults.int_class->byval_arg;
		t = mini_type_get_underlying_type (cfg->generic_sharing_context, t);

		MONO_INST_NEW (cfg, arg, OP_X86_PUSH);

		in = call->args [i];
		arg->cil_code = in->cil_code;
		arg->sreg1 = in->dreg;
		arg->type = in->type;

		g_assert (in->dreg != -1);

		if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
			guint32 align;
			guint32 size;

			g_assert (in->klass);

			if (t->type == MONO_TYPE_TYPEDBYREF) {
				size = sizeof (MonoTypedRef);
				align = sizeof (gpointer);
			}
			else {
				size = mini_type_stack_size_full (cfg->generic_sharing_context, &in->klass->byval_arg, &align, sig->pinvoke);
			}

			if (size > 0) {
				arg->opcode = OP_OUTARG_VT;
				arg->sreg1 = in->dreg;
				arg->klass = in->klass;
				arg->backend.size = size;

				MONO_ADD_INS (cfg->cbb, arg);
			}
		}
		else {
			switch (ainfo->storage) {
			case ArgOnStack:
				arg->opcode = OP_X86_PUSH;
				if (!t->byref) {
					if (t->type == MONO_TYPE_R4) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 4);
						arg->opcode = OP_STORER4_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_R8) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
						arg->opcode = OP_STORER8_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_I8 || t->type == MONO_TYPE_U8) {
						arg->sreg1 ++;
						MONO_EMIT_NEW_UNALU (cfg, OP_X86_PUSH, -1, in->dreg + 2);
					}
				}
				break;
			default:
				g_assert_not_reached ();
			}

			MONO_ADD_INS (cfg->cbb, arg);
		}

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
			/* Emit the signature cookie just before the implicit arguments */
			emit_sig_cookie (cfg, call, cinfo);
		}
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		MonoInst *vtarg;

		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/* Already done */
		}
		else if (cinfo->ret.storage == ArgInIReg) {
			NOT_IMPLEMENTED;
			/* The return address is passed in a register */
			MONO_INST_NEW (cfg, vtarg, OP_MOVE);
			vtarg->sreg1 = call->inst.dreg;
			vtarg->dreg = mono_alloc_ireg (cfg);
			MONO_ADD_INS (cfg->cbb, vtarg);

			mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
		} else {
			MonoInst *vtarg;
			MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
			vtarg->type = STACK_MP;
			vtarg->sreg1 = call->vret_var->dreg;
			MONO_ADD_INS (cfg->cbb, vtarg);
		}

		/* if the function returns a struct on stack, the called method already does a ret $0x4 */
		if (cinfo->ret.storage != ArgValuetypeInReg)
			cinfo->stack_usage -= 4;
	}

	call->stack_usage = cinfo->stack_usage;
}

void
mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
{
	MonoInst *arg;
	int size = ins->backend.size;

	if (size <= 4) {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
		arg->sreg1 = src->dreg;

		MONO_ADD_INS (cfg->cbb, arg);
	} else if (size <= 20) {
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 4));
		mini_emit_memcpy (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
	} else {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
		arg->inst_basereg = src->dreg;
		arg->inst_offset = 0;
		arg->inst_imm = size;

		MONO_ADD_INS (cfg->cbb, arg);
	}
}
void
mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
{
	MonoType *ret = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);

	if (!ret->byref) {
		if (ret->type == MONO_TYPE_R4) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_R8) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_LMOVE, cfg->ret->dreg, val->dreg);
			else {
				MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EAX, val->dreg + 1);
				MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EDX, val->dreg + 2);
			}
			return;
		}
	}

	MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
}

/*
 * Allow tracing to work with this interface (with an optional argument)
 */
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;

	g_assert (MONO_ARCH_FRAME_ALIGNMENT >= 8);
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 8);

	/* if some args are passed in registers, we need to save them here */
	x86_push_reg (code, X86_EBP);

	if (cfg->compile_aot) {
		x86_push_imm (code, cfg->method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
		x86_push_imm (code, cfg->method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT);

	return code;
}

enum {
	SAVE_NONE,
	SAVE_STRUCT,
	SAVE_EAX,
	SAVE_EAX_EDX,
	SAVE_FP
};

void*
mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
{
	guchar *code = p;
	int arg_size = 0, stack_usage = 0, save_mode = SAVE_NONE;
	MonoMethod *method = cfg->method;
	MonoType *ret_type = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);

	switch (ret_type->type) {
	case MONO_TYPE_VOID:
		/* special case string .ctor icall */
		if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class) {
			save_mode = SAVE_EAX;
			stack_usage = enable_arguments ? 8 : 4;
		} else
			save_mode = SAVE_NONE;
		break;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		save_mode = SAVE_EAX_EDX;
		stack_usage = enable_arguments ? 16 : 8;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		save_mode = SAVE_FP;
		stack_usage = enable_arguments ? 16 : 8;
		break;
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (ret_type)) {
			save_mode = SAVE_EAX;
			stack_usage = enable_arguments ? 8 : 4;
			break;
		}
		/* Fall through */
	case MONO_TYPE_VALUETYPE:
		// FIXME: Handle SMALL_STRUCT_IN_REG here for proper alignment on darwin-x86
		save_mode = SAVE_STRUCT;
		stack_usage = enable_arguments ? 4 : 0;
		break;
	default:
		save_mode = SAVE_EAX;
		stack_usage = enable_arguments ? 8 : 4;
		break;
	}

	x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage - 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_push_reg (code, X86_EDX);
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EDX);
			x86_push_reg (code, X86_EAX);
			arg_size = 8;
		}
		break;
	case SAVE_EAX:
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EAX);
			arg_size = 4;
		}
		break;
	case SAVE_FP:
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		if (enable_arguments) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
			arg_size = 8;
		}
		break;
	case SAVE_STRUCT:
		if (enable_arguments) {
			x86_push_membase (code, X86_EBP, 8);
			arg_size = 4;
		}
		break;
	case SAVE_NONE:
	default:
		break;
	}

	if (cfg->compile_aot) {
		x86_push_imm (code, method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
		x86_push_imm (code, method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}

	x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_pop_reg (code, X86_EAX);
		x86_pop_reg (code, X86_EDX);
		break;
	case SAVE_EAX:
		x86_pop_reg (code, X86_EAX);
		break;
	case SAVE_FP:
		x86_fld_membase (code, X86_ESP, 0, TRUE);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		break;
	case SAVE_NONE:
	default:
		break;
	}

	x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage);

	return code;
}

#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->inst_true_bb->native_offset) { \
	x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
} else { \
	mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
	if ((cfg->opt & MONO_OPT_BRANCH) && \
	    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
		x86_branch8 (code, cond, 0, sign); \
	else \
		x86_branch32 (code, cond, 0, sign); \
}

/*
 * Emit an exception if the condition fails and,
 * if possible, branch directly to the target.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
	do { \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, signed); \
		} else { \
			EMIT_COND_BRANCH (tins, cond, signed); \
		} \
	} while (0);

#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0);

static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}

#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))

/*
 * mono_arch_peephole_pass_1:
 *
 *   Perform peephole opts which should/can be performed before local regalloc
 */
void
mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		MonoInst *last_ins = ins->prev;

		switch (ins->opcode) {
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				/*
				 * X86_LEA is like ADD, but doesn't have the
				 * sreg1==dreg restriction.
				 */
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_SUB_IMM:
		case OP_ISUB_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
				ins->inst_imm = -ins->inst_imm;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		case OP_COMPARE_IMM:
		case OP_ICOMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0)
			 * -->
			 * OP_X86_TEST_NULL (reg)
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/*
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_COMPARE_IMM;
				ins->sreg1 = last_ins->sreg1;

				/* check if we can remove cmp reg,0 with test null */
				if (!ins->inst_imm)
					ins->opcode = OP_X86_TEST_NULL;
			}

			break;
		case OP_X86_PUSH_MEMBASE:
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_X86_PUSH;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}

void
mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we can't do it always */
			if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
				MonoInst *ins2;

				ins->opcode = OP_IXOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;

				/*
				 * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG
				 * since it takes 3 bytes instead of 7.
				 */
				for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STORE_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STOREI4_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
						/* Continue iteration */
					}
					else
						break;
				}
			}
			break;
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_ISUB_IMM:
		case OP_SUB_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}

/*
 * mono_arch_lowering_pass:
 *
 *   Converts complex opcodes into simpler ones so that each IR instruction
 * corresponds to one machine instruction.
 */
void
mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *next;

	/*
	 * FIXME: Need to add more instructions, but the current machine
	 * description can't model some parts of the composite instructions like
	 * cdq.
	 */
	MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) {
		switch (ins->opcode) {
		case OP_IREM_IMM:
		case OP_IDIV_IMM:
		case OP_IDIV_UN_IMM:
		case OP_IREM_UN_IMM:
			/*
			 * Keep the cases where we could generate optimized code, otherwise convert
			 * to the non-imm variant.
			 */
			if ((ins->opcode == OP_IREM_IMM) && mono_is_power_of_two (ins->inst_imm) >= 0)
				break;
			mono_decompose_op_imm (cfg, bb, ins);
			break;
		default:
			break;
		}
	}

	bb->max_vreg = cfg->next_vreg;
}

static const int
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Maps CMP_... constants to X86_CC_... constants */
static const int
cc_table [] = {
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};

static const int
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};

static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
#define XMM_TEMP_REG 0
	/* This SSE2 optimization must not be done with OPT_SIMD in place as it clobbers xmm0. */
	/* The xmm pass decomposes OP_FCONV_ ops anyway. */
	if (cfg->opt & MONO_OPT_SSE2 && size < 8 && !(cfg->opt & MONO_OPT_SIMD)) {
		/* optimize by assigning a local var for this use so we avoid
		 * the stack manipulations */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
		x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		if (size == 1)
			x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
		else if (size == 2)
			x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
		return code;
	}
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
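
/*
 * The OR with 0xc00 above sets both rounding control bits of the x87 control
 * word (RC = 11b, round toward zero), which makes fistp match the truncating
 * semantics of a C/CIL float->int cast; the original control word is restored
 * from the stack afterwards.
 */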

static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(TARGET_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently committed.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/*
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}

		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);

		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);

		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}

static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
	/* Move return value to the target register */
	switch (ins->opcode) {
	case OP_CALL:
	case OP_CALL_REG:
	case OP_CALL_MEMBASE:
		if (ins->dreg != X86_EAX)
			x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
		break;
	default:
		break;
	}

	return code;
}

gboolean
mono_x86_have_tls_get (void)
{
#ifdef __APPLE__
	guint32 *ins = (guint32*)pthread_getspecific;

	/*
	 * We're looking for these two instructions:
	 *
	 * mov    0x4(%esp),%eax
	 * mov    %gs:0x48(,%eax,4),%eax
	 */
	return ins [0] == 0x0424448b && ins [1] == 0x85048b65 && ins [2] == 0x00000048;
#else
	return TRUE;
#endif
}
2059 * mono_x86_emit_tls_get:
2060 * @code: buffer to store code to
2061 * @dreg: hard register where to place the result
2062 * @tls_offset: offset info
2064 * mono_x86_emit_tls_get emits in @code the native code that puts in
2065 * the dreg register the item in the thread local storage identified
2066 * by tls_offset.
2068 * Returns: a pointer to the end of the stored code
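*
* As a sketch, on Linux the fast path is a single gs-relative load,
*
*   mov %gs:tls_offset, dreg
*
* while the Xen-safe variant first loads the TLS block address from %gs:0
* and then indexes it with tls_offset.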
2070 guint8*
2071 mono_x86_emit_tls_get (guint8* code, int dreg, int tls_offset)
2073 #if defined(__APPLE__)
2074 x86_prefix (code, X86_GS_PREFIX);
2075 x86_mov_reg_mem (code, dreg, 0x48 + tls_offset * 4, 4);
2076 #elif defined(TARGET_WIN32)
2078 * See the Under the Hood article in the May 1996 issue of Microsoft Systems
2079 * Journal and/or a disassembly of the TlsGetValue () function.
2081 g_assert (tls_offset < 64);
2082 x86_prefix (code, X86_FS_PREFIX);
2083 x86_mov_reg_mem (code, dreg, 0x18, 4);
2084 /* Dunno what this does but TlsGetValue () contains it */
2085 x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
2086 x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
2087 #else
2088 if (optimize_for_xen) {
2089 x86_prefix (code, X86_GS_PREFIX);
2090 x86_mov_reg_mem (code, dreg, 0, 4);
2091 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
2092 } else {
2093 x86_prefix (code, X86_GS_PREFIX);
2094 x86_mov_reg_mem (code, dreg, tls_offset, 4);
2096 #endif
2097 return code;
2101 * emit_load_volatile_arguments:
2103 * Store arguments promoted to global registers back to their original stack slots.
2104 * Required before a tail call.
2106 static guint8*
2107 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2109 MonoMethod *method = cfg->method;
2110 MonoMethodSignature *sig;
2111 MonoInst *inst;
2112 CallInfo *cinfo;
2113 guint32 i;
2115 /* FIXME: Generate intermediate code instead */
2117 sig = mono_method_signature (method);
2119 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
2121 /* This is the opposite of the code in emit_prolog */
2123 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2124 ArgInfo *ainfo = cinfo->args + i;
2125 MonoType *arg_type;
2126 inst = cfg->args [i];
2128 if (sig->hasthis && (i == 0))
2129 arg_type = &mono_defaults.object_class->byval_arg;
2130 else
2131 arg_type = sig->params [i - sig->hasthis];
2134 * On x86, the arguments are either in their original stack locations, or in
2135 * global regs.
2137 if (inst->opcode == OP_REGVAR) {
2138 g_assert (ainfo->storage == ArgOnStack);
2140 x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
2144 return code;
2147 #define REAL_PRINT_REG(text,reg) \
2148 mono_assert (reg >= 0); \
2149 x86_push_reg (code, X86_EAX); \
2150 x86_push_reg (code, X86_EDX); \
2151 x86_push_reg (code, X86_ECX); \
2152 x86_push_reg (code, reg); \
2153 x86_push_imm (code, reg); \
2154 x86_push_imm (code, text " %d %p\n"); \
2155 x86_mov_reg_imm (code, X86_EAX, printf); \
2156 x86_call_reg (code, X86_EAX); \
2157 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
2158 x86_pop_reg (code, X86_ECX); \
2159 x86_pop_reg (code, X86_EDX); \
2160 x86_pop_reg (code, X86_EAX);
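/*
 * Debug-only usage sketch: REAL_PRINT_REG ("esi", X86_ESI) emits code that
 * calls printf ("esi %d %p\n", <reg number>, <reg value>) at run time,
 * preserving EAX, EDX and ECX around the call.
 */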
2162 /* benchmark and set based on cpu */
2163 #define LOOP_ALIGNMENT 8
2164 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2166 #ifndef DISABLE_JIT
2168 void
2169 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2171 MonoInst *ins;
2172 MonoCallInst *call;
2173 guint offset;
2174 guint8 *code = cfg->native_code + cfg->code_len;
2175 int max_len, cpos;
2177 if (cfg->opt & MONO_OPT_LOOP) {
2178 int pad, align = LOOP_ALIGNMENT;
2179 /* set alignment depending on cpu */
2180 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2181 pad = align - pad;
2182 /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2183 x86_padding (code, pad);
2184 cfg->code_len += pad;
2185 bb->native_offset = cfg->code_len;
2189 if (cfg->verbose_level > 2)
2190 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2192 cpos = bb->max_offset;
2194 if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2195 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2196 g_assert (!cfg->compile_aot);
2197 cpos += 6;
2199 cov->data [bb->dfn].cil_code = bb->cil_code;
2200 /* this is not thread safe, but good enough */
2201 x86_inc_mem (code, &cov->data [bb->dfn].count);
2204 offset = code - cfg->native_code;
2206 mono_debug_open_block (cfg, bb, offset);
2208 MONO_BB_FOR_EACH_INS (bb, ins) {
2209 offset = code - cfg->native_code;
2211 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2213 if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
2214 cfg->code_size *= 2;
2215 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2216 code = cfg->native_code + offset;
2217 mono_jit_stats.code_reallocs++;
2220 if (cfg->debug_info)
2221 mono_debug_record_line_number (cfg, ins, offset);
2223 switch (ins->opcode) {
2224 case OP_BIGMUL:
2225 x86_mul_reg (code, ins->sreg2, TRUE);
2226 break;
2227 case OP_BIGMUL_UN:
2228 x86_mul_reg (code, ins->sreg2, FALSE);
2229 break;
2230 case OP_X86_SETEQ_MEMBASE:
2231 case OP_X86_SETNE_MEMBASE:
2232 x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2233 ins->inst_basereg, ins->inst_offset, TRUE);
2234 break;
2235 case OP_STOREI1_MEMBASE_IMM:
2236 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2237 break;
2238 case OP_STOREI2_MEMBASE_IMM:
2239 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2240 break;
2241 case OP_STORE_MEMBASE_IMM:
2242 case OP_STOREI4_MEMBASE_IMM:
2243 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2244 break;
2245 case OP_STOREI1_MEMBASE_REG:
2246 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2247 break;
2248 case OP_STOREI2_MEMBASE_REG:
2249 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2250 break;
2251 case OP_STORE_MEMBASE_REG:
2252 case OP_STOREI4_MEMBASE_REG:
2253 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2254 break;
2255 case OP_STORE_MEM_IMM:
2256 x86_mov_mem_imm (code, ins->inst_p0, ins->inst_c0, 4);
2257 break;
2258 case OP_LOADU4_MEM:
2259 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2260 break;
2261 case OP_LOAD_MEM:
2262 case OP_LOADI4_MEM:
2263 /* These are created by the cprop pass so they use inst_imm as the source */
2264 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2265 break;
2266 case OP_LOADU1_MEM:
2267 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, FALSE);
2268 break;
2269 case OP_LOADU2_MEM:
2270 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, TRUE);
2271 break;
2272 case OP_LOAD_MEMBASE:
2273 case OP_LOADI4_MEMBASE:
2274 case OP_LOADU4_MEMBASE:
2275 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2276 break;
2277 case OP_LOADU1_MEMBASE:
2278 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2279 break;
2280 case OP_LOADI1_MEMBASE:
2281 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2282 break;
2283 case OP_LOADU2_MEMBASE:
2284 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2285 break;
2286 case OP_LOADI2_MEMBASE:
2287 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2288 break;
2289 case OP_ICONV_TO_I1:
2290 case OP_SEXT_I1:
2291 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2292 break;
2293 case OP_ICONV_TO_I2:
2294 case OP_SEXT_I2:
2295 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2296 break;
2297 case OP_ICONV_TO_U1:
2298 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2299 break;
2300 case OP_ICONV_TO_U2:
2301 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2302 break;
2303 case OP_COMPARE:
2304 case OP_ICOMPARE:
2305 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2306 break;
2307 case OP_COMPARE_IMM:
2308 case OP_ICOMPARE_IMM:
2309 x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2310 break;
2311 case OP_X86_COMPARE_MEMBASE_REG:
2312 x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2313 break;
2314 case OP_X86_COMPARE_MEMBASE_IMM:
2315 x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2316 break;
2317 case OP_X86_COMPARE_MEMBASE8_IMM:
2318 x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2319 break;
2320 case OP_X86_COMPARE_REG_MEMBASE:
2321 x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2322 break;
2323 case OP_X86_COMPARE_MEM_IMM:
2324 x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2325 break;
2326 case OP_X86_TEST_NULL:
2327 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2328 break;
2329 case OP_X86_ADD_MEMBASE_IMM:
2330 x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2331 break;
2332 case OP_X86_ADD_REG_MEMBASE:
2333 x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2334 break;
2335 case OP_X86_SUB_MEMBASE_IMM:
2336 x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2337 break;
2338 case OP_X86_SUB_REG_MEMBASE:
2339 x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2340 break;
2341 case OP_X86_AND_MEMBASE_IMM:
2342 x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2343 break;
2344 case OP_X86_OR_MEMBASE_IMM:
2345 x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2346 break;
2347 case OP_X86_XOR_MEMBASE_IMM:
2348 x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2349 break;
2350 case OP_X86_ADD_MEMBASE_REG:
2351 x86_alu_membase_reg (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2352 break;
2353 case OP_X86_SUB_MEMBASE_REG:
2354 x86_alu_membase_reg (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2355 break;
2356 case OP_X86_AND_MEMBASE_REG:
2357 x86_alu_membase_reg (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2358 break;
2359 case OP_X86_OR_MEMBASE_REG:
2360 x86_alu_membase_reg (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2361 break;
2362 case OP_X86_XOR_MEMBASE_REG:
2363 x86_alu_membase_reg (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2364 break;
2365 case OP_X86_INC_MEMBASE:
2366 x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2367 break;
2368 case OP_X86_INC_REG:
2369 x86_inc_reg (code, ins->dreg);
2370 break;
2371 case OP_X86_DEC_MEMBASE:
2372 x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2373 break;
2374 case OP_X86_DEC_REG:
2375 x86_dec_reg (code, ins->dreg);
2376 break;
2377 case OP_X86_MUL_REG_MEMBASE:
2378 x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2379 break;
2380 case OP_X86_AND_REG_MEMBASE:
2381 x86_alu_reg_membase (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset);
2382 break;
2383 case OP_X86_OR_REG_MEMBASE:
2384 x86_alu_reg_membase (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset);
2385 break;
2386 case OP_X86_XOR_REG_MEMBASE:
2387 x86_alu_reg_membase (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset);
2388 break;
2389 case OP_BREAK:
2390 x86_breakpoint (code);
2391 break;
2392 case OP_RELAXED_NOP:
2393 x86_prefix (code, X86_REP_PREFIX);
2394 x86_nop (code);
2395 break;
2396 case OP_HARD_NOP:
2397 x86_nop (code);
2398 break;
2399 case OP_NOP:
2400 case OP_DUMMY_USE:
2401 case OP_DUMMY_STORE:
2402 case OP_NOT_REACHED:
2403 case OP_NOT_NULL:
2404 break;
2405 case OP_SEQ_POINT: {
2406 int i;
2408 if (cfg->compile_aot)
2409 NOT_IMPLEMENTED;
2412 * Read from the single stepping trigger page. This will cause a
2413 * SIGSEGV when single stepping is enabled.
2414 * We do this _before_ the breakpoint, so single stepping after
2415 * a breakpoint is hit will step to the next IL offset.
2417 if (ins->flags & MONO_INST_SINGLE_STEP_LOC)
2418 x86_alu_reg_mem (code, X86_CMP, X86_EAX, (guint32)ss_trigger_page);
2420 mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
2423 * A placeholder for a possible breakpoint inserted by
2424 * mono_arch_set_breakpoint ().
2426 for (i = 0; i < 6; ++i)
2427 x86_nop (code);
2428 break;
2430 case OP_ADDCC:
2431 case OP_IADDCC:
2432 case OP_IADD:
2433 x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2434 break;
2435 case OP_ADC:
2436 case OP_IADC:
2437 x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2438 break;
2439 case OP_ADDCC_IMM:
2440 case OP_ADD_IMM:
2441 case OP_IADD_IMM:
2442 x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2443 break;
2444 case OP_ADC_IMM:
2445 case OP_IADC_IMM:
2446 x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2447 break;
2448 case OP_SUBCC:
2449 case OP_ISUBCC:
2450 case OP_ISUB:
2451 x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2452 break;
2453 case OP_SBB:
2454 case OP_ISBB:
2455 x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2456 break;
2457 case OP_SUBCC_IMM:
2458 case OP_SUB_IMM:
2459 case OP_ISUB_IMM:
2460 x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2461 break;
2462 case OP_SBB_IMM:
2463 case OP_ISBB_IMM:
2464 x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2465 break;
2466 case OP_IAND:
2467 x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2468 break;
2469 case OP_AND_IMM:
2470 case OP_IAND_IMM:
2471 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2472 break;
2473 case OP_IDIV:
2474 case OP_IREM:
2476 * The code is the same for div/rem, the allocator will allocate dreg
2477 * to EAX/EDX as appropriate.
2479 if (ins->sreg2 == X86_EDX) {
2480 /* cdq clobbers this */
2481 x86_push_reg (code, ins->sreg2);
2482 x86_cdq (code);
2483 x86_div_membase (code, X86_ESP, 0, TRUE);
2484 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2485 } else {
2486 x86_cdq (code);
2487 x86_div_reg (code, ins->sreg2, TRUE);
2489 break;
2490 case OP_IDIV_UN:
2491 case OP_IREM_UN:
2492 if (ins->sreg2 == X86_EDX) {
2493 x86_push_reg (code, ins->sreg2);
2494 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2495 x86_div_membase (code, X86_ESP, 0, FALSE);
2496 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2497 } else {
2498 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2499 x86_div_reg (code, ins->sreg2, FALSE);
2501 break;
2502 case OP_DIV_IMM:
2503 x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2504 x86_cdq (code);
2505 x86_div_reg (code, ins->sreg2, TRUE);
2506 break;
2507 case OP_IREM_IMM: {
2508 int power = mono_is_power_of_two (ins->inst_imm);
2510 g_assert (ins->sreg1 == X86_EAX);
2511 g_assert (ins->dreg == X86_EAX);
2512 g_assert (power >= 0);
2514 if (power == 1) {
2515 /* Based on http://compilers.iecc.com/comparch/article/93-04-079 */
2516 x86_cdq (code);
2517 x86_alu_reg_imm (code, X86_AND, X86_EAX, 1);
2519 * If the dividend is >= 0 (%edx is 0), this does nothing. If it is negative
2520 * (%edx is -1), it transforms %eax=0 into %eax=0, and %eax=1 into %eax=-1.
2522 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EDX);
2523 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
2524 } else if (power == 0) {
2525 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2526 } else {
2527 /* Based on gcc code */
2529 /* Add compensation for negative dividents */
2530 x86_cdq (code);
2531 x86_shift_reg_imm (code, X86_SHR, X86_EDX, 32 - power);
2532 x86_alu_reg_reg (code, X86_ADD, X86_EAX, X86_EDX);
2533 /* Compute remainder */
2534 x86_alu_reg_imm (code, X86_AND, X86_EAX, (1 << power) - 1);
2535 /* Remove compensation */
2536 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
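/* Worked example (a sketch) for imm == 8, %eax == -13: cdq sets %edx to -1;
 * shr %edx, 29 makes it 7; the add turns %eax into -6; and %eax, 7 gives 2;
 * the final sub yields 2 - 7 = -5, which is -13 % 8 with C semantics. */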
2538 break;
2540 case OP_IOR:
2541 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2542 break;
2543 case OP_OR_IMM:
2544 case OP_IOR_IMM:
2545 x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2546 break;
2547 case OP_IXOR:
2548 x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2549 break;
2550 case OP_XOR_IMM:
2551 case OP_IXOR_IMM:
2552 x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2553 break;
2554 case OP_ISHL:
2555 g_assert (ins->sreg2 == X86_ECX);
2556 x86_shift_reg (code, X86_SHL, ins->dreg);
2557 break;
2558 case OP_ISHR:
2559 g_assert (ins->sreg2 == X86_ECX);
2560 x86_shift_reg (code, X86_SAR, ins->dreg);
2561 break;
2562 case OP_SHR_IMM:
2563 case OP_ISHR_IMM:
2564 x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2565 break;
2566 case OP_SHR_UN_IMM:
2567 case OP_ISHR_UN_IMM:
2568 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2569 break;
2570 case OP_ISHR_UN:
2571 g_assert (ins->sreg2 == X86_ECX);
2572 x86_shift_reg (code, X86_SHR, ins->dreg);
2573 break;
2574 case OP_SHL_IMM:
2575 case OP_ISHL_IMM:
2576 x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2577 break;
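/*
 * 64-bit shifts, a sketch of the strategy: shld/shrd shift across the
 * register pair for counts below 32, but the hardware masks shift counts
 * to 5 bits, so the 'test ecx, 32' below detects counts >= 32 and fixes
 * the result up by moving one half of the pair into the other.
 */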
2578 case OP_LSHL: {
2579 guint8 *jump_to_end;
2581 /* handle shifts below 32 bits */
2582 x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2583 x86_shift_reg (code, X86_SHL, ins->sreg1);
2585 x86_test_reg_imm (code, X86_ECX, 32);
2586 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2588 /* handle shifts over 31 bits */
2589 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2590 x86_clear_reg (code, ins->sreg1);
2592 x86_patch (jump_to_end, code);
2594 break;
2595 case OP_LSHR: {
2596 guint8 *jump_to_end;
2598 /* handle shifts below 32 bits */
2599 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2600 x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2602 x86_test_reg_imm (code, X86_ECX, 32);
2603 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2605 /* handle shifts over 31 bits */
2606 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2607 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2609 x86_patch (jump_to_end, code);
2611 break;
2612 case OP_LSHR_UN: {
2613 guint8 *jump_to_end;
2615 /* handle shifts below 32 bits */
2616 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2617 x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2619 x86_test_reg_imm (code, X86_ECX, 32);
2620 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2622 /* handle shifts over 31 bits */
2623 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2624 x86_clear_reg (code, ins->backend.reg3);
2626 x86_patch (jump_to_end, code);
2628 break;
2629 case OP_LSHL_IMM:
2630 if (ins->inst_imm >= 32) {
2631 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2632 x86_clear_reg (code, ins->sreg1);
2633 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2634 } else {
2635 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2636 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2638 break;
2639 case OP_LSHR_IMM:
2640 if (ins->inst_imm >= 32) {
2641 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2642 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2643 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2644 } else {
2645 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2646 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2648 break;
2649 case OP_LSHR_UN_IMM:
2650 if (ins->inst_imm >= 32) {
2651 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2652 x86_clear_reg (code, ins->backend.reg3);
2653 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2654 } else {
2655 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2656 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2658 break;
2659 case OP_INOT:
2660 x86_not_reg (code, ins->sreg1);
2661 break;
2662 case OP_INEG:
2663 x86_neg_reg (code, ins->sreg1);
2664 break;
2666 case OP_IMUL:
2667 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2668 break;
2669 case OP_MUL_IMM:
2670 case OP_IMUL_IMM:
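/* Strength-reduce small constant multiplies with lea; note that the last
 * argument of x86_lea_memindex is a shift, so 1, 2 and 3 scale the index
 * register by 2, 4 and 8 respectively. */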
2671 switch (ins->inst_imm) {
2672 case 2:
2673 /* MOV r1, r2 */
2674 /* ADD r1, r1 */
2675 if (ins->dreg != ins->sreg1)
2676 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2677 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2678 break;
2679 case 3:
2680 /* LEA r1, [r2 + r2*2] */
2681 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2682 break;
2683 case 5:
2684 /* LEA r1, [r2 + r2*4] */
2685 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2686 break;
2687 case 6:
2688 /* LEA r1, [r2 + r2*2] */
2689 /* ADD r1, r1 */
2690 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2691 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2692 break;
2693 case 9:
2694 /* LEA r1, [r2 + r2*8] */
2695 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2696 break;
2697 case 10:
2698 /* LEA r1, [r2 + r2*4] */
2699 /* ADD r1, r1 */
2700 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2701 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2702 break;
2703 case 12:
2704 /* LEA r1, [r2 + r2*2] */
2705 /* SHL r1, 2 */
2706 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2707 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2708 break;
2709 case 25:
2710 /* LEA r1, [r2 + r2*4] */
2711 /* LEA r1, [r1 + r1*4] */
2712 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2713 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2714 break;
2715 case 100:
2716 /* LEA r1, [r2 + r2*4] */
2717 /* SHL r1, 2 */
2718 /* LEA r1, [r1 + r1*4] */
2719 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2720 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2721 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2722 break;
2723 default:
2724 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2725 break;
2727 break;
2728 case OP_IMUL_OVF:
2729 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2730 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2731 break;
2732 case OP_IMUL_OVF_UN: {
2733 /* the mul operation and the exception check should most likely be split */
2734 int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2735 /*g_assert (ins->sreg2 == X86_EAX);
2736 g_assert (ins->dreg == X86_EAX);*/
2737 if (ins->sreg2 == X86_EAX) {
2738 non_eax_reg = ins->sreg1;
2739 } else if (ins->sreg1 == X86_EAX) {
2740 non_eax_reg = ins->sreg2;
2741 } else {
2742 /* no need to save since we're going to store to it anyway */
2743 if (ins->dreg != X86_EAX) {
2744 saved_eax = TRUE;
2745 x86_push_reg (code, X86_EAX);
2747 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2748 non_eax_reg = ins->sreg2;
2750 if (ins->dreg == X86_EDX) {
2751 if (!saved_eax) {
2752 saved_eax = TRUE;
2753 x86_push_reg (code, X86_EAX);
2755 } else if (ins->dreg != X86_EAX) {
2756 saved_edx = TRUE;
2757 x86_push_reg (code, X86_EDX);
2759 x86_mul_reg (code, non_eax_reg, FALSE);
2760 /* save before the check since pop and mov don't change the flags */
2761 if (ins->dreg != X86_EAX)
2762 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2763 if (saved_edx)
2764 x86_pop_reg (code, X86_EDX);
2765 if (saved_eax)
2766 x86_pop_reg (code, X86_EAX);
2767 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2768 break;
2770 case OP_ICONST:
2771 x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2772 break;
2773 case OP_AOTCONST:
2774 g_assert_not_reached ();
2775 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2776 x86_mov_reg_imm (code, ins->dreg, 0);
2777 break;
2778 case OP_JUMP_TABLE:
2779 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2780 x86_mov_reg_imm (code, ins->dreg, 0);
2781 break;
2782 case OP_LOAD_GOTADDR:
2783 x86_call_imm (code, 0);
2785 * The patch needs to point to the pop, since the GOT offset needs
2786 * to be added to that address.
2788 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2789 x86_pop_reg (code, ins->dreg);
2790 x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2791 break;
2792 case OP_GOT_ENTRY:
2793 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2794 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2795 break;
2796 case OP_X86_PUSH_GOT_ENTRY:
2797 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2798 x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2799 break;
2800 case OP_MOVE:
2801 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2802 break;
2803 case OP_JMP: {
2805 * Note: this 'frame destruction' logic is useful for tail calls, too.
2806 * Keep in sync with the code in emit_epilog.
2808 int pos = 0;
2810 /* FIXME: no tracing support... */
2811 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2812 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2813 /* reset offset to make max_len work */
2814 offset = code - cfg->native_code;
2816 g_assert (!cfg->method->save_lmf);
2818 code = emit_load_volatile_arguments (cfg, code);
2820 if (cfg->used_int_regs & (1 << X86_EBX))
2821 pos -= 4;
2822 if (cfg->used_int_regs & (1 << X86_EDI))
2823 pos -= 4;
2824 if (cfg->used_int_regs & (1 << X86_ESI))
2825 pos -= 4;
2826 if (pos)
2827 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2829 if (cfg->used_int_regs & (1 << X86_ESI))
2830 x86_pop_reg (code, X86_ESI);
2831 if (cfg->used_int_regs & (1 << X86_EDI))
2832 x86_pop_reg (code, X86_EDI);
2833 if (cfg->used_int_regs & (1 << X86_EBX))
2834 x86_pop_reg (code, X86_EBX);
2836 /* restore ESP/EBP */
2837 x86_leave (code);
2838 offset = code - cfg->native_code;
2839 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2840 x86_jump32 (code, 0);
2842 cfg->disable_aot = TRUE;
2843 break;
2845 case OP_CHECK_THIS:
2846 /* ensure ins->sreg1 is not NULL
2847 * note that cmp DWORD PTR [eax], eax is one byte shorter than
2848 * cmp DWORD PTR [eax], 0
2850 x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2851 break;
2852 case OP_ARGLIST: {
2853 int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2854 x86_push_reg (code, hreg);
2855 x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2856 x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2857 x86_pop_reg (code, hreg);
2858 break;
2860 case OP_FCALL:
2861 case OP_LCALL:
2862 case OP_VCALL:
2863 case OP_VCALL2:
2864 case OP_VOIDCALL:
2865 case OP_CALL:
2866 call = (MonoCallInst*)ins;
2867 if (ins->flags & MONO_INST_HAS_METHOD)
2868 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2869 else
2870 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2871 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2872 /* a pop is one byte, while an add reg, imm is 3, so if there are 4 or 8
2873 * bytes to pop, we want to use pops. GCC does this (note it won't happen
2874 * for P4 or i686, because gcc avoids pop/push altogether, but we aren't
2875 * smart enough to do that optimization yet).
2877 * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2878 * the mcs bootstrap slower. However, doing one pop for 4 bytes creates a small
2879 * speedup (most likely from locality benefits). People with other processors
2880 * should check theirs to see what happens.
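*
* For reference: pop ecx encodes in one byte (0x59), while add esp, 4
* takes three (0x83 0xC4 0x04).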
2882 if (call->stack_usage == 4) {
2883 /* we want to use registers that won't get used soon, so use
2884 * ecx, as eax will get allocated first. edx is used by long calls,
2885 * so we can't use that.
2888 x86_pop_reg (code, X86_ECX);
2889 } else {
2890 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2893 code = emit_move_return_value (cfg, ins, code);
2894 break;
2895 case OP_FCALL_REG:
2896 case OP_LCALL_REG:
2897 case OP_VCALL_REG:
2898 case OP_VCALL2_REG:
2899 case OP_VOIDCALL_REG:
2900 case OP_CALL_REG:
2901 call = (MonoCallInst*)ins;
2902 x86_call_reg (code, ins->sreg1);
2903 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2904 if (call->stack_usage == 4)
2905 x86_pop_reg (code, X86_ECX);
2906 else
2907 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2909 code = emit_move_return_value (cfg, ins, code);
2910 break;
2911 case OP_FCALL_MEMBASE:
2912 case OP_LCALL_MEMBASE:
2913 case OP_VCALL_MEMBASE:
2914 case OP_VCALL2_MEMBASE:
2915 case OP_VOIDCALL_MEMBASE:
2916 case OP_CALL_MEMBASE:
2917 call = (MonoCallInst*)ins;
2920 * Emit a few nops to simplify get_vcall_slot ().
2922 x86_nop (code);
2923 x86_nop (code);
2924 x86_nop (code);
2926 x86_call_membase (code, ins->sreg1, ins->inst_offset);
2927 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2928 if (call->stack_usage == 4)
2929 x86_pop_reg (code, X86_ECX);
2930 else
2931 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2933 code = emit_move_return_value (cfg, ins, code);
2934 break;
2935 case OP_X86_PUSH:
2936 x86_push_reg (code, ins->sreg1);
2937 break;
2938 case OP_X86_PUSH_IMM:
2939 x86_push_imm (code, ins->inst_imm);
2940 break;
2941 case OP_X86_PUSH_MEMBASE:
2942 x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2943 break;
2944 case OP_X86_PUSH_OBJ:
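/* Push a value type: reserve inst_imm bytes, then copy the object onto the
 * stack dword-by-dword with rep movsd, saving EDI/ESI/ECX around the copy
 * (hence the lea of EDI at ESP + 12, just past the three saved registers). */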
2945 x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2946 x86_push_reg (code, X86_EDI);
2947 x86_push_reg (code, X86_ESI);
2948 x86_push_reg (code, X86_ECX);
2949 if (ins->inst_offset)
2950 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2951 else
2952 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2953 x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2954 x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2955 x86_cld (code);
2956 x86_prefix (code, X86_REP_PREFIX);
2957 x86_movsd (code);
2958 x86_pop_reg (code, X86_ECX);
2959 x86_pop_reg (code, X86_ESI);
2960 x86_pop_reg (code, X86_EDI);
2961 break;
2962 case OP_X86_LEA:
2963 x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2964 break;
2965 case OP_X86_LEA_MEMBASE:
2966 x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2967 break;
2968 case OP_X86_XCHG:
2969 x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2970 break;
2971 case OP_LOCALLOC:
2972 /* keep alignment */
2973 x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2974 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2975 code = mono_emit_stack_alloc (code, ins);
2976 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2977 break;
2978 case OP_LOCALLOC_IMM: {
2979 guint32 size = ins->inst_imm;
2980 size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
2982 if (ins->flags & MONO_INST_INIT) {
2983 /* FIXME: Optimize this */
2984 x86_mov_reg_imm (code, ins->dreg, size);
2985 ins->sreg1 = ins->dreg;
2987 code = mono_emit_stack_alloc (code, ins);
2988 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2989 } else {
2990 x86_alu_reg_imm (code, X86_SUB, X86_ESP, size);
2991 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2993 break;
2995 case OP_THROW: {
2996 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
2997 x86_push_reg (code, ins->sreg1);
2998 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
2999 (gpointer)"mono_arch_throw_exception");
3000 break;
3002 case OP_RETHROW: {
3003 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
3004 x86_push_reg (code, ins->sreg1);
3005 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
3006 (gpointer)"mono_arch_rethrow_exception");
3007 break;
3009 case OP_CALL_HANDLER:
3010 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
3011 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3012 x86_call_imm (code, 0);
3013 mono_cfg_add_try_hole (cfg, ins->inst_eh_block, code, bb);
3014 x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
3015 break;
3016 case OP_START_HANDLER: {
3017 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3018 x86_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, X86_ESP, 4);
3019 break;
3021 case OP_ENDFINALLY: {
3022 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3023 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
3024 x86_ret (code);
3025 break;
3027 case OP_ENDFILTER: {
3028 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3029 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
3030 /* The local allocator will put the result into EAX */
3031 x86_ret (code);
3032 break;
3035 case OP_LABEL:
3036 ins->inst_c0 = code - cfg->native_code;
3037 break;
3038 case OP_BR:
3039 if (ins->inst_target_bb->native_offset) {
3040 x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
3041 } else {
3042 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3043 if ((cfg->opt & MONO_OPT_BRANCH) &&
3044 x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3045 x86_jump8 (code, 0);
3046 else
3047 x86_jump32 (code, 0);
3049 break;
3050 case OP_BR_REG:
3051 x86_jump_reg (code, ins->sreg1);
3052 break;
3053 case OP_CEQ:
3054 case OP_CLT:
3055 case OP_CLT_UN:
3056 case OP_CGT:
3057 case OP_CGT_UN:
3058 case OP_CNE:
3059 case OP_ICEQ:
3060 case OP_ICLT:
3061 case OP_ICLT_UN:
3062 case OP_ICGT:
3063 case OP_ICGT_UN:
3064 x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
3065 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3066 break;
3067 case OP_COND_EXC_EQ:
3068 case OP_COND_EXC_NE_UN:
3069 case OP_COND_EXC_LT:
3070 case OP_COND_EXC_LT_UN:
3071 case OP_COND_EXC_GT:
3072 case OP_COND_EXC_GT_UN:
3073 case OP_COND_EXC_GE:
3074 case OP_COND_EXC_GE_UN:
3075 case OP_COND_EXC_LE:
3076 case OP_COND_EXC_LE_UN:
3077 case OP_COND_EXC_IEQ:
3078 case OP_COND_EXC_INE_UN:
3079 case OP_COND_EXC_ILT:
3080 case OP_COND_EXC_ILT_UN:
3081 case OP_COND_EXC_IGT:
3082 case OP_COND_EXC_IGT_UN:
3083 case OP_COND_EXC_IGE:
3084 case OP_COND_EXC_IGE_UN:
3085 case OP_COND_EXC_ILE:
3086 case OP_COND_EXC_ILE_UN:
3087 EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
3088 break;
3089 case OP_COND_EXC_OV:
3090 case OP_COND_EXC_NO:
3091 case OP_COND_EXC_C:
3092 case OP_COND_EXC_NC:
3093 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3094 break;
3095 case OP_COND_EXC_IOV:
3096 case OP_COND_EXC_INO:
3097 case OP_COND_EXC_IC:
3098 case OP_COND_EXC_INC:
3099 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
3100 break;
3101 case OP_IBEQ:
3102 case OP_IBNE_UN:
3103 case OP_IBLT:
3104 case OP_IBLT_UN:
3105 case OP_IBGT:
3106 case OP_IBGT_UN:
3107 case OP_IBGE:
3108 case OP_IBGE_UN:
3109 case OP_IBLE:
3110 case OP_IBLE_UN:
3111 EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
3112 break;
3114 case OP_CMOV_IEQ:
3115 case OP_CMOV_IGE:
3116 case OP_CMOV_IGT:
3117 case OP_CMOV_ILE:
3118 case OP_CMOV_ILT:
3119 case OP_CMOV_INE_UN:
3120 case OP_CMOV_IGE_UN:
3121 case OP_CMOV_IGT_UN:
3122 case OP_CMOV_ILE_UN:
3123 case OP_CMOV_ILT_UN:
3124 g_assert (ins->dreg == ins->sreg1);
3125 x86_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
3126 break;
3128 /* floating point opcodes */
3129 case OP_R8CONST: {
3130 double d = *(double *)ins->inst_p0;
3132 if ((d == 0.0) && (mono_signbit (d) == 0)) {
3133 x86_fldz (code);
3134 } else if (d == 1.0) {
3135 x86_fld1 (code);
3136 } else {
3137 if (cfg->compile_aot) {
3138 guint32 *val = (guint32*)&d;
3139 x86_push_imm (code, val [1]);
3140 x86_push_imm (code, val [0]);
3141 x86_fld_membase (code, X86_ESP, 0, TRUE);
3142 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3144 else {
3145 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
3146 x86_fld (code, NULL, TRUE);
3149 break;
3151 case OP_R4CONST: {
3152 float f = *(float *)ins->inst_p0;
3154 if ((f == 0.0) && (mono_signbit (f) == 0)) {
3155 x86_fldz (code);
3156 } else if (f == 1.0) {
3157 x86_fld1 (code);
3158 } else {
3159 if (cfg->compile_aot) {
3160 guint32 val = *(guint32*)&f;
3161 x86_push_imm (code, val);
3162 x86_fld_membase (code, X86_ESP, 0, FALSE);
3163 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3165 else {
3166 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
3167 x86_fld (code, NULL, FALSE);
3170 break;
3172 case OP_STORER8_MEMBASE_REG:
3173 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3174 break;
3175 case OP_LOADR8_MEMBASE:
3176 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3177 break;
3178 case OP_STORER4_MEMBASE_REG:
3179 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3180 break;
3181 case OP_LOADR4_MEMBASE:
3182 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3183 break;
3184 case OP_ICONV_TO_R4:
3185 x86_push_reg (code, ins->sreg1);
3186 x86_fild_membase (code, X86_ESP, 0, FALSE);
3187 /* Change precision */
3188 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3189 x86_fld_membase (code, X86_ESP, 0, FALSE);
3190 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3191 break;
3192 case OP_ICONV_TO_R8:
3193 x86_push_reg (code, ins->sreg1);
3194 x86_fild_membase (code, X86_ESP, 0, FALSE);
3195 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3196 break;
3197 case OP_ICONV_TO_R_UN:
3198 x86_push_imm (code, 0);
3199 x86_push_reg (code, ins->sreg1);
3200 x86_fild_membase (code, X86_ESP, 0, TRUE);
3201 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3202 break;
3203 case OP_X86_FP_LOAD_I8:
3204 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3205 break;
3206 case OP_X86_FP_LOAD_I4:
3207 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3208 break;
3209 case OP_FCONV_TO_R4:
3210 /* Change precision */
3211 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3212 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3213 x86_fld_membase (code, X86_ESP, 0, FALSE);
3214 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3215 break;
3216 case OP_FCONV_TO_I1:
3217 code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3218 break;
3219 case OP_FCONV_TO_U1:
3220 code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3221 break;
3222 case OP_FCONV_TO_I2:
3223 code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3224 break;
3225 case OP_FCONV_TO_U2:
3226 code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3227 break;
3228 case OP_FCONV_TO_I4:
3229 case OP_FCONV_TO_I:
3230 code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3231 break;
3232 case OP_FCONV_TO_I8:
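/* fistp rounds with the current FPU rounding mode, so OR 0xc00 into the
 * control word to set RC to round-toward-zero (truncation, as the
 * conversion requires), then restore the saved control word afterwards. */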
3233 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3234 x86_fnstcw_membase(code, X86_ESP, 0);
3235 x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
3236 x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
3237 x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
3238 x86_fldcw_membase (code, X86_ESP, 2);
3239 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3240 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
3241 x86_pop_reg (code, ins->dreg);
3242 x86_pop_reg (code, ins->backend.reg3);
3243 x86_fldcw_membase (code, X86_ESP, 0);
3244 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3245 break;
3246 case OP_LCONV_TO_R8_2:
3247 x86_push_reg (code, ins->sreg2);
3248 x86_push_reg (code, ins->sreg1);
3249 x86_fild_membase (code, X86_ESP, 0, TRUE);
3250 /* Change precision */
3251 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
3252 x86_fld_membase (code, X86_ESP, 0, TRUE);
3253 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3254 break;
3255 case OP_LCONV_TO_R4_2:
3256 x86_push_reg (code, ins->sreg2);
3257 x86_push_reg (code, ins->sreg1);
3258 x86_fild_membase (code, X86_ESP, 0, TRUE);
3259 /* Change precision */
3260 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3261 x86_fld_membase (code, X86_ESP, 0, FALSE);
3262 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3263 break;
3264 case OP_LCONV_TO_R_UN_2: {
3265 static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
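/* mn is 2^64 encoded as an 80-bit extended double (biased exponent 0x403f
 * with the integer bit set); adding it compensates for fild treating the
 * value as signed when the original unsigned value had its top bit set. */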
3266 guint8 *br;
3268 /* load 64bit integer to FP stack */
3269 x86_push_reg (code, ins->sreg2);
3270 x86_push_reg (code, ins->sreg1);
3271 x86_fild_membase (code, X86_ESP, 0, TRUE);
3273 /* test if lreg is negative */
3274 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3275 br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3277 /* add correction constant mn */
3278 x86_fld80_mem (code, mn);
3279 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3281 x86_patch (br, code);
3283 /* Change precision */
3284 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
3285 x86_fld_membase (code, X86_ESP, 0, TRUE);
3287 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3289 break;
3291 case OP_LCONV_TO_OVF_I:
3292 case OP_LCONV_TO_OVF_I4_2: {
3293 guint8 *br [3], *label [1];
3294 MonoInst *tins;
3297 * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
3299 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3301 /* If the low word top bit is set, see if we are negative */
3302 br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3303 /* We are not negative (no top bit set); check that our top word is zero */
3304 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3305 br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3306 label [0] = code;
3308 /* throw exception */
3309 tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
3310 if (tins) {
3311 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
3312 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
3313 x86_jump8 (code, 0);
3314 else
3315 x86_jump32 (code, 0);
3316 } else {
3317 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3318 x86_jump32 (code, 0);
3322 x86_patch (br [0], code);
3323 /* our top bit is set, check that the top word is 0xffffffff */
3324 x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3326 x86_patch (br [1], code);
3327 /* nope, emit exception */
3328 br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3329 x86_patch (br [2], label [0]);
3331 if (ins->dreg != ins->sreg1)
3332 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3333 break;
3335 case OP_FMOVE:
3336 /* Not needed on the fp stack */
3337 break;
3338 case OP_FADD:
3339 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3340 break;
3341 case OP_FSUB:
3342 x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3343 break;
3344 case OP_FMUL:
3345 x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3346 break;
3347 case OP_FDIV:
3348 x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3349 break;
3350 case OP_FNEG:
3351 x86_fchs (code);
3352 break;
3353 case OP_SIN:
3354 x86_fsin (code);
3355 x86_fldz (code);
3356 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3357 break;
3358 case OP_COS:
3359 x86_fcos (code);
3360 x86_fldz (code);
3361 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3362 break;
3363 case OP_ABS:
3364 x86_fabs (code);
3365 break;
3366 case OP_TAN: {
3368 * it really doesn't make sense to inline all this code,
3369 * it's here just to show that things may not be as simple
3370 * as they appear.
3372 guchar *check_pos, *end_tan, *pop_jump;
3373 x86_push_reg (code, X86_EAX);
3374 x86_fptan (code);
3375 x86_fnstsw (code);
3376 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3377 check_pos = code;
3378 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3379 x86_fstp (code, 0); /* pop the 1.0 */
3380 end_tan = code;
3381 x86_jump8 (code, 0);
3382 x86_fldpi (code);
3383 x86_fp_op (code, X86_FADD, 0);
3384 x86_fxch (code, 1);
3385 x86_fprem1 (code);
3386 x86_fstsw (code);
3387 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3388 pop_jump = code;
3389 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3390 x86_fstp (code, 1);
3391 x86_fptan (code);
3392 x86_patch (pop_jump, code);
3393 x86_fstp (code, 0); /* pop the 1.0 */
3394 x86_patch (check_pos, code);
3395 x86_patch (end_tan, code);
3396 x86_fldz (code);
3397 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3398 x86_pop_reg (code, X86_EAX);
3399 break;
3401 case OP_ATAN:
3402 x86_fld1 (code);
3403 x86_fpatan (code);
3404 x86_fldz (code);
3405 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3406 break;
3407 case OP_SQRT:
3408 x86_fsqrt (code);
3409 break;
3410 case OP_ROUND:
3411 x86_frndint (code);
3412 break;
3413 case OP_IMIN:
3414 g_assert (cfg->opt & MONO_OPT_CMOV);
3415 g_assert (ins->dreg == ins->sreg1);
3416 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3417 x86_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
3418 break;
3419 case OP_IMIN_UN:
3420 g_assert (cfg->opt & MONO_OPT_CMOV);
3421 g_assert (ins->dreg == ins->sreg1);
3422 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3423 x86_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
3424 break;
3425 case OP_IMAX:
3426 g_assert (cfg->opt & MONO_OPT_CMOV);
3427 g_assert (ins->dreg == ins->sreg1);
3428 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3429 x86_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
3430 break;
3431 case OP_IMAX_UN:
3432 g_assert (cfg->opt & MONO_OPT_CMOV);
3433 g_assert (ins->dreg == ins->sreg1);
3434 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3435 x86_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
3436 break;
3437 case OP_X86_FPOP:
3438 x86_fstp (code, 0);
3439 break;
3440 case OP_X86_FXCH:
3441 x86_fxch (code, ins->inst_imm);
3442 break;
3443 case OP_FREM: {
3444 guint8 *l1, *l2;
3446 x86_push_reg (code, X86_EAX);
3447 /* we need to exchange ST(0) with ST(1) */
3448 x86_fxch (code, 1);
3450 /* this requires a loop, because fprem sometimes
3451 * returns a partial remainder */
3452 l1 = code;
3453 /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3454 /* x86_fprem1 (code); */
3455 x86_fprem (code);
3456 x86_fnstsw (code);
3457 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3458 l2 = code + 2;
3459 x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3461 /* pop result */
3462 x86_fstp (code, 1);
3464 x86_pop_reg (code, X86_EAX);
3465 break;
3467 case OP_FCOMPARE:
3468 if (cfg->opt & MONO_OPT_FCMOV) {
3469 x86_fcomip (code, 1);
3470 x86_fstp (code, 0);
3471 break;
3473 /* this overwrites EAX */
3474 EMIT_FPCOMPARE(code);
3475 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3476 break;
3477 case OP_FCEQ:
3478 if (cfg->opt & MONO_OPT_FCMOV) {
3479 /* zeroing the register at the start results in
3480 * shorter and faster code (we can also remove the widening op)
3482 guchar *unordered_check;
3483 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3484 x86_fcomip (code, 1);
3485 x86_fstp (code, 0);
3486 unordered_check = code;
3487 x86_branch8 (code, X86_CC_P, 0, FALSE);
3488 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3489 x86_patch (unordered_check, code);
3490 break;
3492 if (ins->dreg != X86_EAX)
3493 x86_push_reg (code, X86_EAX);
3495 EMIT_FPCOMPARE(code);
3496 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3497 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3498 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3499 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3501 if (ins->dreg != X86_EAX)
3502 x86_pop_reg (code, X86_EAX);
3503 break;
3504 case OP_FCLT:
3505 case OP_FCLT_UN:
3506 if (cfg->opt & MONO_OPT_FCMOV) {
3507 /* zeroing the register at the start results in
3508 * shorter and faster code (we can also remove the widening op)
3510 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3511 x86_fcomip (code, 1);
3512 x86_fstp (code, 0);
3513 if (ins->opcode == OP_FCLT_UN) {
3514 guchar *unordered_check = code;
3515 guchar *jump_to_end;
3516 x86_branch8 (code, X86_CC_P, 0, FALSE);
3517 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3518 jump_to_end = code;
3519 x86_jump8 (code, 0);
3520 x86_patch (unordered_check, code);
3521 x86_inc_reg (code, ins->dreg);
3522 x86_patch (jump_to_end, code);
3523 } else {
3524 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3526 break;
3528 if (ins->dreg != X86_EAX)
3529 x86_push_reg (code, X86_EAX);
3531 EMIT_FPCOMPARE(code);
3532 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3533 if (ins->opcode == OP_FCLT_UN) {
3534 guchar *is_not_zero_check, *end_jump;
3535 is_not_zero_check = code;
3536 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3537 end_jump = code;
3538 x86_jump8 (code, 0);
3539 x86_patch (is_not_zero_check, code);
3540 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3542 x86_patch (end_jump, code);
3544 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3545 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3547 if (ins->dreg != X86_EAX)
3548 x86_pop_reg (code, X86_EAX);
3549 break;
3550 case OP_FCGT:
3551 case OP_FCGT_UN:
3552 if (cfg->opt & MONO_OPT_FCMOV) {
3553 /* zeroing the register at the start results in
3554 * shorter and faster code (we can also remove the widening op)
3556 guchar *unordered_check;
3557 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3558 x86_fcomip (code, 1);
3559 x86_fstp (code, 0);
3560 if (ins->opcode == OP_FCGT) {
3561 unordered_check = code;
3562 x86_branch8 (code, X86_CC_P, 0, FALSE);
3563 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3564 x86_patch (unordered_check, code);
3565 } else {
3566 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3568 break;
3570 if (ins->dreg != X86_EAX)
3571 x86_push_reg (code, X86_EAX);
3573 EMIT_FPCOMPARE(code);
3574 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3575 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3576 if (ins->opcode == OP_FCGT_UN) {
3577 guchar *is_not_zero_check, *end_jump;
3578 is_not_zero_check = code;
3579 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3580 end_jump = code;
3581 x86_jump8 (code, 0);
3582 x86_patch (is_not_zero_check, code);
3583 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3585 x86_patch (end_jump, code);
3587 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3588 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3590 if (ins->dreg != X86_EAX)
3591 x86_pop_reg (code, X86_EAX);
3592 break;
3593 case OP_FBEQ:
3594 if (cfg->opt & MONO_OPT_FCMOV) {
3595 guchar *jump = code;
3596 x86_branch8 (code, X86_CC_P, 0, TRUE);
3597 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3598 x86_patch (jump, code);
3599 break;
3601 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3602 EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3603 break;
3604 case OP_FBNE_UN:
3605 /* Branch if C013 != 100 */
3606 if (cfg->opt & MONO_OPT_FCMOV) {
3607 /* branch if !ZF or (PF|CF) */
3608 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3609 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3610 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3611 break;
3613 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3614 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3615 break;
3616 case OP_FBLT:
3617 if (cfg->opt & MONO_OPT_FCMOV) {
3618 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3619 break;
3621 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3622 break;
3623 case OP_FBLT_UN:
3624 if (cfg->opt & MONO_OPT_FCMOV) {
3625 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3626 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3627 break;
3629 if (ins->opcode == OP_FBLT_UN) {
3630 guchar *is_not_zero_check, *end_jump;
3631 is_not_zero_check = code;
3632 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3633 end_jump = code;
3634 x86_jump8 (code, 0);
3635 x86_patch (is_not_zero_check, code);
3636 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3638 x86_patch (end_jump, code);
3640 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3641 break;
3642 case OP_FBGT:
3643 case OP_FBGT_UN:
3644 if (cfg->opt & MONO_OPT_FCMOV) {
3645 if (ins->opcode == OP_FBGT) {
3646 guchar *br1;
3648 /* skip branch if C1=1 */
3649 br1 = code;
3650 x86_branch8 (code, X86_CC_P, 0, FALSE);
3651 /* branch if (C0 | C3) = 1 */
3652 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3653 x86_patch (br1, code);
3654 } else {
3655 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3657 break;
3659 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3660 if (ins->opcode == OP_FBGT_UN) {
3661 guchar *is_not_zero_check, *end_jump;
3662 is_not_zero_check = code;
3663 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3664 end_jump = code;
3665 x86_jump8 (code, 0);
3666 x86_patch (is_not_zero_check, code);
3667 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3669 x86_patch (end_jump, code);
3671 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3672 break;
3673 case OP_FBGE:
3674 /* Branch if C013 == 100 or 001 */
3675 if (cfg->opt & MONO_OPT_FCMOV) {
3676 guchar *br1;
3678 /* skip branch if C1=1 */
3679 br1 = code;
3680 x86_branch8 (code, X86_CC_P, 0, FALSE);
3681 /* branch if (C0 | C3) = 1 */
3682 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3683 x86_patch (br1, code);
3684 break;
3686 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3687 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3688 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3689 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3690 break;
3691 case OP_FBGE_UN:
3692 /* Branch if C013 == 000 */
3693 if (cfg->opt & MONO_OPT_FCMOV) {
3694 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3695 break;
3697 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3698 break;
3699 case OP_FBLE:
3700 /* Branch if C013=000 or 100 */
3701 if (cfg->opt & MONO_OPT_FCMOV) {
3702 guchar *br1;
3704 /* skip branch if C1=1 */
3705 br1 = code;
3706 x86_branch8 (code, X86_CC_P, 0, FALSE);
3707 /* branch if C0=0 */
3708 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3709 x86_patch (br1, code);
3710 break;
3712 x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3713 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3714 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3715 break;
3716 case OP_FBLE_UN:
3717 /* Branch if C013 != 001 */
3718 if (cfg->opt & MONO_OPT_FCMOV) {
3719 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3720 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3721 break;
3723 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3724 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3725 break;
3726 case OP_CKFINITE: {
3727 guchar *br1;
3728 x86_push_reg (code, X86_EAX);
3729 x86_fxam (code);
3730 x86_fnstsw (code);
3731 x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3732 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3733 x86_pop_reg (code, X86_EAX);
3735 /* Have to clean up the fp stack before throwing the exception */
3736 br1 = code;
3737 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3739 x86_fstp (code, 0);
3740 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3742 x86_patch (br1, code);
3743 break;
3745 case OP_TLS_GET: {
3746 code = mono_x86_emit_tls_get (code, ins->dreg, ins->inst_offset);
3747 break;
3749 case OP_MEMORY_BARRIER: {
3750 /* Not needed on x86 */
3751 break;
3753 case OP_ATOMIC_ADD_I4: {
3754 int dreg = ins->dreg;
3756 if (dreg == ins->inst_basereg) {
3757 x86_push_reg (code, ins->sreg2);
3758 dreg = ins->sreg2;
3761 if (dreg != ins->sreg2)
3762 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3764 x86_prefix (code, X86_LOCK_PREFIX);
3765 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3767 if (dreg != ins->dreg) {
3768 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3769 x86_pop_reg (code, dreg);
3772 break;
3774 case OP_ATOMIC_ADD_NEW_I4: {
3775 int dreg = ins->dreg;
3777 /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3778 if (ins->sreg2 == dreg) {
3779 if (dreg == X86_EBX) {
3780 dreg = X86_EDI;
3781 if (ins->inst_basereg == X86_EDI)
3782 dreg = X86_ESI;
3783 } else {
3784 dreg = X86_EBX;
3785 if (ins->inst_basereg == X86_EBX)
3786 dreg = X86_EDI;
3788 } else if (ins->inst_basereg == dreg) {
3789 if (dreg == X86_EBX) {
3790 dreg = X86_EDI;
3791 if (ins->sreg2 == X86_EDI)
3792 dreg = X86_ESI;
3793 } else {
3794 dreg = X86_EBX;
3795 if (ins->sreg2 == X86_EBX)
3796 dreg = X86_EDI;
3800 if (dreg != ins->dreg) {
3801 x86_push_reg (code, dreg);
3804 x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3805 x86_prefix (code, X86_LOCK_PREFIX);
3806 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3807 /* dreg contains the old value; add sreg2 to produce the new value */
3808 x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3810 if (ins->dreg != dreg) {
3811 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3812 x86_pop_reg (code, dreg);
3815 break;
3817 case OP_ATOMIC_EXCHANGE_I4: {
3818 guchar *br[2];
3819 int sreg2 = ins->sreg2;
3820 int breg = ins->inst_basereg;
3822 /* cmpxchg uses eax as the comparand, so we need to make sure we can use it;
3823 * hack to overcome limits in x86 reg allocator
3824 * (req: dreg == eax and sreg2 != eax and breg != eax)
3826 g_assert (ins->dreg == X86_EAX);
3828 /* We need the EAX reg for the cmpxchg */
3829 if (ins->sreg2 == X86_EAX) {
3830 sreg2 = (breg == X86_EDX) ? X86_EBX : X86_EDX;
3831 x86_push_reg (code, sreg2);
3832 x86_mov_reg_reg (code, sreg2, X86_EAX, 4);
3835 if (breg == X86_EAX) {
3836 breg = (sreg2 == X86_ESI) ? X86_EDI : X86_ESI;
3837 x86_push_reg (code, breg);
3838 x86_mov_reg_reg (code, breg, X86_EAX, 4);
3841 x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3843 br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3844 x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3845 br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3846 x86_patch (br [1], br [0]);
3848 if (breg != ins->inst_basereg)
3849 x86_pop_reg (code, breg);
3851 if (ins->sreg2 != sreg2)
3852 x86_pop_reg (code, sreg2);
3854 break;
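/*
 * The sequence above implements the exchange as a cmpxchg retry loop; a
 * rough C equivalent (hypothetical helper using a GCC-style builtin):
 *
 *   static gint32
 *   atomic_exchange_sketch (volatile gint32 *ptr, gint32 val)
 *   {
 *       gint32 old;
 *       do {
 *           old = *ptr;
 *       } while (!__sync_bool_compare_and_swap (ptr, old, val));
 *       return old;  // ends up in EAX, the opcode's fixed dreg
 *   }
 */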
3856 case OP_ATOMIC_CAS_I4: {
3857 g_assert (ins->sreg3 == X86_EAX);
3858 g_assert (ins->sreg1 != X86_EAX);
3859 g_assert (ins->sreg1 != ins->sreg2);
3861 x86_prefix (code, X86_LOCK_PREFIX);
3862 x86_cmpxchg_membase_reg (code, ins->sreg1, ins->inst_offset, ins->sreg2);
3864 if (ins->dreg != X86_EAX)
3865 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3866 break;
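/*
 * OP_ATOMIC_CAS_I4 maps directly onto cmpxchg: sreg3 (pinned to EAX) holds
 * the expected value, sreg2 the replacement, and the old memory value comes
 * back in EAX. Illustrative C equivalent (GCC-style builtin):
 *
 *   gint32 old = __sync_val_compare_and_swap (ptr, expected, new_val);
 */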
3868 #ifdef MONO_ARCH_SIMD_INTRINSICS
3869 case OP_ADDPS:
3870 x86_sse_alu_ps_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
3871 break;
3872 case OP_DIVPS:
3873 x86_sse_alu_ps_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
3874 break;
3875 case OP_MULPS:
3876 x86_sse_alu_ps_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
3877 break;
3878 case OP_SUBPS:
3879 x86_sse_alu_ps_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
3880 break;
3881 case OP_MAXPS:
3882 x86_sse_alu_ps_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
3883 break;
3884 case OP_MINPS:
3885 x86_sse_alu_ps_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
3886 break;
3887 case OP_COMPPS:
3888 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
3889 x86_sse_alu_ps_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
3890 break;
3891 case OP_ANDPS:
3892 x86_sse_alu_ps_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
3893 break;
3894 case OP_ANDNPS:
3895 x86_sse_alu_ps_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
3896 break;
3897 case OP_ORPS:
3898 x86_sse_alu_ps_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
3899 break;
3900 case OP_XORPS:
3901 x86_sse_alu_ps_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
3902 break;
3903 case OP_SQRTPS:
3904 x86_sse_alu_ps_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
3905 break;
3906 case OP_RSQRTPS:
3907 x86_sse_alu_ps_reg_reg (code, X86_SSE_RSQRT, ins->dreg, ins->sreg1);
3908 break;
3909 case OP_RCPPS:
3910 x86_sse_alu_ps_reg_reg (code, X86_SSE_RCP, ins->dreg, ins->sreg1);
3911 break;
3912 case OP_ADDSUBPS:
3913 x86_sse_alu_sd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
3914 break;
3915 case OP_HADDPS:
3916 x86_sse_alu_sd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
3917 break;
3918 case OP_HSUBPS:
3919 x86_sse_alu_sd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
3920 break;
3921 case OP_DUPPS_HIGH:
3922 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSHDUP, ins->dreg, ins->sreg1);
3923 break;
3924 case OP_DUPPS_LOW:
3925 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSLDUP, ins->dreg, ins->sreg1);
3926 break;
3928 case OP_PSHUFLEW_HIGH:
3929 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3930 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 1);
3931 break;
3932 case OP_PSHUFLEW_LOW:
3933 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3934 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 0);
3935 break;
3936 case OP_PSHUFLED:
3937 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3938 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->sreg1, ins->inst_c0);
3939 break;
3941 case OP_ADDPD:
3942 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
3943 break;
3944 case OP_DIVPD:
3945 x86_sse_alu_pd_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
3946 break;
3947 case OP_MULPD:
3948 x86_sse_alu_pd_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
3949 break;
3950 case OP_SUBPD:
3951 x86_sse_alu_pd_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
3952 break;
3953 case OP_MAXPD:
3954 x86_sse_alu_pd_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
3955 break;
3956 case OP_MINPD:
3957 x86_sse_alu_pd_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
3958 break;
3959 case OP_COMPPD:
3960 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
3961 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
3962 break;
3963 case OP_ANDPD:
3964 x86_sse_alu_pd_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
3965 break;
3966 case OP_ANDNPD:
3967 x86_sse_alu_pd_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
3968 break;
3969 case OP_ORPD:
3970 x86_sse_alu_pd_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
3971 break;
3972 case OP_XORPD:
3973 x86_sse_alu_pd_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
3974 break;
3975 case OP_SQRTPD:
3976 x86_sse_alu_pd_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
3977 break;
3978 case OP_ADDSUBPD:
3979 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
3980 break;
3981 case OP_HADDPD:
3982 x86_sse_alu_pd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
3983 break;
3984 case OP_HSUBPD:
3985 x86_sse_alu_pd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
3986 break;
3987 case OP_DUPPD:
3988 x86_sse_alu_sd_reg_reg (code, X86_SSE_MOVDDUP, ins->dreg, ins->sreg1);
3989 break;
3991 case OP_EXTRACT_MASK:
3992 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMOVMSKB, ins->dreg, ins->sreg1);
3993 break;
3995 case OP_PAND:
3996 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAND, ins->sreg1, ins->sreg2);
3997 break;
3998 case OP_POR:
3999 x86_sse_alu_pd_reg_reg (code, X86_SSE_POR, ins->sreg1, ins->sreg2);
4000 break;
4001 case OP_PXOR:
4002 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->sreg1, ins->sreg2);
4003 break;
4005 case OP_PADDB:
4006 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDB, ins->sreg1, ins->sreg2);
4007 break;
4008 case OP_PADDW:
4009 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDW, ins->sreg1, ins->sreg2);
4010 break;
4011 case OP_PADDD:
4012 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDD, ins->sreg1, ins->sreg2);
4013 break;
4014 case OP_PADDQ:
4015 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDQ, ins->sreg1, ins->sreg2);
4016 break;
4018 case OP_PSUBB:
4019 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBB, ins->sreg1, ins->sreg2);
4020 break;
4021 case OP_PSUBW:
4022 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBW, ins->sreg1, ins->sreg2);
4023 break;
4024 case OP_PSUBD:
4025 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBD, ins->sreg1, ins->sreg2);
4026 break;
4027 case OP_PSUBQ:
4028 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBQ, ins->sreg1, ins->sreg2);
4029 break;
4031 case OP_PMAXB_UN:
4032 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXUB, ins->sreg1, ins->sreg2);
4033 break;
4034 case OP_PMAXW_UN:
4035 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUW, ins->sreg1, ins->sreg2);
4036 break;
4037 case OP_PMAXD_UN:
4038 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUD, ins->sreg1, ins->sreg2);
4039 break;
4041 case OP_PMAXB:
4042 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSB, ins->sreg1, ins->sreg2);
4043 break;
4044 case OP_PMAXW:
4045 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXSW, ins->sreg1, ins->sreg2);
4046 break;
4047 case OP_PMAXD:
4048 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSD, ins->sreg1, ins->sreg2);
4049 break;
4051 case OP_PAVGB_UN:
4052 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGB, ins->sreg1, ins->sreg2);
4053 break;
4054 case OP_PAVGW_UN:
4055 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGW, ins->sreg1, ins->sreg2);
4056 break;
4058 case OP_PMINB_UN:
4059 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINUB, ins->sreg1, ins->sreg2);
4060 break;
4061 case OP_PMINW_UN:
4062 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUW, ins->sreg1, ins->sreg2);
4063 break;
4064 case OP_PMIND_UN:
4065 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUD, ins->sreg1, ins->sreg2);
4066 break;
4068 case OP_PMINB:
4069 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSB, ins->sreg1, ins->sreg2);
4070 break;
4071 case OP_PMINW:
4072 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINSW, ins->sreg1, ins->sreg2);
4073 break;
4074 case OP_PMIND:
4075 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSD, ins->sreg1, ins->sreg2);
4076 break;
4078 case OP_PCMPEQB:
4079 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQB, ins->sreg1, ins->sreg2);
4080 break;
4081 case OP_PCMPEQW:
4082 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQW, ins->sreg1, ins->sreg2);
4083 break;
4084 case OP_PCMPEQD:
4085 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2);
4086 break;
4087 case OP_PCMPEQQ:
4088 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPEQQ, ins->sreg1, ins->sreg2);
4089 break;
4091 case OP_PCMPGTB:
4092 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2);
4093 break;
4094 case OP_PCMPGTW:
4095 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTW, ins->sreg1, ins->sreg2);
4096 break;
4097 case OP_PCMPGTD:
4098 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2);
4099 break;
4100 case OP_PCMPGTQ:
4101 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPGTQ, ins->sreg1, ins->sreg2);
4102 break;
4104 case OP_PSUM_ABS_DIFF:
4105 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2);
4106 break;
4108 case OP_UNPACK_LOWB:
4109 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLBW, ins->sreg1, ins->sreg2);
4110 break;
4111 case OP_UNPACK_LOWW:
4112 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLWD, ins->sreg1, ins->sreg2);
4113 break;
4114 case OP_UNPACK_LOWD:
4115 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLDQ, ins->sreg1, ins->sreg2);
4116 break;
4117 case OP_UNPACK_LOWQ:
4118 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLQDQ, ins->sreg1, ins->sreg2);
4119 break;
4120 case OP_UNPACK_LOWPS:
4121 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4122 break;
4123 case OP_UNPACK_LOWPD:
4124 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4125 break;
4127 case OP_UNPACK_HIGHB:
4128 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHBW, ins->sreg1, ins->sreg2);
4129 break;
4130 case OP_UNPACK_HIGHW:
4131 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHWD, ins->sreg1, ins->sreg2);
4132 break;
4133 case OP_UNPACK_HIGHD:
4134 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHDQ, ins->sreg1, ins->sreg2);
4135 break;
4136 case OP_UNPACK_HIGHQ:
4137 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHQDQ, ins->sreg1, ins->sreg2);
4138 break;
4139 case OP_UNPACK_HIGHPS:
4140 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4141 break;
4142 case OP_UNPACK_HIGHPD:
4143 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4144 break;
4146 case OP_PACKW:
4147 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSWB, ins->sreg1, ins->sreg2);
4148 break;
4149 case OP_PACKD:
4150 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSDW, ins->sreg1, ins->sreg2);
4151 break;
4152 case OP_PACKW_UN:
4153 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKUSWB, ins->sreg1, ins->sreg2);
4154 break;
4155 case OP_PACKD_UN:
4156 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PACKUSDW, ins->sreg1, ins->sreg2);
4157 break;
4159 case OP_PADDB_SAT_UN:
4160 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSB, ins->sreg1, ins->sreg2);
4161 break;
4162 case OP_PSUBB_SAT_UN:
4163 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSB, ins->sreg1, ins->sreg2);
4164 break;
4165 case OP_PADDW_SAT_UN:
4166 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSW, ins->sreg1, ins->sreg2);
4167 break;
4168 case OP_PSUBW_SAT_UN:
4169 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSW, ins->sreg1, ins->sreg2);
4170 break;
4172 case OP_PADDB_SAT:
4173 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSB, ins->sreg1, ins->sreg2);
4174 break;
4175 case OP_PSUBB_SAT:
4176 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSB, ins->sreg1, ins->sreg2);
4177 break;
4178 case OP_PADDW_SAT:
4179 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSW, ins->sreg1, ins->sreg2);
4180 break;
4181 case OP_PSUBW_SAT:
4182 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSW, ins->sreg1, ins->sreg2);
4183 break;
4185 case OP_PMULW:
4186 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULLW, ins->sreg1, ins->sreg2);
4187 break;
4188 case OP_PMULD:
4189 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMULLD, ins->sreg1, ins->sreg2);
4190 break;
4191 case OP_PMULQ:
4192 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULUDQ, ins->sreg1, ins->sreg2);
4193 break;
4194 case OP_PMULW_HIGH_UN:
4195 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHUW, ins->sreg1, ins->sreg2);
4196 break;
4197 case OP_PMULW_HIGH:
4198 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHW, ins->sreg1, ins->sreg2);
4199 break;
4201 case OP_PSHRW:
4202 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4203 break;
4204 case OP_PSHRW_REG:
4205 x86_sse_shift_reg_reg (code, X86_SSE_PSRLW_REG, ins->dreg, ins->sreg2);
4206 break;
4208 case OP_PSARW:
4209 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4210 break;
4211 case OP_PSARW_REG:
4212 x86_sse_shift_reg_reg (code, X86_SSE_PSRAW_REG, ins->dreg, ins->sreg2);
4213 break;
4215 case OP_PSHLW:
4216 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4217 break;
4218 case OP_PSHLW_REG:
4219 x86_sse_shift_reg_reg (code, X86_SSE_PSLLW_REG, ins->dreg, ins->sreg2);
4220 break;
4222 case OP_PSHRD:
4223 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4224 break;
4225 case OP_PSHRD_REG:
4226 x86_sse_shift_reg_reg (code, X86_SSE_PSRLD_REG, ins->dreg, ins->sreg2);
4227 break;
4229 case OP_PSARD:
4230 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4231 break;
4232 case OP_PSARD_REG:
4233 x86_sse_shift_reg_reg (code, X86_SSE_PSRAD_REG, ins->dreg, ins->sreg2);
4234 break;
4236 case OP_PSHLD:
4237 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4238 break;
4239 case OP_PSHLD_REG:
4240 x86_sse_shift_reg_reg (code, X86_SSE_PSLLD_REG, ins->dreg, ins->sreg2);
4241 break;
4243 case OP_PSHRQ:
4244 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4245 break;
4246 case OP_PSHRQ_REG:
4247 x86_sse_shift_reg_reg (code, X86_SSE_PSRLQ_REG, ins->dreg, ins->sreg2);
4248 break;
4250 case OP_PSHLQ:
4251 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4252 break;
4253 case OP_PSHLQ_REG:
4254 x86_sse_shift_reg_reg (code, X86_SSE_PSLLQ_REG, ins->dreg, ins->sreg2);
4255 break;
4257 case OP_ICONV_TO_X:
4258 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4259 break;
4260 case OP_EXTRACT_I4:
4261 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4262 break;
4263 case OP_EXTRACT_I1:
4264 case OP_EXTRACT_U1:
4265 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4266 if (ins->inst_c0)
4267 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
4268 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
4269 break;
4270 case OP_EXTRACT_I2:
4271 case OP_EXTRACT_U2:
4272 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4273 if (ins->inst_c0)
4274 x86_shift_reg_imm (code, X86_SHR, ins->dreg, 16);
4275 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE);
4276 break;
4277 case OP_EXTRACT_R8:
4278 if (ins->inst_c0)
4279 x86_sse_alu_pd_membase_reg (code, X86_SSE_MOVHPD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4280 else
4281 x86_sse_alu_sd_membase_reg (code, X86_SSE_MOVSD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4282 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE);
4283 break;
4285 case OP_INSERT_I2:
4286 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->sreg1, ins->sreg2, ins->inst_c0);
4287 break;
4288 case OP_EXTRACTX_U2:
4289 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PEXTRW, ins->dreg, ins->sreg1, ins->inst_c0);
4290 break;
4291 case OP_INSERTX_U1_SLOW:
4292 /* sreg1 is the extracted ireg (scratch)
4293  * sreg2 is the ireg to be inserted (scratch)
4294  * dreg is the xreg that receives the value */
4296 /*clear the bits from the extracted word*/
4297 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
4298 /*shift the value to insert if needed*/
4299 if (ins->inst_c0 & 1)
4300 x86_shift_reg_imm (code, X86_SHL, ins->sreg2, 8);
4301 /*join them together*/
4302 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
4303 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
4304 break;
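/*
 * Worked example for the insert above, assuming inst_c0 = 3 (the high byte
 * of word 1): the AND keeps only the low byte of the extracted word in
 * sreg1, sreg2 is shifted left by 8 into the high byte, the OR merges them,
 * and PINSRW writes the rebuilt 16-bit word back at index inst_c0 / 2 = 1.
 */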
4305 case OP_INSERTX_I4_SLOW:
4306 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
4307 x86_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
4308 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
4309 break;
4311 case OP_INSERTX_R4_SLOW:
4312 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4313 /*TODO if inst_c0 == 0 use movss*/
4314 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 0, ins->inst_c0 * 2);
4315 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 2, ins->inst_c0 * 2 + 1);
4316 break;
4317 case OP_INSERTX_R8_SLOW:
4318 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4319 if (ins->inst_c0)
4320 x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVHPD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4321 else
4322 x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVSD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4323 break;
4325 case OP_STOREX_MEMBASE_REG:
4326 case OP_STOREX_MEMBASE:
4327 x86_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4328 break;
4329 case OP_LOADX_MEMBASE:
4330 x86_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4331 break;
4332 case OP_LOADX_ALIGNED_MEMBASE:
4333 x86_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4334 break;
4335 case OP_STOREX_ALIGNED_MEMBASE_REG:
4336 x86_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4337 break;
4338 case OP_STOREX_NTA_MEMBASE_REG:
4339 x86_sse_alu_reg_membase (code, X86_SSE_MOVNTPS, ins->dreg, ins->sreg1, ins->inst_offset);
4340 break;
4341 case OP_PREFETCH_MEMBASE:
4342 x86_sse_alu_reg_membase (code, X86_SSE_PREFETCH, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
4344 break;
4345 case OP_XMOVE:
4346 /*FIXME the peephole pass should have killed this*/
4347 if (ins->dreg != ins->sreg1)
4348 x86_movaps_reg_reg (code, ins->dreg, ins->sreg1);
4349 break;
4350 case OP_XZERO:
4351 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->dreg, ins->dreg);
4352 break;
4353 case OP_ICONV_TO_R8_RAW:
4354 x86_mov_membase_reg (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1, 4);
4355 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE);
4356 break;
4358 case OP_FCONV_TO_R8_X:
4359 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4360 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4361 break;
4363 case OP_XCONV_R8_TO_I4:
4364 x86_cvttsd2si (code, ins->dreg, ins->sreg1);
4365 switch (ins->backend.source_opcode) {
4366 case OP_FCONV_TO_I1:
4367 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
4368 break;
4369 case OP_FCONV_TO_U1:
4370 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4371 break;
4372 case OP_FCONV_TO_I2:
4373 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
4374 break;
4375 case OP_FCONV_TO_U2:
4376 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
4377 break;
4379 break;
4381 case OP_EXPAND_I1:
4382 /*FIXME this causes a partial register stall, maybe it would not be that bad to use shift + mask + or*/
4383 /* The +4 selects the high-byte register alias (AH/BH/CH/DH), i.e. a mov ?h, ?l over the same reg. */
4384 x86_mov_reg_reg (code, ins->sreg1 + 4, ins->sreg1, 1);
4385 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4386 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4387 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4388 break;
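/*
 * Broadcast sketch: the high-byte-alias mov duplicates the byte inside the
 * low 16 bits of sreg1 (e.g. 0x3f becomes 0x3f3f), the two PINSRWs copy
 * that word into lanes 0 and 1, and PSHUFD with imm 0 replicates dword 0,
 * so the whole xmm register ends up as 0x3f repeated in every byte.
 */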
4389 case OP_EXPAND_I2:
4390 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4391 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4392 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4393 break;
4394 case OP_EXPAND_I4:
4395 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4396 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4397 break;
4398 case OP_EXPAND_R4:
4399 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4400 x86_movd_xreg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4401 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4402 break;
4403 case OP_EXPAND_R8:
4404 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4405 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4406 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0x44);
4407 break;
4408 #endif
4409 case OP_LIVERANGE_START: {
4410 if (cfg->verbose_level > 1)
4411 printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4412 MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
4413 break;
4415 case OP_LIVERANGE_END: {
4416 if (cfg->verbose_level > 1)
4417 printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4418 MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
4419 break;
4421 default:
4422 g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
4423 g_assert_not_reached ();
4426 if (G_UNLIKELY ((code - cfg->native_code - offset) > max_len)) {
4427 g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4428 mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4429 g_assert_not_reached ();
4432 cpos += max_len;
4435 cfg->code_len = code - cfg->native_code;
4438 #endif /* DISABLE_JIT */
4440 void
4441 mono_arch_register_lowlevel_calls (void)
4445 void
4446 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4448 MonoJumpInfo *patch_info;
4449 gboolean compile_aot = !run_cctors;
4451 for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4452 unsigned char *ip = patch_info->ip.i + code;
4453 const unsigned char *target;
4455 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4457 if (compile_aot) {
4458 switch (patch_info->type) {
4459 case MONO_PATCH_INFO_BB:
4460 case MONO_PATCH_INFO_LABEL:
4461 break;
4462 default:
4463 /* No need to patch these */
4464 continue;
4468 switch (patch_info->type) {
4469 case MONO_PATCH_INFO_IP:
4470 *((gconstpointer *)(ip)) = target;
4471 break;
4472 case MONO_PATCH_INFO_CLASS_INIT: {
4473 guint8 *code = ip;
4474 /* Might already have been changed to a nop */
4475 x86_call_code (code, 0);
4476 x86_patch (ip, target);
4477 break;
4479 case MONO_PATCH_INFO_ABS:
4480 case MONO_PATCH_INFO_METHOD:
4481 case MONO_PATCH_INFO_METHOD_JUMP:
4482 case MONO_PATCH_INFO_INTERNAL_METHOD:
4483 case MONO_PATCH_INFO_BB:
4484 case MONO_PATCH_INFO_LABEL:
4485 case MONO_PATCH_INFO_RGCTX_FETCH:
4486 case MONO_PATCH_INFO_GENERIC_CLASS_INIT:
4487 case MONO_PATCH_INFO_MONITOR_ENTER:
4488 case MONO_PATCH_INFO_MONITOR_EXIT:
4489 x86_patch (ip, target);
4490 break;
4491 case MONO_PATCH_INFO_NONE:
4492 break;
4493 default: {
4494 guint32 offset = mono_arch_get_patch_offset (ip);
4495 *((gconstpointer *)(ip + offset)) = target;
4496 break;
4502 guint8 *
4503 mono_arch_emit_prolog (MonoCompile *cfg)
4505 MonoMethod *method = cfg->method;
4506 MonoBasicBlock *bb;
4507 MonoMethodSignature *sig;
4508 MonoInst *inst;
4509 int alloc_size, pos, max_offset, i, cfa_offset;
4510 guint8 *code;
4511 gboolean need_stack_frame;
4513 cfg->code_size = MAX (cfg->header->code_size * 4, 10240);
4515 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4516 cfg->code_size += 512;
4518 code = cfg->native_code = g_malloc (cfg->code_size);
4520 /* Offset between ESP and the CFA */
4521 cfa_offset = 0;
4523 // CFA = sp + 4
4524 cfa_offset = sizeof (gpointer);
4525 mono_emit_unwind_op_def_cfa (cfg, code, X86_ESP, sizeof (gpointer));
4526 // IP saved at CFA - 4
4527 /* There is no IP reg on x86 */
4528 mono_emit_unwind_op_offset (cfg, code, X86_NREG, -cfa_offset);
4530 need_stack_frame = needs_stack_frame (cfg);
4532 if (need_stack_frame) {
4533 x86_push_reg (code, X86_EBP);
4534 cfa_offset += sizeof (gpointer);
4535 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
4536 mono_emit_unwind_op_offset (cfg, code, X86_EBP, - cfa_offset);
4537 x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
4538 mono_emit_unwind_op_def_cfa_reg (cfg, code, X86_EBP);
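/*
 * Unwind arithmetic sketch for the standard frame: on entry the CFA is
 * ESP + 4 (the caller pushed the return address, stored at CFA - 4); after
 * `push %ebp` the CFA becomes ESP + 8 with the saved EBP at CFA - 8, and
 * once `mov %esp, %ebp` runs, the CFA can be expressed as EBP + 8 for the
 * rest of the method.
 */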
4541 alloc_size = cfg->stack_offset;
4542 pos = 0;
4544 if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
4545 /* Might need to attach the thread to the JIT or change the domain for the callback */
4546 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
4547 guint8 *buf, *no_domain_branch;
4549 code = mono_x86_emit_tls_get (code, X86_EAX, appdomain_tls_offset);
4550 x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
4551 no_domain_branch = code;
4552 x86_branch8 (code, X86_CC_NE, 0, 0);
4553 code = mono_x86_emit_tls_get ( code, X86_EAX, lmf_tls_offset);
4554 x86_test_reg_reg (code, X86_EAX, X86_EAX);
4555 buf = code;
4556 x86_branch8 (code, X86_CC_NE, 0, 0);
4557 x86_patch (no_domain_branch, code);
4558 x86_push_imm (code, cfg->domain);
4559 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4560 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4561 x86_patch (buf, code);
4562 #ifdef TARGET_WIN32
4563 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4564 /* FIXME: Add a separate key for LMF to avoid this */
4565 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4566 #endif
4568 else {
4569 g_assert (!cfg->compile_aot);
4570 x86_push_imm (code, cfg->domain);
4571 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4572 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4576 if (method->save_lmf) {
4577 pos += sizeof (MonoLMF);
4579 if (cfg->compile_aot)
4580 cfg->disable_aot = TRUE;
4582 /* save the current IP */
4583 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
4584 x86_push_imm_template (code);
4585 cfa_offset += sizeof (gpointer);
4587 /* save all caller saved regs */
4588 x86_push_reg (code, X86_EBP);
4589 cfa_offset += sizeof (gpointer);
4590 x86_push_reg (code, X86_ESI);
4591 cfa_offset += sizeof (gpointer);
4592 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
4593 x86_push_reg (code, X86_EDI);
4594 cfa_offset += sizeof (gpointer);
4595 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
4596 x86_push_reg (code, X86_EBX);
4597 cfa_offset += sizeof (gpointer);
4598 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
4600 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
4602 * Optimized version which uses the mono_lmf TLS variable instead of indirection
4603 * through the mono_lmf_addr TLS variable.
4605 /* %eax = previous_lmf */
4606 x86_prefix (code, X86_GS_PREFIX);
4607 x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
4608 /* skip esp + method_info + lmf */
4609 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
4610 /* push previous_lmf */
4611 x86_push_reg (code, X86_EAX);
4612 /* new lmf = ESP */
4613 x86_prefix (code, X86_GS_PREFIX);
4614 x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
4615 } else {
4616 /* get the address of lmf for the current thread */
4618 * This is performance critical so we try to use some tricks to make
4619 * it fast.
4622 if (lmf_addr_tls_offset != -1) {
4623 /* Load the lmf quickly using the GS register */
4624 code = mono_x86_emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
4625 #ifdef TARGET_WIN32
4626 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4627 /* FIXME: Add a separate key for LMF to avoid this */
4628 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4629 #endif
4630 } else {
4631 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
4634 /* Skip esp + method info */
4635 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
4637 /* push lmf */
4638 x86_push_reg (code, X86_EAX);
4639 /* push *lmf (previous_lmf) */
4640 x86_push_membase (code, X86_EAX, 0);
4641 /* *(lmf) = ESP */
4642 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
4644 } else {
4646 if (cfg->used_int_regs & (1 << X86_EBX)) {
4647 x86_push_reg (code, X86_EBX);
4648 pos += 4;
4649 cfa_offset += sizeof (gpointer);
4650 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
4653 if (cfg->used_int_regs & (1 << X86_EDI)) {
4654 x86_push_reg (code, X86_EDI);
4655 pos += 4;
4656 cfa_offset += sizeof (gpointer);
4657 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
4660 if (cfg->used_int_regs & (1 << X86_ESI)) {
4661 x86_push_reg (code, X86_ESI);
4662 pos += 4;
4663 cfa_offset += sizeof (gpointer);
4664 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
4668 alloc_size -= pos;
4670 /* the original alloc_size is already aligned, but %ebp and the ret ip have been pushed, so realign */
4671 if (mono_do_x86_stack_align && need_stack_frame) {
4672 int tot = alloc_size + pos + 4; /* ret ip */
4673 if (need_stack_frame)
4674 tot += 4; /* ebp */
4675 tot &= MONO_ARCH_FRAME_ALIGNMENT - 1;
4676 if (tot)
4677 alloc_size += MONO_ARCH_FRAME_ALIGNMENT - tot;
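/*
 * Alignment example with hypothetical numbers (frame alignment 16): for
 * alloc_size = 20 and pos = 8, tot = 20 + 8 + 4 (ret ip) + 4 (ebp) = 36 and
 * 36 & 15 = 4, so alloc_size grows by 16 - 4 = 12, making the full frame 48
 * bytes and keeping ESP 16-byte aligned after the prolog.
 */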
4680 if (alloc_size) {
4681 /* See mono_emit_stack_alloc */
4682 #if defined(TARGET_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
4683 guint32 remaining_size = alloc_size;
4684 /* FIXME: handle unbounded code expansion; we should use a loop in case of more than X iterations */
4685 guint32 required_code_size = ((remaining_size / 0x1000) + 1) * 8; /*8 is the max size of x86_alu_reg_imm + x86_test_membase_reg*/
4686 guint32 offset = code - cfg->native_code;
4687 if (G_UNLIKELY (required_code_size >= (cfg->code_size - offset))) {
4688 while (required_code_size >= (cfg->code_size - offset))
4689 cfg->code_size *= 2;
4690 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4691 code = cfg->native_code + offset;
4692 mono_jit_stats.code_reallocs++;
4694 while (remaining_size >= 0x1000) {
4695 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
4696 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
4697 remaining_size -= 0x1000;
4699 if (remaining_size)
4700 x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
4701 #else
4702 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
4703 #endif
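/*
 * Probe-loop example: a 0x2710 (10000) byte frame is allocated as
 * sub 0x1000 / test / sub 0x1000 / test / sub 0x710, touching every page so
 * the OS guard page is hit in order; a single large sub could skip the
 * guard page entirely and fault instead of growing the stack.
 */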
4705 g_assert (need_stack_frame);
4708 if (cfg->method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED ||
4709 cfg->method->wrapper_type == MONO_WRAPPER_RUNTIME_INVOKE) {
4710 x86_alu_reg_imm (code, X86_AND, X86_ESP, -MONO_ARCH_FRAME_ALIGNMENT);
4713 #if DEBUG_STACK_ALIGNMENT
4714 /* check the stack is aligned */
4715 if (need_stack_frame && method->wrapper_type == MONO_WRAPPER_NONE) {
4716 x86_mov_reg_reg (code, X86_ECX, X86_ESP, 4);
4717 x86_alu_reg_imm (code, X86_AND, X86_ECX, MONO_ARCH_FRAME_ALIGNMENT - 1);
4718 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
4719 x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
4720 x86_breakpoint (code);
4722 #endif
4724 /* compute max_offset in order to use short forward jumps */
4725 max_offset = 0;
4726 if (cfg->opt & MONO_OPT_BRANCH) {
4727 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4728 MonoInst *ins;
4729 bb->max_offset = max_offset;
4731 if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4732 max_offset += 6;
4733 /* max alignment for loops */
4734 if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4735 max_offset += LOOP_ALIGNMENT;
4737 MONO_BB_FOR_EACH_INS (bb, ins) {
4738 if (ins->opcode == OP_LABEL)
4739 ins->inst_c1 = max_offset;
4741 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
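/*
 * Example of how max_offset pays off: a forward branch whose target block
 * has a max_offset within signed-8-bit range of the current position can be
 * emitted as a 2-byte rel8 jump instead of the 5- or 6-byte rel32 form,
 * without a second sizing pass.
 */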
4746 /* store runtime generic context */
4747 if (cfg->rgctx_var) {
4748 g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET && cfg->rgctx_var->inst_basereg == X86_EBP);
4750 x86_mov_membase_reg (code, X86_EBP, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 4);
4753 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4754 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4756 /* load arguments allocated to register from the stack */
4757 sig = mono_method_signature (method);
4758 pos = 0;
4760 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4761 inst = cfg->args [pos];
4762 if (inst->opcode == OP_REGVAR) {
4763 g_assert (need_stack_frame);
4764 x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
4765 if (cfg->verbose_level > 2)
4766 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
4768 pos++;
4771 cfg->code_len = code - cfg->native_code;
4773 g_assert (cfg->code_len < cfg->code_size);
4775 return code;
4778 void
4779 mono_arch_emit_epilog (MonoCompile *cfg)
4781 MonoMethod *method = cfg->method;
4782 MonoMethodSignature *sig = mono_method_signature (method);
4783 int quad, pos;
4784 guint32 stack_to_pop;
4785 guint8 *code;
4786 int max_epilog_size = 16;
4787 CallInfo *cinfo;
4788 gboolean need_stack_frame = needs_stack_frame (cfg);
4790 if (cfg->method->save_lmf)
4791 max_epilog_size += 128;
4793 while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
4794 cfg->code_size *= 2;
4795 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4796 mono_jit_stats.code_reallocs++;
4799 code = cfg->native_code + cfg->code_len;
4801 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4802 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4804 /* the code restoring the registers must be kept in sync with OP_JMP */
4805 pos = 0;
4807 if (method->save_lmf) {
4808 gint32 prev_lmf_reg;
4809 gint32 lmf_offset = -sizeof (MonoLMF);
4811 /* check if we need to restore protection of the stack after a stack overflow */
4812 if (mono_get_jit_tls_offset () != -1) {
4813 guint8 *patch;
4814 code = mono_x86_emit_tls_get (code, X86_ECX, mono_get_jit_tls_offset ());
4815 /* we load the value in a separate instruction: this mechanism may be
4816 * used later as a safer way to do thread interruption
4818 x86_mov_reg_membase (code, X86_ECX, X86_ECX, G_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 4);
4819 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
4820 patch = code;
4821 x86_branch8 (code, X86_CC_Z, 0, FALSE);
4822 /* note that the call trampoline will preserve eax/edx */
4823 x86_call_reg (code, X86_ECX);
4824 x86_patch (patch, code);
4825 } else {
4826 /* FIXME: maybe save the jit tls in the prolog */
4828 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
4830 * Optimized version which uses the mono_lmf TLS variable instead of indirection
4831 * through the mono_lmf_addr TLS variable.
4833 /* reg = previous_lmf */
4834 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
4836 /* lmf = previous_lmf */
4837 x86_prefix (code, X86_GS_PREFIX);
4838 x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
4839 } else {
4840 /* Find a spare register */
4841 switch (mini_type_get_underlying_type (cfg->generic_sharing_context, sig->ret)->type) {
4842 case MONO_TYPE_I8:
4843 case MONO_TYPE_U8:
4844 prev_lmf_reg = X86_EDI;
4845 cfg->used_int_regs |= (1 << X86_EDI);
4846 break;
4847 default:
4848 prev_lmf_reg = X86_EDX;
4849 break;
4852 /* reg = previous_lmf */
4853 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
4855 /* ecx = lmf */
4856 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
4858 /* *(lmf) = previous_lmf */
4859 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
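/*
 * Both branches above perform the same unlink, roughly
 * *(lmf->lmf_addr) = lmf->previous_lmf in C terms; the TLS fast path simply
 * stores previous_lmf straight into the mono_lmf TLS slot instead of
 * dereferencing lmf_addr.
 */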
4862 /* restore caller saved regs */
4863 if (cfg->used_int_regs & (1 << X86_EBX)) {
4864 x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
4867 if (cfg->used_int_regs & (1 << X86_EDI)) {
4868 x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
4870 if (cfg->used_int_regs & (1 << X86_ESI)) {
4871 x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
4874 /* EBP is restored by LEAVE */
4875 } else {
4876 if (cfg->used_int_regs & (1 << X86_EBX)) {
4877 pos -= 4;
4879 if (cfg->used_int_regs & (1 << X86_EDI)) {
4880 pos -= 4;
4882 if (cfg->used_int_regs & (1 << X86_ESI)) {
4883 pos -= 4;
4886 if (pos) {
4887 g_assert (need_stack_frame);
4888 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
4891 if (cfg->used_int_regs & (1 << X86_ESI)) {
4892 x86_pop_reg (code, X86_ESI);
4894 if (cfg->used_int_regs & (1 << X86_EDI)) {
4895 x86_pop_reg (code, X86_EDI);
4897 if (cfg->used_int_regs & (1 << X86_EBX)) {
4898 x86_pop_reg (code, X86_EBX);
4902 /* Load returned vtypes into registers if needed */
4903 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
4904 if (cinfo->ret.storage == ArgValuetypeInReg) {
4905 for (quad = 0; quad < 2; quad ++) {
4906 switch (cinfo->ret.pair_storage [quad]) {
4907 case ArgInIReg:
4908 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
4909 break;
4910 case ArgOnFloatFpStack:
4911 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
4912 break;
4913 case ArgOnDoubleFpStack:
4914 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
4915 break;
4916 case ArgNone:
4917 break;
4918 default:
4919 g_assert_not_reached ();
4924 if (need_stack_frame)
4925 x86_leave (code);
4927 if (CALLCONV_IS_STDCALL (sig)) {
4928 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
4930 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
4931 } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
4932 stack_to_pop = 4;
4933 else
4934 stack_to_pop = 0;
4936 if (stack_to_pop) {
4937 g_assert (need_stack_frame);
4938 x86_ret_imm (code, stack_to_pop);
4939 } else {
4940 x86_ret (code);
4943 cfg->code_len = code - cfg->native_code;
4945 g_assert (cfg->code_len < cfg->code_size);
4948 void
4949 mono_arch_emit_exceptions (MonoCompile *cfg)
4951 MonoJumpInfo *patch_info;
4952 int nthrows, i;
4953 guint8 *code;
4954 MonoClass *exc_classes [16];
4955 guint8 *exc_throw_start [16], *exc_throw_end [16];
4956 guint32 code_size;
4957 int exc_count = 0;
4959 /* Compute needed space */
4960 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4961 if (patch_info->type == MONO_PATCH_INFO_EXC)
4962 exc_count++;
4966 * make sure we have enough space for exceptions
4967 * 16 is the size of two push_imm instructions and a call
4969 if (cfg->compile_aot)
4970 code_size = exc_count * 32;
4971 else
4972 code_size = exc_count * 16;
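/*
 * Sizing sketch for the non-AOT case: each throw sequence below is
 * push imm32 (5 bytes) + push imm32 (5 bytes) + call rel32 (5 bytes),
 * 15 bytes rounded up to 16; the AOT estimate of 32 presumably leaves room
 * for the longer patchable sequences AOT requires.
 */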
4974 while (cfg->code_len + code_size > (cfg->code_size - 16)) {
4975 cfg->code_size *= 2;
4976 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4977 mono_jit_stats.code_reallocs++;
4980 code = cfg->native_code + cfg->code_len;
4982 nthrows = 0;
4983 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4984 switch (patch_info->type) {
4985 case MONO_PATCH_INFO_EXC: {
4986 MonoClass *exc_class;
4987 guint8 *buf, *buf2;
4988 guint32 throw_ip;
4990 x86_patch (patch_info->ip.i + cfg->native_code, code);
4992 exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4993 g_assert (exc_class);
4994 throw_ip = patch_info->ip.i;
4996 /* Find a throw sequence for the same exception class */
4997 for (i = 0; i < nthrows; ++i)
4998 if (exc_classes [i] == exc_class)
4999 break;
5000 if (i < nthrows) {
5001 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
5002 x86_jump_code (code, exc_throw_start [i]);
5003 patch_info->type = MONO_PATCH_INFO_NONE;
5005 else {
5006 guint32 size;
5008 /* Compute size of code following the push <OFFSET> */
5009 size = 5 + 5;
5011 /*This is aligned to 16 bytes by the callee. This way we save a few bytes here.*/
5013 if ((code - cfg->native_code) - throw_ip < 126 - size) {
5014 /* Use the shorter form */
5015 buf = buf2 = code;
5016 x86_push_imm (code, 0);
5018 else {
5019 buf = code;
5020 x86_push_imm (code, 0xf0f0f0f0);
5021 buf2 = code;
5024 if (nthrows < 16) {
5025 exc_classes [nthrows] = exc_class;
5026 exc_throw_start [nthrows] = code;
5029 x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
5030 patch_info->data.name = "mono_arch_throw_corlib_exception";
5031 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
5032 patch_info->ip.i = code - cfg->native_code;
5033 x86_call_code (code, 0);
5034 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
5035 while (buf < buf2)
5036 x86_nop (buf);
5038 if (nthrows < 16) {
5039 exc_throw_end [nthrows] = code;
5040 nthrows ++;
5043 break;
5045 default:
5046 /* do nothing */
5047 break;
5051 cfg->code_len = code - cfg->native_code;
5053 g_assert (cfg->code_len < cfg->code_size);
5056 void
5057 mono_arch_flush_icache (guint8 *code, gint size)
5059 /* not needed */
5062 void
5063 mono_arch_flush_register_windows (void)
5067 gboolean
5068 mono_arch_is_inst_imm (gint64 imm)
5070 return TRUE;
5074 * Support for fast access to the thread-local lmf structure using the GS
5075 * segment register on NPTL + kernel 2.6.x.
5078 static gboolean tls_offset_inited = FALSE;
5080 void
5081 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
5083 if (!tls_offset_inited) {
5084 if (!getenv ("MONO_NO_TLS")) {
5085 #ifdef TARGET_WIN32
5087 * We need to init this multiple times, since when we are first called, the key might not
5088 * be initialized yet.
5090 appdomain_tls_offset = mono_domain_get_tls_key ();
5091 lmf_tls_offset = mono_get_jit_tls_key ();
5093 /* Only the first 64 TLS entries can be accessed using inline code */
5094 if (appdomain_tls_offset >= 64)
5095 appdomain_tls_offset = -1;
5096 if (lmf_tls_offset >= 64)
5097 lmf_tls_offset = -1;
5098 #else
5099 #if MONO_XEN_OPT
5100 optimize_for_xen = access ("/proc/xen", F_OK) == 0;
5101 #endif
5102 tls_offset_inited = TRUE;
5103 appdomain_tls_offset = mono_domain_get_tls_offset ();
5104 lmf_tls_offset = mono_get_lmf_tls_offset ();
5105 lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
5106 #endif
5111 void
5112 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
5116 #ifdef MONO_ARCH_HAVE_IMT
5118 // Linear handler, the bsearch head compare is shorter
5119 //[2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
5120 //[1 + 1] x86_branch8(inst,cond,imm,is_signed)
5121 // x86_patch(ins,target)
5122 //[1 + 5] x86_jump_mem(inst,mem)
5124 #define CMP_SIZE 6
5125 #define BR_SMALL_SIZE 2
5126 #define BR_LARGE_SIZE 5
5127 #define JUMP_IMM_SIZE 6
5128 #define ENABLE_WRONG_METHOD_CHECK 0
5129 #define DEBUG_IMT 0
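/*
 * The constants mirror the sizing comment above: cmp reg, imm32 is 2 + 4
 * bytes (CMP_SIZE), jcc rel8 is 1 + 1 (BR_SMALL_SIZE) and jmp *mem is 1 + 5
 * (JUMP_IMM_SIZE). The builder below runs in two passes: first it sums
 * chunk_size per entry to reserve the thunk buffer, then it emits the code.
 */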
5131 static int
5132 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
5134 int i, distance = 0;
5135 for (i = start; i < target; ++i)
5136 distance += imt_entries [i]->chunk_size;
5137 return distance;
5141 * LOCKING: called with the domain lock held
5143 gpointer
5144 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
5145 gpointer fail_tramp)
5147 int i;
5148 int size = 0;
5149 guint8 *code, *start;
5151 for (i = 0; i < count; ++i) {
5152 MonoIMTCheckItem *item = imt_entries [i];
5153 if (item->is_equals) {
5154 if (item->check_target_idx) {
5155 if (!item->compare_done)
5156 item->chunk_size += CMP_SIZE;
5157 item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
5158 } else {
5159 if (fail_tramp) {
5160 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + JUMP_IMM_SIZE * 2;
5161 } else {
5162 item->chunk_size += JUMP_IMM_SIZE;
5163 #if ENABLE_WRONG_METHOD_CHECK
5164 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
5165 #endif
5168 } else {
5169 item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
5170 imt_entries [item->check_target_idx]->compare_done = TRUE;
5172 size += item->chunk_size;
5174 if (fail_tramp)
5175 code = mono_method_alloc_generic_virtual_thunk (domain, size);
5176 else
5177 code = mono_domain_code_reserve (domain, size);
5178 start = code;
5179 for (i = 0; i < count; ++i) {
5180 MonoIMTCheckItem *item = imt_entries [i];
5181 item->code_target = code;
5182 if (item->is_equals) {
5183 if (item->check_target_idx) {
5184 if (!item->compare_done)
5185 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5186 item->jmp_code = code;
5187 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5188 if (item->has_target_code)
5189 x86_jump_code (code, item->value.target_code);
5190 else
5191 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5192 } else {
5193 if (fail_tramp) {
5194 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5195 item->jmp_code = code;
5196 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5197 if (item->has_target_code)
5198 x86_jump_code (code, item->value.target_code);
5199 else
5200 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5201 x86_patch (item->jmp_code, code);
5202 x86_jump_code (code, fail_tramp);
5203 item->jmp_code = NULL;
5204 } else {
5205 /* enable the commented code to assert on wrong method */
5206 #if ENABLE_WRONG_METHOD_CHECK
5207 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5208 item->jmp_code = code;
5209 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5210 #endif
5211 if (item->has_target_code)
5212 x86_jump_code (code, item->value.target_code);
5213 else
5214 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5215 #if ENABLE_WRONG_METHOD_CHECK
5216 x86_patch (item->jmp_code, code);
5217 x86_breakpoint (code);
5218 item->jmp_code = NULL;
5219 #endif
5222 } else {
5223 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5224 item->jmp_code = code;
5225 if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
5226 x86_branch8 (code, X86_CC_GE, 0, FALSE);
5227 else
5228 x86_branch32 (code, X86_CC_GE, 0, FALSE);
5231 /* patch the branches to get to the target items */
5232 for (i = 0; i < count; ++i) {
5233 MonoIMTCheckItem *item = imt_entries [i];
5234 if (item->jmp_code) {
5235 if (item->check_target_idx) {
5236 x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
5241 if (!fail_tramp)
5242 mono_stats.imt_thunks_size += code - start;
5243 g_assert (code - start <= size);
5245 #if DEBUG_IMT
5247 char *buff = g_strdup_printf ("thunk_for_class_%s_%s_entries_%d", vtable->klass->name_space, vtable->klass->name, count);
5248 mono_disassemble_code (NULL, (guint8*)start, code - start, buff);
5249 g_free (buff);
5251 #endif
5253 return start;
5256 MonoMethod*
5257 mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
5259 return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
5261 #endif
5263 MonoVTable*
5264 mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
5266 return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
5269 MonoInst*
5270 mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
5272 MonoInst *ins = NULL;
5273 int opcode = 0;
5275 if (cmethod->klass == mono_defaults.math_class) {
5276 if (strcmp (cmethod->name, "Sin") == 0) {
5277 opcode = OP_SIN;
5278 } else if (strcmp (cmethod->name, "Cos") == 0) {
5279 opcode = OP_COS;
5280 } else if (strcmp (cmethod->name, "Tan") == 0) {
5281 opcode = OP_TAN;
5282 } else if (strcmp (cmethod->name, "Atan") == 0) {
5283 opcode = OP_ATAN;
5284 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
5285 opcode = OP_SQRT;
5286 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
5287 opcode = OP_ABS;
5288 } else if (strcmp (cmethod->name, "Round") == 0 && fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R8) {
5289 opcode = OP_ROUND;
5292 if (opcode) {
5293 MONO_INST_NEW (cfg, ins, opcode);
5294 ins->type = STACK_R8;
5295 ins->dreg = mono_alloc_freg (cfg);
5296 ins->sreg1 = args [0]->dreg;
5297 MONO_ADD_INS (cfg->cbb, ins);
5300 if (cfg->opt & MONO_OPT_CMOV) {
5301 int opcode = 0;
5303 if (strcmp (cmethod->name, "Min") == 0) {
5304 if (fsig->params [0]->type == MONO_TYPE_I4)
5305 opcode = OP_IMIN;
5306 } else if (strcmp (cmethod->name, "Max") == 0) {
5307 if (fsig->params [0]->type == MONO_TYPE_I4)
5308 opcode = OP_IMAX;
5311 if (opcode) {
5312 MONO_INST_NEW (cfg, ins, opcode);
5313 ins->type = STACK_I4;
5314 ins->dreg = mono_alloc_ireg (cfg);
5315 ins->sreg1 = args [0]->dreg;
5316 ins->sreg2 = args [1]->dreg;
5317 MONO_ADD_INS (cfg->cbb, ins);
5321 #if 0
5322 /* OP_FREM is not IEEE compatible */
5323 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
5324 MONO_INST_NEW (cfg, ins, OP_FREM);
5325 ins->inst_i0 = args [0];
5326 ins->inst_i1 = args [1];
5328 #endif
5331 return ins;
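/*
 * Net effect (illustrative): a call to Math.Sqrt (x) is replaced inline
 * with a single OP_SQRT on x's register, and with MONO_OPT_CMOV enabled
 * Math.Min/Math.Max over int32 become OP_IMIN/OP_IMAX, which lower to
 * cmp + cmov rather than a compare-and-branch sequence.
 */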
5334 gboolean
5335 mono_arch_print_tree (MonoInst *tree, int arity)
5337 return 0;
5340 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
5342 MonoInst* ins;
5344 return NULL;
5346 if (appdomain_tls_offset == -1)
5347 return NULL;
5349 MONO_INST_NEW (cfg, ins, OP_TLS_GET);
5350 ins->inst_offset = appdomain_tls_offset;
5351 return ins;
5354 guint32
5355 mono_arch_get_patch_offset (guint8 *code)
5357 if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
5358 return 2;
5359 else if ((code [0] == 0xba))
5360 return 1;
5361 else if ((code [0] == 0x68))
5362 /* push IMM */
5363 return 1;
5364 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
5365 /* push <OFFSET>(<REG>) */
5366 return 2;
5367 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
5368 /* call *<OFFSET>(<REG>) */
5369 return 2;
5370 else if ((code [0] == 0xdd) || (code [0] == 0xd9))
5371 /* fldl <ADDR> */
5372 return 2;
5373 else if ((code [0] == 0x58) && (code [1] == 0x05))
5374 /* pop %eax; add <OFFSET>, %eax */
5375 return 2;
5376 else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
5377 /* pop <REG>; add <OFFSET>, <REG> */
5378 return 3;
5379 else if ((code [0] >= 0xb8) && (code [0] < 0xb8 + 8))
5380 /* mov <REG>, imm */
5381 return 1;
5382 else {
5383 g_assert_not_reached ();
5384 return -1;
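/*
 * Example encodings matched above (Intel syntax, illustrative bytes):
 *
 *   8b 93 xx xx xx xx    mov edx, [ebx + disp32]   -> patch target at +2
 *   68 xx xx xx xx       push imm32                -> patch target at +1
 *   b8 xx xx xx xx       mov eax, imm32            -> patch target at +1
 */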
5389 * mono_breakpoint_clean_code:
5391 * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
5392 * breakpoints in the original code, they are removed in the copy.
5394 * Returns TRUE if no sw breakpoint was present.
5396 gboolean
5397 mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
5399 int i;
5400 gboolean can_write = TRUE;
5402 * If method_start is non-NULL we need to perform bounds checks, since accessing
5403 * memory at code - offset could take us before the start of the method, into a
5404 * different page that is not mapped, or make us read incorrect data anyway. We
5405 * zero-fill the out-of-range bytes instead.
5407 if (!method_start || code - offset >= method_start) {
5408 memcpy (buf, code - offset, size);
5409 } else {
5410 int diff = code - method_start;
5411 memset (buf, 0, size);
5412 memcpy (buf + offset - diff, method_start, diff + size - offset);
5414 code -= offset;
5415 for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
5416 int idx = mono_breakpoint_info_index [i];
5417 guint8 *ptr;
5418 if (idx < 1)
5419 continue;
5420 ptr = mono_breakpoint_info [idx].address;
5421 if (ptr >= code && ptr < code + size) {
5422 guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
5423 can_write = FALSE;
5424 /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
5425 buf [ptr - code] = saved_byte;
5428 return can_write;
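/*
 * Bounds-handling example: with offset = 8 but code only 5 bytes into the
 * method, diff = 5, so buf [0..2] stay zero-filled and the copy starts at
 * buf + 3 reading from method_start, never touching the possibly unmapped
 * bytes before the method.
 */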
5431 gpointer
5432 mono_arch_get_vcall_slot (guint8 *code, mgreg_t *regs, int *displacement)
5434 guint8 buf [8];
5435 guint8 reg = 0;
5436 gint32 disp = 0;
5438 mono_breakpoint_clean_code (NULL, code, 8, buf, sizeof (buf));
5439 code = buf + 8;
5441 *displacement = 0;
5443 code -= 6;
5446 * A given byte sequence can match more than one case here, so we have to be
5447 * really careful about the ordering of the cases. Longer sequences
5448 * come first.
5449 * There are two types of calls:
5450 * - direct calls: 0xff address_byte 8/32 bits displacement
5451 * - indirect calls: nop nop nop <call>
5452 * The nops make sure we don't confuse the instruction preceding an indirect
5453 * call with a direct call.
5455 if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
5456 reg = code [4] & 0x07;
5457 disp = (signed char)code [5];
5458 } else if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
5459 reg = code [1] & 0x07;
5460 disp = *((gint32*)(code + 2));
5461 } else if ((code [1] == 0xe8)) {
5462 return NULL;
5463 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
5465 * This is an interface call
5466 * 8b 40 30 mov 0x30(%eax),%eax
5467 * ff 10 call *(%eax)
5469 disp = 0;
5470 reg = code [5] & 0x07;
5472 else
5473 return NULL;
5475 *displacement = disp;
5476 return (gpointer)regs [reg];
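/*
 * Decoding example: a return address preceded by ff 51 0c
 * (call [ecx + 0xc], the disp8 form matched first above) yields
 * reg = code [4] & 0x07 = ECX and disp = 0x0c, so the caller reads the
 * vtable slot at regs [X86_ECX] + 0xc.
 */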
5480 * mono_x86_get_this_arg_offset:
5482 * Return the offset of the stack location where the 'this' argument is passed during a virtual
5483 * call.
5485 guint32
5486 mono_x86_get_this_arg_offset (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig)
5488 CallInfo *cinfo = NULL;
5489 int offset;
5491 if (MONO_TYPE_ISSTRUCT (sig->ret)) {
5492 cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5494 offset = cinfo->args [0].offset;
5495 } else {
5496 offset = 0;
5499 return offset;
5502 gpointer
5503 mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig,
5504 mgreg_t *regs, guint8 *code)
5506 guint32 esp = regs [X86_ESP];
5507 CallInfo *cinfo = NULL;
5508 gpointer res;
5509 int offset;
5512 * Avoid expensive calls to get_generic_context_from_code () + get_call_info
5513 * if possible.
5515 if (MONO_TYPE_ISSTRUCT (sig->ret)) {
5516 if (!gsctx && code)
5517 gsctx = mono_get_generic_context_from_code (code);
5518 cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5520 offset = cinfo->args [0].offset;
5521 } else {
5522 offset = 0;
5526 * The stack looks like:
5527 * <other args>
5528 * <this=delegate>
5529 * <possible vtype return address>
5530 * <return addr>
5531 * <4 pointers pushed by mono_arch_create_trampoline_code ()>
5533 res = (((MonoObject**)esp) [5 + (offset / 4)]);
5534 if (cinfo)
5535 g_free (cinfo);
5536 return res;
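/*
 * Slot arithmetic sketch: esp points at the 4 registers pushed by the
 * trampoline, then the return address, so index 5 is the first stack
 * argument. Without a vtype return, offset is 0 and 'this' is esp [5]; with
 * one, 'this' typically sits one slot higher and offset / 4 accounts for it.
 */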
#define MAX_ARCH_DELEGATE_PARAMS 10

gpointer
mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
{
	guint8 *code, *start;

	if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
		return NULL;

	/* FIXME: Support more cases */
	if (MONO_TYPE_ISSTRUCT (sig->ret))
		return NULL;

	/*
	 * The stack contains:
	 * <delegate>
	 * <return addr>
	 */

	if (has_target) {
		static guint8* cached = NULL;

		if (cached)
			return cached;

		start = code = mono_global_codeman_reserve (64);

		/* Replace the this argument with the target */
		x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
		x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
		x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
		x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));

		g_assert ((code - start) < 64);

		mono_debug_add_delegate_trampoline (start, code - start);

		mono_memory_barrier ();

		cached = start;
	} else {
		static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
		int i = 0;
		/* 8 bytes for the mov_reg and jump, plus 8 for each parameter */
		int code_reserve = 8 + (sig->param_count * 8);

		for (i = 0; i < sig->param_count; ++i)
			if (!mono_is_regsize_var (sig->params [i]))
				return NULL;

		code = cache [sig->param_count];
		if (code)
			return code;

		/*
		 * The stack contains:
		 * <args in reverse order>
		 * <delegate>
		 * <return addr>
		 *
		 * and we need:
		 * <args in reverse order>
		 * <return addr>
		 *
		 * without unbalancing the stack.
		 * So we move each arg up a spot in the stack (overwriting the no longer
		 * needed 'this' arg), leaving the original spot of the first arg as a
		 * placeholder so that when the callee pops the stack everything works.
		 */
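
		/*
		 * Sketch of that shift for a hypothetical param_count == 2 delegate
		 * (each slot is 4 bytes, the stack grows down):
		 *
		 *   before               after
		 *   esp+12: arg1         esp+12: arg1   (stale copy, unused placeholder)
		 *   esp+8:  arg0         esp+8:  arg1
		 *   esp+4:  delegate     esp+4:  arg0
		 *   esp+0:  ret addr     esp+0:  ret addr
		 *
		 * The callee then sees exactly the frame a static call would have built.
		 */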

		start = code = mono_global_codeman_reserve (code_reserve);

		/* store delegate for access to method_ptr */
		x86_mov_reg_membase (code, X86_ECX, X86_ESP, 4, 4);

		/* move args up */
		for (i = 0; i < sig->param_count; ++i) {
			x86_mov_reg_membase (code, X86_EAX, X86_ESP, (i+2)*4, 4);
			x86_mov_membase_reg (code, X86_ESP, (i+1)*4, X86_EAX, 4);
		}

		x86_jump_membase (code, X86_ECX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));

		g_assert ((code - start) < code_reserve);

		mono_debug_add_delegate_trampoline (start, code - start);

		mono_memory_barrier ();

		cache [sig->param_count] = start;
	}

	return start;
}
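
/*
 * For reference, the has_target trampoline above emits roughly this sequence
 * (the MonoDelegate offsets are symbolic, resolved via G_STRUCT_OFFSET):
 *
 *   mov 0x4(%esp),%eax            ; load the delegate ('this')
 *   mov <target>(%eax),%ecx       ; load delegate->target
 *   mov %ecx,0x4(%esp)            ; replace 'this' with the target
 *   jmp *<method_ptr>(%eax)       ; tail call delegate->method_ptr
 */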
gpointer
mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
{
	switch (reg) {
	case X86_EAX: return (gpointer)ctx->eax;
	case X86_EBX: return (gpointer)ctx->ebx;
	case X86_ECX: return (gpointer)ctx->ecx;
	case X86_EDX: return (gpointer)ctx->edx;
	case X86_ESP: return (gpointer)ctx->esp;
	case X86_EBP: return (gpointer)ctx->ebp;
	case X86_ESI: return (gpointer)ctx->esi;
	case X86_EDI: return (gpointer)ctx->edi;
	default: g_assert_not_reached ();
	}
}

#ifdef MONO_ARCH_SIMD_INTRINSICS

static MonoInst*
get_float_to_x_spill_area (MonoCompile *cfg)
{
	if (!cfg->fconv_to_r8_x_var) {
		cfg->fconv_to_r8_x_var = mono_compile_create_var (cfg, &mono_defaults.double_class->byval_arg, OP_LOCAL);
		cfg->fconv_to_r8_x_var->flags |= MONO_INST_VOLATILE; /* FIXME: use the don't-regalloc flag */
	}
	return cfg->fconv_to_r8_x_var;
}

/*
 * Convert all fconv opts that MONO_OPT_SSE2 would get wrong.
 */
void
mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
{
	MonoInst *fconv;
	int dreg, src_opcode;

	if (!(cfg->opt & MONO_OPT_SSE2) || !(cfg->opt & MONO_OPT_SIMD) || COMPILE_LLVM (cfg))
		return;

	switch (src_opcode = ins->opcode) {
	case OP_FCONV_TO_I1:
	case OP_FCONV_TO_U1:
	case OP_FCONV_TO_I2:
	case OP_FCONV_TO_U2:
	case OP_FCONV_TO_I4:
	case OP_FCONV_TO_I:
		break;
	default:
		return;
	}

	/* dreg is the IREG and sreg1 is the FREG */
	MONO_INST_NEW (cfg, fconv, OP_FCONV_TO_R8_X);
	fconv->klass = NULL; /* FIXME: what can we use here, as the Mono.Simd lib might not be loaded yet? */
	fconv->sreg1 = ins->sreg1;
	fconv->dreg = mono_alloc_ireg (cfg);
	fconv->type = STACK_VTYPE;
	fconv->backend.spill_var = get_float_to_x_spill_area (cfg);

	mono_bblock_insert_before_ins (cfg->cbb, ins, fconv);

	dreg = ins->dreg;
	NULLIFY_INS (ins);
	ins->opcode = OP_XCONV_R8_TO_I4;

	ins->klass = mono_defaults.int32_class;
	ins->sreg1 = fconv->dreg;
	ins->dreg = dreg;
	ins->type = STACK_I4;
	ins->backend.source_opcode = src_opcode;
}
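
/*
 * Sketch of the rewrite performed above (the vreg numbers are made up):
 *
 *   before:  OP_FCONV_TO_I4    R10 <- F11
 *   after:   OP_FCONV_TO_R8_X  X12 <- F11   (move the value from the fp stack to an xreg)
 *            OP_XCONV_R8_TO_I4 R10 <- X12   (SSE2 truncating conversion)
 *
 * The original opcode is kept in backend.source_opcode so code generation can
 * still apply the right size/signedness handling for the narrower variants.
 */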

#endif /* #ifdef MONO_ARCH_SIMD_INTRINSICS */

void
mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins)
{
	MonoInst *ins;
	int vreg;

	if (long_ins->opcode == OP_LNEG) {
		ins = long_ins;
		MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 1, ins->sreg1 + 1);
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ADC_IMM, ins->dreg + 2, ins->sreg1 + 2, 0);
		MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 2, ins->dreg + 2);
		NULLIFY_INS (ins);
		return;
	}
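
	/*
	 * Worked example of the OP_LNEG lowering above, negating the 64-bit
	 * value 0x00000001_00000000 (register pair: low = 0x0, high = 0x1):
	 *
	 *   neg low   -> 0x00000000, CF = 0 (low was zero)
	 *   adc high  -> 0x1 + 0 + CF = 0x00000001
	 *   neg high  -> 0xffffffff
	 *
	 * giving 0xffffffff_00000000, i.e. -0x100000000 in two's complement.
	 */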

#ifdef MONO_ARCH_SIMD_INTRINSICS

	if (!(cfg->opt & MONO_OPT_SIMD))
		return;

	/* TODO: move this to simd-intrinsic.c once we support SSE 4.1 dword extractors, since we need the runtime caps info */
	switch (long_ins->opcode) {
	case OP_EXTRACT_I8:
		vreg = long_ins->sreg1;

		if (long_ins->inst_c0) {
			MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
			ins->klass = long_ins->klass;
			ins->sreg1 = long_ins->sreg1;
			ins->inst_c0 = 2;
			ins->type = STACK_VTYPE;
			ins->dreg = vreg = alloc_ireg (cfg);
			MONO_ADD_INS (cfg->cbb, ins);
		}

		MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
		ins->klass = mono_defaults.int32_class;
		ins->sreg1 = vreg;
		ins->type = STACK_I4;
		ins->dreg = long_ins->dreg + 1;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
		ins->klass = long_ins->klass;
		ins->sreg1 = long_ins->sreg1;
		ins->inst_c0 = long_ins->inst_c0 ? 3 : 1;
		ins->type = STACK_VTYPE;
		ins->dreg = vreg = alloc_ireg (cfg);
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
		ins->klass = mono_defaults.int32_class;
		ins->sreg1 = vreg;
		ins->type = STACK_I4;
		ins->dreg = long_ins->dreg + 2;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
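
	/*
	 * Lane sketch for the OP_EXTRACT_I8 lowering above, assuming OP_EXTRACT_I4
	 * reads lane 0 and OP_PSHUFLED behaves like pshufd. Viewing the xreg as
	 * dwords [d0 d1 d2 d3] (element 0 = d1:d0, element 1 = d3:d2):
	 *
	 *   inst_c0 == 0: extract d0 -> dreg+1; pshufd #1, extract d1 -> dreg+2
	 *   inst_c0 == 1: pshufd #2, extract d2 -> dreg+1;
	 *                 pshufd #3, extract d3 -> dreg+2
	 */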
	case OP_INSERTX_I8_SLOW:
		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg2 + 1;
		ins->inst_c0 = long_ins->inst_c0 * 2;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg2 + 2;
		ins->inst_c0 = long_ins->inst_c0 * 2 + 1;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
	case OP_EXPAND_I8:
		MONO_INST_NEW (cfg, ins, OP_ICONV_TO_X);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->sreg1 + 1;
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg1 + 2;
		ins->inst_c0 = 1;
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->inst_c0 = 0x44; /* Magic number for swizzling (X,Y,X,Y) */
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
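
	/*
	 * Why 0x44 above: pshufd's immediate selects a source dword per result
	 * lane, two bits each; 0x44 = 01 00 01 00 binary, i.e. (d0, d1, d0, d1),
	 * which duplicates the 64-bit value into both halves of the xreg.
	 */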
	}
#endif /* MONO_ARCH_SIMD_INTRINSICS */
}

/*MONO_ARCH_HAVE_HANDLER_BLOCK_GUARD*/
gpointer
mono_arch_install_handler_block_guard (MonoJitInfo *ji, MonoJitExceptionInfo *clause, MonoContext *ctx, gpointer new_value)
{
	int offset;
	gpointer *sp, old_value;
	char *bp;
	const unsigned char *handler;

	/* Decode the first instruction to figure out where we stored the spvar */
	/*
	 * Our JIT MUST generate the following:
	 *   mov %esp, -?(%ebp)
	 * which is encoded as 0x89 mod_rm, where mod_rm (esp, ebp, imm) can be
	 * (imm will never be zero):
	 *   mod (reg + imm8):  01 reg(esp): 100 rm(ebp): 101 -> 01100101 (0x65)
	 *   mod (reg + imm32): 10 reg(esp): 100 rm(ebp): 101 -> 10100101 (0xA5)
	 */
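
	/*
	 * Hypothetical concrete encodings of the two accepted forms:
	 *   89 65 f0             mov %esp,-0x10(%ebp)   -> offset = -16  (imm8)
	 *   89 a5 00 ff ff ff    mov %esp,-0x100(%ebp)  -> offset = -256 (imm32)
	 */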
	handler = clause->handler_start;

	if (*handler != 0x89)
		return NULL;

	++handler;

	if (*handler == 0x65)
		offset = *(signed char*)(handler + 1);
	else if (*handler == 0xA5)
		offset = *(int*)(handler + 1);
	else
		return NULL;

	/* Load the spvar */
	bp = MONO_CONTEXT_GET_BP (ctx);
	sp = *(gpointer*)(bp + offset);

	old_value = *sp;
	if (old_value < ji->code_start || (char*)old_value > ((char*)ji->code_start + ji->code_size))
		return old_value;

	*sp = new_value;

	return old_value;
}

#if __APPLE__
#define DBG_SIGNAL SIGBUS
#else
#define DBG_SIGNAL SIGSEGV
#endif

/* Soft Debug support */
#ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED

/*
 * mono_arch_set_breakpoint:
 *
 *   Set a breakpoint at the native code address IP.
 * The location should contain code emitted by OP_SEQ_POINT.
 */
void
mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
{
	guint8 *code = ip;

	/*
	 * In production we would use int3 (the breakpoint size in the machine
	 * description file would have to be fixed up), but that could confuse
	 * gdb, so during development we emit a SIGSEGV instead.
	 */
	g_assert (code [0] == 0x90);
	x86_alu_reg_mem (code, X86_CMP, X86_EAX, (guint32)bp_trigger_page);
}
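
/*
 * The sequence emitted above is 6 bytes, matching BREAKPOINT_SIZE, e.g.
 * (assuming a hypothetical bp_trigger_page of 0x12340000):
 *
 *   3b 05 00 00 34 12    cmp 0x12340000,%eax
 *
 * The bp trigger page is kept unreadable, so executing the armed breakpoint
 * faults, and mono_arch_is_breakpoint_event () below recognizes the fault by
 * its address.
 */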
/*
 * mono_arch_clear_breakpoint:
 *
 *   Clear the breakpoint at IP.
 */
void
mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
{
	guint8 *code = ip;
	int i;

	for (i = 0; i < 6; ++i)
		x86_nop (code);
}

/*
 * mono_arch_start_single_stepping:
 *
 *   Start single stepping.
 */
void
mono_arch_start_single_stepping (void)
{
	mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
}

/*
 * mono_arch_stop_single_stepping:
 *
 *   Stop single stepping.
 */
void
mono_arch_stop_single_stepping (void)
{
	mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
}

/*
 * mono_arch_is_single_step_event:
 *
 *   Return whether the machine state in SIGCTX corresponds to a single
 * step event.
 */
gboolean
mono_arch_is_single_step_event (void *info, void *sigctx)
{
#ifdef TARGET_WIN32
	EXCEPTION_RECORD* einfo = (EXCEPTION_RECORD*)info;

	/* Sometimes the address is off by 4 */
	if ((einfo->ExceptionInformation [1] >= ss_trigger_page && (guint8*)einfo->ExceptionInformation [1] <= (guint8*)ss_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#else
	siginfo_t* sinfo = (siginfo_t*)info;

	/* Sometimes the address is off by 4 */
	if (sinfo->si_signo == DBG_SIGNAL && (sinfo->si_addr >= ss_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)ss_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#endif
}

gboolean
mono_arch_is_breakpoint_event (void *info, void *sigctx)
{
#ifdef TARGET_WIN32
	EXCEPTION_RECORD* einfo = (EXCEPTION_RECORD*)info;

	/* Sometimes the address is off by 4 */
	if ((einfo->ExceptionInformation [1] >= bp_trigger_page && (guint8*)einfo->ExceptionInformation [1] <= (guint8*)bp_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#else
	siginfo_t* sinfo = (siginfo_t*)info;

	/* Sometimes the address is off by 4 */
	if (sinfo->si_signo == DBG_SIGNAL && (sinfo->si_addr >= bp_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)bp_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#endif
}

/*
 * mono_arch_get_ip_for_breakpoint:
 *
 *   See mini-amd64.c for docs.
 */
guint8*
mono_arch_get_ip_for_breakpoint (MonoJitInfo *ji, MonoContext *ctx)
{
	guint8 *ip = MONO_CONTEXT_GET_IP (ctx);

	return ip;
}

#define BREAKPOINT_SIZE 6

/*
 * mono_arch_get_ip_for_single_step:
 *
 *   See mini-amd64.c for docs.
 */
guint8*
mono_arch_get_ip_for_single_step (MonoJitInfo *ji, MonoContext *ctx)
{
	guint8 *ip = MONO_CONTEXT_GET_IP (ctx);

	/* Skip the faulting instruction: size of x86_alu_reg_mem () */
	ip += 6;

	return ip;
}

/*
 * mono_arch_skip_breakpoint:
 *
 *   See mini-amd64.c for docs.
 */
void
mono_arch_skip_breakpoint (MonoContext *ctx)
{
	MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + BREAKPOINT_SIZE);
}

/*
 * mono_arch_skip_single_step:
 *
 *   See mini-amd64.c for docs.
 */
void
mono_arch_skip_single_step (MonoContext *ctx)
{
	MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 6);
}

/*
 * mono_arch_get_seq_point_info:
 *
 *   See mini-amd64.c for docs.
 */
gpointer
mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
{
	NOT_IMPLEMENTED;
	return NULL;
}

#endif