/*
 * mini-x86.c: x86 backend for the Mono code generator
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#include <unistd.h>

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/utils/mono-math.h>

#include "trace.h"
#include "mini-x86.h"
#include "inssel.h"
#include "cpu-x86.h"

/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#ifdef MONO_XEN_OPT
/* TRUE by default until we add runtime detection of Xen */
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
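
/*
 * Illustrative note: ALIGN_TO rounds val up to the next multiple of align
 * (align must be a power of two), e.g. ALIGN_TO (13, 4) == 16 and
 * ALIGN_TO (16, 4) == 16.
 */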

#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

#define NOT_IMPLEMENTED g_assert_not_reached ()

const char*
mono_arch_regname (int reg) {
	switch (reg) {
	case X86_EAX: return "%eax";
	case X86_EBX: return "%ebx";
	case X86_ECX: return "%ecx";
	case X86_EDX: return "%edx";
	case X86_ESP: return "%esp";
	case X86_EBP: return "%ebp";
	case X86_EDI: return "%edi";
	case X86_ESI: return "%esi";
	}
	return "unknown";
}

const char*
mono_arch_fregname (int reg) {
	return "unknown";
}

typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgOnFloatFpStack,
	ArgOnDoubleFpStack,
	ArgNone
} ArgStorage;

typedef struct {
	gint16 offset;
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	guint32 stack_align_amount;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;

#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

#ifdef PLATFORM_WIN32
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif

static void inline
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}

static void inline
add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	g_assert (PARAM_REGS == 0);

	ainfo->storage = ArgOnStack;
	(*stack_size) += sizeof (gpointer) * 2;
}

static void inline
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += is_double ? 8 : 4;
	}
	else {
		/* A double register */
		if (is_double)
			ainfo->storage = ArgInDoubleSSEReg;
		else
			ainfo->storage = ArgInFloatSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}

static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	if (sig->pinvoke)
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef PLATFORM_WIN32
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * The exact rules are not very well documented; the code below seems to work with the
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
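
/*
 * Illustrative summary of the win32 pinvoke return rules implemented above
 * (a sketch, not normative): a struct whose only field is a double (or float)
 * comes back on the FP stack; a struct of native size 1, 2 or 4 comes back in
 * EAX; a size-8 struct comes back in the EAX:EDX pair; everything else is
 * returned through a caller-provided stack slot.
 */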

/*
 * get_call_info:
 *
 * Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	ret_type = mono_type_get_underlying_type (sig->ret);
	switch (ret_type->type) {
	case MONO_TYPE_BOOLEAN:
	case MONO_TYPE_I1:
	case MONO_TYPE_U1:
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
	case MONO_TYPE_CHAR:
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_PTR:
	case MONO_TYPE_FNPTR:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_ARRAY:
	case MONO_TYPE_STRING:
		cinfo->ret.storage = ArgInIReg;
		cinfo->ret.reg = X86_EAX;
		break;
	case MONO_TYPE_U8:
	case MONO_TYPE_I8:
		cinfo->ret.storage = ArgInIReg;
		cinfo->ret.reg = X86_EAX;
		break;
	case MONO_TYPE_R4:
		cinfo->ret.storage = ArgOnFloatFpStack;
		break;
	case MONO_TYPE_R8:
		cinfo->ret.storage = ArgOnDoubleFpStack;
		break;
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		}
		/* Fall through */
	case MONO_TYPE_VALUETYPE: {
		guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

		add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
		if (cinfo->ret.storage == ArgOnStack)
			/* The caller passes the address where the value is stored */
			add_general (&gr, &stack_size, &cinfo->ret);
		break;
	}
	case MONO_TYPE_TYPEDBYREF:
		/* Same as a valuetype with size 24 */
		add_general (&gr, &stack_size, &cinfo->ret);
		break;
	case MONO_TYPE_VOID:
		cinfo->ret.storage = ArgNone;
		break;
	default:
		g_error ("Can't handle as return value 0x%x", sig->ret->type);
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mono_type_get_underlying_type (sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

#if defined(__APPLE__)
	if ((stack_size % 16) != 0) {
		cinfo->need_stack_align = TRUE;
		stack_size += cinfo->stack_align_amount = 16 - (stack_size % 16);
	}
#endif

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
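
/*
 * A minimal usage sketch (illustrative, not part of the backend): callers
 * own the returned CallInfo and must g_free it, e.g.:
 *
 *	CallInfo *cinfo = get_call_info (sig, FALSE);
 *	guint32 size = cinfo->stack_usage;          // bytes pushed for this call
 *	ArgStorage where = cinfo->args [0].storage; // ArgOnStack on x86
 *	g_free (cinfo);
 */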

/*
 * mono_arch_get_argument_info:
 * @csig: a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;
	CallInfo *cinfo;

	cinfo = get_call_info (csig, FALSE);

	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else {
			int ialign;
			size = mono_type_stack_size (csig->params [k], &ialign);
			align = ialign;
		}

		/* ignore alignment for now */
		align = 1;

		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}

static const guchar cpuid_impl [] = {
	0x55,				/* push %ebp */
	0x89, 0xe5,			/* mov %esp,%ebp */
	0x53,				/* push %ebx */
	0x8b, 0x45, 0x08,		/* mov 0x8(%ebp),%eax */
	0x0f, 0xa2,			/* cpuid */
	0x50,				/* push %eax */
	0x8b, 0x45, 0x10,		/* mov 0x10(%ebp),%eax */
	0x89, 0x18,			/* mov %ebx,(%eax) */
	0x8b, 0x45, 0x14,		/* mov 0x14(%ebp),%eax */
	0x89, 0x08,			/* mov %ecx,(%eax) */
	0x8b, 0x45, 0x18,		/* mov 0x18(%ebp),%eax */
	0x89, 0x10,			/* mov %edx,(%eax) */
	0x58,				/* pop %eax */
	0x8b, 0x55, 0x0c,		/* mov 0xc(%ebp),%edx */
	0x89, 0x02,			/* mov %eax,(%edx) */
	0x5b,				/* pop %ebx */
	0xc9,				/* leave */
	0xc3,				/* ret */
};

typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);

static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
		CpuidFunc func;
		void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
		memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));

		func = (CpuidFunc)ptr;
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		mono_code_manager_destroy (codeman);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		 *
		 * __asm__ __volatile__ ("cpuid"
		 *	: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
		 *	: "a" (id));
		 */
		return 1;
	}
	return 0;
}
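
/*
 * Usage sketch (illustrative): reading the feature flags leaf, as
 * mono_arch_cpu_optimizazions () does below. Bit 15 of EDX is CMOV and
 * bit 0 is the x87 FPU (needed, together with CMOV, for FCMOV):
 *
 *	int eax, ebx, ecx, edx;
 *	if (cpuid (1, &eax, &ebx, &ecx, &edx) && (edx & (1 << 15)))
 *		; // CMOV available
 */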

/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	_control87 (_PC_53, MCW_PC);
#endif
}

/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
	int eax, ebx, ecx, edx;
	guint32 opts = 0;

	*exclude_mask = 0;
	/* Feature Flags function, flags returned in EDX. */
	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 15)) {
			opts |= MONO_OPT_CMOV;
			if (edx & 1)
				opts |= MONO_OPT_FCMOV;
			else
				*exclude_mask |= MONO_OPT_FCMOV;
		} else
			*exclude_mask |= MONO_OPT_CMOV;
	}
	return opts;
}

/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		if (reg == -1)
			return TRUE;
	}

	return FALSE;
}

static gboolean
is_regsize_var (MonoType *t) {
	if (t->byref)
		return TRUE;
	switch (mono_type_get_underlying_type (t)->type) {
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_PTR:
	case MONO_TYPE_FNPTR:
		return TRUE;
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_STRING:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_ARRAY:
		return TRUE;
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (t))
			return TRUE;
		return FALSE;
	case MONO_TYPE_VALUETYPE:
		return FALSE;
	}
	return FALSE;
}

GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we don't allocate I1 to registers because there is no simple way to sign extend
		 * 8bit quantities in caller saved registers on x86 */
		if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) ||
		    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2) ||
		    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}

GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	/* we can use 3 registers for global allocation */
	regs = g_list_prepend (regs, (gpointer)X86_EBX);
	regs = g_list_prepend (regs, (gpointer)X86_ESI);
	regs = g_list_prepend (regs, (gpointer)X86_EDI);

	return regs;
}

/*
 * mono_arch_regalloc_cost:
 *
 * Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		return (ins->opcode == OP_ARG) ? 1 : 0;
	else
		/* push+pop+possible load if it is an argument */
		return (ins->opcode == OP_ARG) ? 3 : 2;
}
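
/*
 * Worked example (illustrative): with save_lmf set, the callee-saved
 * registers are already spilled as part of the LMF, so allocating a local
 * costs 0 extra memory references and an argument costs 1 (its initial
 * load). Otherwise the prolog/epilog push+pop adds 2 references, plus 1
 * more for the load when the variable is an argument.
 */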

/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (sig, FALSE);

	cfg->frame_reg = MONO_ARCH_BASEREG;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;

	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		break;
	case ArgValuetypeInReg:
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->varinfo [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	cfg->stack_offset = offset;

	g_free (cinfo);
}

void
mono_arch_create_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	CallInfo *cinfo;

	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (sig, FALSE);

	if (cinfo->ret.storage == ArgValuetypeInReg)
		cfg->ret_var_is_local = TRUE;

	g_free (cinfo);
}

/* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
 * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info
 */

static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *arg;
	MonoMethodSignature *tmp_sig;
	MonoInst *sig_arg;

	/* FIXME: Add support for signature tokens to AOT */
	cfg->disable_aot = TRUE;
	MONO_INST_NEW (cfg, arg, OP_OUTARG);

	/*
	 * mono_ArgIterator_Setup assumes the signature cookie is
	 * passed first and all the arguments which were before it are
	 * passed on the stack after the signature. So compensate by
	 * passing a different signature.
	 */
	tmp_sig = mono_metadata_signature_dup (call->signature);
	tmp_sig->param_count -= call->signature->sentinelpos;
	tmp_sig->sentinelpos = 0;
	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

	MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
	sig_arg->inst_p0 = tmp_sig;

	arg->inst_left = sig_arg;
	arg->type = STACK_PTR;
	/* prepend, so they get reversed */
	arg->next = call->out_args;
	call->out_args = arg;
}

/*
 * take the arguments and generate the arch-specific
 * instructions to properly call the function in call.
 * This includes pushing, moving arguments to the right register
 * etc.
 */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n;
	CallInfo *cinfo;
	int sentinelpos = 0;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (sig, FALSE);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);

	for (i = 0; i < n; ++i) {
		ArgInfo *ainfo = cinfo->args + i;

		/* Emit the signature cookie just before the implicit arguments */
		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
			emit_sig_cookie (cfg, call);
		}

		if (is_virtual && i == 0) {
			/* the argument will be attached to the call instruction */
			in = call->args [i];
		} else {
			MonoType *t;

			if (i >= sig->hasthis)
				t = sig->params [i - sig->hasthis];
			else
				t = &mono_defaults.int_class->byval_arg;
			t = mono_type_get_underlying_type (t);

			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			in = call->args [i];
			arg->cil_code = in->cil_code;
			arg->inst_left = in;
			arg->type = in->type;
			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;

			if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
				guint32 size, align;

				if (t->type == MONO_TYPE_TYPEDBYREF) {
					size = sizeof (MonoTypedRef);
					align = sizeof (gpointer);
				}
				else
					if (sig->pinvoke)
						size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
					else {
						int ialign;
						size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
						align = ialign;
					}
				arg->opcode = OP_OUTARG_VT;
				arg->klass = in->klass;
				arg->unused = sig->pinvoke;
				arg->inst_imm = size;
			}
			else {
				switch (ainfo->storage) {
				case ArgOnStack:
					arg->opcode = OP_OUTARG;
					if (!t->byref) {
						if (t->type == MONO_TYPE_R4)
							arg->opcode = OP_OUTARG_R4;
						else
							if (t->type == MONO_TYPE_R8)
								arg->opcode = OP_OUTARG_R8;
					}
					break;
				default:
					g_assert_not_reached ();
				}
			}
		}
	}

	/* Handle the case where there are no implicit arguments */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
		emit_sig_cookie (cfg, call);
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			MonoInst *zero_inst;
			/*
			 * After the call, the struct is in registers, but needs to be saved to the memory pointed
			 * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
			 * before calling the function. So we add a dummy instruction to represent pushing the
			 * struct return address to the stack. The return address will be saved to this stack slot
			 * by the code emitted in this_vret_args.
			 */
			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
			zero_inst->inst_p0 = 0;
			arg->inst_left = zero_inst;
			arg->type = STACK_PTR;
			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;
		}
		else
			/* if the function returns a struct, the called method already does a ret $0x4 */
			if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
				cinfo->stack_usage -= 4;
	}

	call->stack_usage = cinfo->stack_usage;

#if defined(__APPLE__)
	if (cinfo->need_stack_align) {
		MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
		arg->inst_c0 = cinfo->stack_align_amount;
		arg->next = call->out_args;
		call->out_args = arg;
	}
#endif

	g_free (cinfo);

	return call;
}
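
/*
 * Note (illustrative, inferred from the "prepend, so they get reversed"
 * comments above): for a call f (a, b, c), out_args is built up as
 * c -> b -> a, so consuming the list front to back yields the right-to-left
 * push order that the x86 cdecl convention expects.
 */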

/*
 * Allow tracing to work with this interface (with an optional argument)
 */
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;

	/* if some args are passed in registers, we need to save them here */
	x86_push_reg (code, X86_EBP);

	if (cfg->compile_aot) {
		x86_push_imm (code, cfg->method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
		x86_push_imm (code, cfg->method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);

	return code;
}

enum {
	SAVE_NONE,
	SAVE_STRUCT,
	SAVE_EAX,
	SAVE_EAX_EDX,
	SAVE_FP
};

void*
mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;
	int arg_size = 0, save_mode = SAVE_NONE;
	MonoMethod *method = cfg->method;

	switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
	case MONO_TYPE_VOID:
		/* special case string .ctor icall */
		if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
			save_mode = SAVE_EAX;
		else
			save_mode = SAVE_NONE;
		break;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		save_mode = SAVE_EAX_EDX;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		save_mode = SAVE_FP;
		break;
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
			save_mode = SAVE_EAX;
			break;
		}
		/* Fall through */
	case MONO_TYPE_VALUETYPE:
		save_mode = SAVE_STRUCT;
		break;
	default:
		save_mode = SAVE_EAX;
		break;
	}

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_push_reg (code, X86_EDX);
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EDX);
			x86_push_reg (code, X86_EAX);
			arg_size = 8;
		}
		break;
	case SAVE_EAX:
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EAX);
			arg_size = 4;
		}
		break;
	case SAVE_FP:
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		if (enable_arguments) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
			arg_size = 8;
		}
		break;
	case SAVE_STRUCT:
		if (enable_arguments) {
			x86_push_membase (code, X86_EBP, 8);
			arg_size = 4;
		}
		break;
	case SAVE_NONE:
	default:
		break;
	}

	if (cfg->compile_aot) {
		x86_push_imm (code, method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
		x86_push_imm (code, method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_pop_reg (code, X86_EAX);
		x86_pop_reg (code, X86_EDX);
		break;
	case SAVE_EAX:
		x86_pop_reg (code, X86_EAX);
		break;
	case SAVE_FP:
		x86_fld_membase (code, X86_ESP, 0, TRUE);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		break;
	case SAVE_NONE:
	default:
		break;
	}

	return code;
}

#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
	if (ins->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
} else { \
	if (ins->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
}

/*
 * Emit an exception if the condition fails and, if possible, branch
 * directly to the target.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
	do { \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					     MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, signed); \
		} else { \
			EMIT_COND_BRANCH (tins, cond, signed); \
		} \
	} while (0);

#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0);

static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}

/* FIXME: Add more instructions */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))

static void
peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *last_ins = NULL;
	ins = bb->code;

	while (ins) {

		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we can't do it always */
			if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
				ins->opcode = CEE_XOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;
			}
			break;
		case OP_MUL_IMM:
			/* remove unnecessary multiplication with 1 */
			if (ins->inst_imm == 1) {
				if (ins->dreg != ins->sreg1) {
					ins->opcode = OP_MOVE;
				} else {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				}
			}
			break;
		case OP_COMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0)
			 * -->
			 * OP_X86_TEST_NULL (reg)
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/*
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_COMPARE_IMM;
				ins->sreg1 = last_ins->sreg1;

				/* check if we can remove cmp reg,0 with test null */
				if (!ins->inst_imm)
					ins->opcode = OP_X86_TEST_NULL;
			}

			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI4_MEMBASE:
			/*
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG
					 || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}

			/*
			 * Note: reg1 must be different from the basereg in the second load
			 * Note: if reg1 = reg2 then the second load is removed
			 *
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_MOVE reg1, reg2
			 */
			} if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
					   || last_ins->opcode == OP_LOAD_MEMBASE) &&
			      ins->inst_basereg != last_ins->dreg &&
			      ins->inst_basereg == last_ins->inst_basereg &&
			      ins->inst_offset == last_ins->inst_offset) {

				if (ins->dreg == last_ins->dreg) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->dreg;
				}

				//g_assert_not_reached ();

#if 0
			/*
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg
			 * -->
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg)
			 * OP_ICONST reg, imm
			 */
			} else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
						|| last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
				   ins->inst_basereg == last_ins->inst_destbasereg &&
				   ins->inst_offset == last_ins->inst_offset) {
				//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
				ins->opcode = OP_ICONST;
				ins->inst_c0 = last_ins->inst_imm;
				g_assert_not_reached (); // check this rule
#endif
			}
			break;
		case OP_LOADU1_MEMBASE:
		case OP_LOADI1_MEMBASE:
			/*
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * CONV_I1/U1 reg1, reg2
			 */
			if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
			    (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		case OP_LOADU2_MEMBASE:
		case OP_LOADI2_MEMBASE:
			/*
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * CONV_I2/U2 reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		case CEE_CONV_I4:
		case CEE_CONV_U4:
		case OP_MOVE:
			/*
			 * Removes:
			 *
			 * OP_MOVE reg, reg
			 */
			if (ins->dreg == ins->sreg1) {
				if (last_ins)
					last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			/*
			 * Removes:
			 *
			 * OP_MOVE sreg, dreg
			 * OP_MOVE dreg, sreg
			 */
			if (last_ins && last_ins->opcode == OP_MOVE &&
			    ins->sreg1 == last_ins->dreg &&
			    ins->dreg == last_ins->sreg1) {
				last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			break;

		case OP_X86_PUSH_MEMBASE:
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_X86_PUSH;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		}
		last_ins = ins;
		ins = ins->next;
	}
	bb->last_ins = last_ins;
}
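
/*
 * Example (illustrative): the OP_ICONST rule above turns "mov $0, %reg"
 * (5 bytes) into "xor %reg, %reg" (2 bytes), but only when the following
 * instruction ignores the condition flags that XOR clobbers.
 */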

static const int
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

static const char*const * ins_spec = x86_desc;

/*#include "cprop.c"*/
void
mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
{
	mono_local_regalloc (cfg, bb);
}

static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
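
/*
 * Roughly the sequence emitted above for size == 4 (an illustrative sketch,
 * not the exact encoding): the FP control word is saved, its rounding-control
 * bits are forced to 11b (round toward zero) by OR-ing 0xc00, the value is
 * stored with fistp, and the original control word is restored:
 *
 *	sub    $4, %esp
 *	fnstcw (%esp)		; save current control word
 *	...			; OR 0xc00 into a copy, write it to 2(%esp)
 *	fldcw  2(%esp)		; switch to truncating rounding
 *	push   %eax		; make room for the result
 *	fistp  (%esp)		; store integer, pop FP stack
 *	pop    %dreg
 *	fldcw  (%esp)		; restore the original control word
 *	add    $4, %esp
 */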

static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently committed.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/*
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}

		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);

		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);

		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
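
/*
 * Shape of the probe loop generated above, assuming sreg holds the
 * requested size (an illustrative sketch, not the exact encoding):
 *
 *	test  $~0xFFF, sreg	; one page or less? skip the loop
 *	jz    small
 * loop:
 *	sub   $0x1000, %esp	; commit one page
 *	test  %esp, (%esp)	; touch it so the guard page moves down
 *	sub   $0x1000, sreg
 *	cmp   $0x1000, sreg
 *	jae   loop
 *	...
 * small:
 *	sub   sreg, %esp	; allocate the remainder in one step
 */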

static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
	CallInfo *cinfo;
	int quad;

	/* Move return value to the target register */
	switch (ins->opcode) {
	case CEE_CALL:
	case OP_CALL_REG:
	case OP_CALL_MEMBASE:
		if (ins->dreg != X86_EAX)
			x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
		break;
	case OP_VCALL:
	case OP_VCALL_REG:
	case OP_VCALL_MEMBASE:
		cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/* Pop the destination address from the stack */
			x86_pop_reg (code, X86_ECX);

			for (quad = 0; quad < 2; quad ++) {
				switch (cinfo->ret.pair_storage [quad]) {
				case ArgInIReg:
					g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
					x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
					break;
				case ArgNone:
					break;
				default:
					g_assert_not_reached ();
				}
			}
		}
		g_free (cinfo);
	default:
		break;
	}

	return code;
}

/*
 * emit_tls_get:
 * @code: buffer to store code to
 * @dreg: hard register where to place the result
 * @tls_offset: offset info
 *
 * emit_tls_get emits in @code the native code that puts in the dreg register
 * the item in the thread local storage identified by tls_offset.
 *
 * Returns: a pointer to the end of the stored code
 */
static guint8*
emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/*
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	g_assert (tls_offset < 64);
	x86_prefix (code, X86_FS_PREFIX);
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* Dunno what this does but TlsGetValue () contains it */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	if (optimize_for_xen) {
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, 0, 4);
		x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
	} else {
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, tls_offset, 4);
	}
#endif
	return code;
}
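
/*
 * Example of the code this emits on Linux, assuming tls_offset == 8 purely
 * for illustration: the non-Xen fast path is a single "mov %gs:8, %dreg".
 * The Xen variant avoids segment-relative accesses at large offsets by first
 * loading the TLS block address from %gs:0 and then indexing off that
 * register.
 */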

#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);

/* benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)

void
mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins;
	MonoCallInst *call;
	guint offset;
	guint8 *code = cfg->native_code + cfg->code_len;
	MonoInst *last_ins = NULL;
	guint last_offset = 0;
	int max_len, cpos;

	if (cfg->opt & MONO_OPT_PEEPHOLE)
		peephole_pass (cfg, bb);

	if (cfg->opt & MONO_OPT_LOOP) {
		int pad, align = LOOP_ALIGNMENT;
		/* set alignment depending on cpu */
		if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
			pad = align - pad;
			/*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
			x86_padding (code, pad);
			cfg->code_len += pad;
			bb->native_offset = cfg->code_len;
		}
	}

	if (cfg->verbose_level > 2)
		g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);

	cpos = bb->max_offset;

	if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
		MonoProfileCoverageInfo *cov = cfg->coverage_info;
		g_assert (!cfg->compile_aot);
		cpos += 6;

		cov->data [bb->dfn].cil_code = bb->cil_code;
		/* this is not thread safe, but good enough */
		x86_inc_mem (code, &cov->data [bb->dfn].count);
	}

	offset = code - cfg->native_code;

	mono_debug_open_block (cfg, bb, offset);

	ins = bb->code;
	while (ins) {
		offset = code - cfg->native_code;

		max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];

		if (offset > (cfg->code_size - max_len - 16)) {
			cfg->code_size *= 2;
			cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
			code = cfg->native_code + offset;
			mono_jit_stats.code_reallocs++;
		}

		mono_debug_record_line_number (cfg, ins, offset);

		switch (ins->opcode) {
		case OP_BIGMUL:
			x86_mul_reg (code, ins->sreg2, TRUE);
			break;
		case OP_BIGMUL_UN:
			x86_mul_reg (code, ins->sreg2, FALSE);
			break;
		case OP_X86_SETEQ_MEMBASE:
		case OP_X86_SETNE_MEMBASE:
			x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
					 ins->inst_basereg, ins->inst_offset, TRUE);
			break;
		case OP_STOREI1_MEMBASE_IMM:
			x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
			break;
		case OP_STOREI2_MEMBASE_IMM:
			x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
			break;
		case OP_STORE_MEMBASE_IMM:
		case OP_STOREI4_MEMBASE_IMM:
			x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_STOREI1_MEMBASE_REG:
			x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
			break;
		case OP_STOREI2_MEMBASE_REG:
			x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
			break;
		case OP_STORE_MEMBASE_REG:
		case OP_STOREI4_MEMBASE_REG:
			x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
			break;
		case CEE_LDIND_I:
		case CEE_LDIND_I4:
		case CEE_LDIND_U4:
			x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
			break;
		case OP_LOADU4_MEM:
			x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
			x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI4_MEMBASE:
		case OP_LOADU4_MEMBASE:
			x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
			break;
		case OP_LOADU1_MEMBASE:
			x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
			break;
		case OP_LOADI1_MEMBASE:
			x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
			break;
		case OP_LOADU2_MEMBASE:
			x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
			break;
		case OP_LOADI2_MEMBASE:
			x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
			break;
		case CEE_CONV_I1:
			x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
			break;
		case CEE_CONV_I2:
			x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
			break;
		case CEE_CONV_U1:
			x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
			break;
		case CEE_CONV_U2:
			x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
			break;
		case OP_COMPARE:
			x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			break;
		case OP_COMPARE_IMM:
			x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
			break;
		case OP_X86_COMPARE_MEMBASE_REG:
			x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_COMPARE_MEMBASE8_IMM:
			x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_COMPARE_REG_MEMBASE:
			x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
			break;
		case OP_X86_COMPARE_MEM_IMM:
			x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_TEST_NULL:
			x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
			break;
		case OP_X86_ADD_MEMBASE_IMM:
			x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_ADD_MEMBASE:
			x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
			break;
		case OP_X86_SUB_MEMBASE_IMM:
			x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_SUB_MEMBASE:
			x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
			break;
		case OP_X86_AND_MEMBASE_IMM:
			x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_OR_MEMBASE_IMM:
			x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_XOR_MEMBASE_IMM:
			x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_INC_MEMBASE:
			x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_X86_INC_REG:
			x86_inc_reg (code, ins->dreg);
			break;
		case OP_X86_DEC_MEMBASE:
			x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_X86_DEC_REG:
			x86_dec_reg (code, ins->dreg);
			break;
		case OP_X86_MUL_MEMBASE:
			x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
			break;
		case CEE_BREAK:
			x86_breakpoint (code);
			break;
		case OP_ADDCC:
		case CEE_ADD:
			x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
			break;
		case OP_ADC:
			x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
			break;
		case OP_ADDCC_IMM:
		case OP_ADD_IMM:
			x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
			break;
		case OP_ADC_IMM:
			x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
			break;
		case OP_SUBCC:
		case CEE_SUB:
			x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
			break;
		case OP_SBB:
			x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
			break;
		case OP_SUBCC_IMM:
		case OP_SUB_IMM:
			x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
			break;
		case OP_SBB_IMM:
			x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
			break;
		case CEE_AND:
			x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
			break;
		case OP_AND_IMM:
			x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
			break;
		case CEE_DIV:
			x86_cdq (code);
			x86_div_reg (code, ins->sreg2, TRUE);
			break;
		case CEE_DIV_UN:
			x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
			x86_div_reg (code, ins->sreg2, FALSE);
			break;
		case OP_DIV_IMM:
			x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
			x86_cdq (code);
			x86_div_reg (code, ins->sreg2, TRUE);
			break;
		case CEE_REM:
			x86_cdq (code);
			x86_div_reg (code, ins->sreg2, TRUE);
			break;
		case CEE_REM_UN:
			x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
			x86_div_reg (code, ins->sreg2, FALSE);
			break;
		case OP_REM_IMM:
			x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
			x86_cdq (code);
			x86_div_reg (code, ins->sreg2, TRUE);
			break;
		case CEE_OR:
			x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
			break;
		case OP_OR_IMM:
			x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
			break;
		case CEE_XOR:
			x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
			break;
		case OP_XOR_IMM:
			x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
			break;
		case CEE_SHL:
			g_assert (ins->sreg2 == X86_ECX);
			x86_shift_reg (code, X86_SHL, ins->dreg);
			break;
		case CEE_SHR:
			g_assert (ins->sreg2 == X86_ECX);
			x86_shift_reg (code, X86_SAR, ins->dreg);
			break;
		case OP_SHR_IMM:
			x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
			break;
		case OP_SHR_UN_IMM:
			x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
			break;
		case CEE_SHR_UN:
			g_assert (ins->sreg2 == X86_ECX);
			x86_shift_reg (code, X86_SHR, ins->dreg);
			break;
		case OP_SHL_IMM:
			x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
			break;
2010 case OP_LSHL: {
2011 guint8 *jump_to_end;
2013 /* handle shifts below 32 bits */
2014 x86_shld_reg (code, ins->unused, ins->sreg1);
2015 x86_shift_reg (code, X86_SHL, ins->sreg1);
2017 x86_test_reg_imm (code, X86_ECX, 32);
2018 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2020 /* handle shift over 32 bit */
2021 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2022 x86_clear_reg (code, ins->sreg1);
2024 x86_patch (jump_to_end, code);
2026 break;
2027 case OP_LSHR: {
2028 guint8 *jump_to_end;
2030 /* handle shifts below 32 bits */
2031 x86_shrd_reg (code, ins->sreg1, ins->unused);
2032 x86_shift_reg (code, X86_SAR, ins->unused);
2034 x86_test_reg_imm (code, X86_ECX, 32);
2035 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2037 /* handle shifts over 31 bits */
2038 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2039 x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
2041 x86_patch (jump_to_end, code);
2043 break;
2044 case OP_LSHR_UN: {
2045 guint8 *jump_to_end;
2047 /* handle shifts below 32 bits */
2048 x86_shrd_reg (code, ins->sreg1, ins->unused);
2049 x86_shift_reg (code, X86_SHR, ins->unused);
2051 x86_test_reg_imm (code, X86_ECX, 32);
2052 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2054 /* handle shifts over 31 bits */
2055 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2056 x86_clear_reg (code, ins->unused);
2058 x86_patch (jump_to_end, code);
2060 break;
2061 case OP_LSHL_IMM:
2062 if (ins->inst_imm >= 32) {
2063 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2064 x86_clear_reg (code, ins->sreg1);
2065 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
2066 } else {
2067 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
2068 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2070 break;
2071 case OP_LSHR_IMM:
2072 if (ins->inst_imm >= 32) {
2073 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2074 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
2075 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2076 } else {
2077 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2078 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
2080 break;
2081 case OP_LSHR_UN_IMM:
2082 if (ins->inst_imm >= 32) {
2083 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2084 x86_clear_reg (code, ins->unused);
2085 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2086 } else {
2087 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2088 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
2090 break;
2091 case CEE_NOT:
2092 x86_not_reg (code, ins->sreg1);
2093 break;
2094 case CEE_NEG:
2095 x86_neg_reg (code, ins->sreg1);
2096 break;
2097 case OP_SEXT_I1:
2098 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2099 break;
2100 case OP_SEXT_I2:
2101 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2102 break;
2103 case CEE_MUL:
2104 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2105 break;
2106 case OP_MUL_IMM:
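	/* Strength reduction: multiplications by small constants are emitted as
	 * LEA/ADD/SHL sequences instead of imul. LEA with a scaled index computes
	 * r + r*{1,2,4,8} in a single instruction and does not touch the flags. */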
2107 switch (ins->inst_imm) {
2108 case 2:
2109 /* MOV r1, r2 */
2110 /* ADD r1, r1 */
2111 if (ins->dreg != ins->sreg1)
2112 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2113 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2114 break;
2115 case 3:
2116 /* LEA r1, [r2 + r2*2] */
2117 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2118 break;
2119 case 5:
2120 /* LEA r1, [r2 + r2*4] */
2121 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2122 break;
2123 case 6:
2124 /* LEA r1, [r2 + r2*2] */
2125 /* ADD r1, r1 */
2126 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2127 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2128 break;
2129 case 9:
2130 /* LEA r1, [r2 + r2*8] */
2131 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2132 break;
2133 case 10:
2134 /* LEA r1, [r2 + r2*4] */
2135 /* ADD r1, r1 */
2136 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2137 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2138 break;
2139 case 12:
2140 /* LEA r1, [r2 + r2*2] */
2141 /* SHL r1, 2 */
2142 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2143 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2144 break;
2145 case 25:
2146 /* LEA r1, [r2 + r2*4] */
2147 /* LEA r1, [r1 + r1*4] */
2148 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2149 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2150 break;
2151 case 100:
2152 /* LEA r1, [r2 + r2*4] */
2153 /* SHL r1, 2 */
2154 /* LEA r1, [r1 + r1*4] */
2155 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2156 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2157 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2158 break;
2159 default:
2160 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2161 break;
2163 break;
2164 case CEE_MUL_OVF:
2165 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2166 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2167 break;
2168 case CEE_MUL_OVF_UN: {
2169 /* the mul operation and the exception check should most likely be split */
2170 int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2171 /*g_assert (ins->sreg2 == X86_EAX);
2172 g_assert (ins->dreg == X86_EAX);*/
2173 if (ins->sreg2 == X86_EAX) {
2174 non_eax_reg = ins->sreg1;
2175 } else if (ins->sreg1 == X86_EAX) {
2176 non_eax_reg = ins->sreg2;
2177 } else {
2178 /* no need to save since we're going to store to it anyway */
2179 if (ins->dreg != X86_EAX) {
2180 saved_eax = TRUE;
2181 x86_push_reg (code, X86_EAX);
2183 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2184 non_eax_reg = ins->sreg2;
2186 if (ins->dreg == X86_EDX) {
2187 if (!saved_eax) {
2188 saved_eax = TRUE;
2189 x86_push_reg (code, X86_EAX);
2191 } else if (ins->dreg != X86_EAX) {
2192 saved_edx = TRUE;
2193 x86_push_reg (code, X86_EDX);
2195 x86_mul_reg (code, non_eax_reg, FALSE);
2196 /* save before the check since pop and mov don't change the flags */
2197 if (ins->dreg != X86_EAX)
2198 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2199 if (saved_edx)
2200 x86_pop_reg (code, X86_EDX);
2201 if (saved_eax)
2202 x86_pop_reg (code, X86_EAX);
2203 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2204 break;
2206 case OP_ICONST:
2207 x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2208 break;
2209 case OP_AOTCONST:
2210 g_assert_not_reached ();
2211 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2212 x86_mov_reg_imm (code, ins->dreg, 0);
2213 break;
2214 case OP_LOAD_GOTADDR:
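	/* Classic x86 PIC idiom: 'call 0' pushes the address of the following
	 * instruction, which the pop below leaves in dreg; adding the (patched)
	 * GOT displacement then produces the GOT address. */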
2215 x86_call_imm (code, 0);
2217 /* The patch needs to point to the pop, since the GOT offset needs
2218  * to be added to that address. */
2220 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2221 x86_pop_reg (code, ins->dreg);
2222 x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2223 break;
2224 case OP_GOT_ENTRY:
2225 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2226 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2227 break;
2228 case OP_X86_PUSH_GOT_ENTRY:
2229 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2230 x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2231 break;
2232 case CEE_CONV_I4:
2233 case OP_MOVE:
2234 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2235 break;
2236 case CEE_CONV_U4:
2237 g_assert_not_reached ();
2238 case CEE_JMP: {
2240 /* Note: this 'frame destruction' logic is useful for tail calls, too.
2241  * Keep in sync with the code in emit_epilog. */
2243 int pos = 0;
2245 /* FIXME: no tracing support... */
2246 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2247 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2248 /* reset offset to make max_len work */
2249 offset = code - cfg->native_code;
2251 g_assert (!cfg->method->save_lmf);
2253 if (cfg->used_int_regs & (1 << X86_EBX))
2254 pos -= 4;
2255 if (cfg->used_int_regs & (1 << X86_EDI))
2256 pos -= 4;
2257 if (cfg->used_int_regs & (1 << X86_ESI))
2258 pos -= 4;
2259 if (pos)
2260 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2262 if (cfg->used_int_regs & (1 << X86_ESI))
2263 x86_pop_reg (code, X86_ESI);
2264 if (cfg->used_int_regs & (1 << X86_EDI))
2265 x86_pop_reg (code, X86_EDI);
2266 if (cfg->used_int_regs & (1 << X86_EBX))
2267 x86_pop_reg (code, X86_EBX);
2269 /* restore ESP/EBP */
2270 x86_leave (code);
2271 offset = code - cfg->native_code;
2272 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2273 x86_jump32 (code, 0);
2274 break;
2276 case OP_CHECK_THIS:
2277 /* ensure ins->sreg1 is not NULL;
2278  * note that cmp DWORD PTR [eax], eax is one byte shorter than
2279  * cmp DWORD PTR [eax], 0 */
2281 x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2282 break;
2283 case OP_ARGLIST: {
2284 int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2285 x86_push_reg (code, hreg);
2286 x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2287 x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2288 x86_pop_reg (code, hreg);
2289 break;
2291 case OP_FCALL:
2292 case OP_LCALL:
2293 case OP_VCALL:
2294 case OP_VOIDCALL:
2295 case CEE_CALL:
2296 call = (MonoCallInst*)ins;
2297 if (ins->flags & MONO_INST_HAS_METHOD)
2298 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2299 else
2300 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2301 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2302 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2303  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2304  * for P4 or i686 because gcc will avoid using pop/push at all). But we aren't
2305  * smart enough to do that optimization yet.
2307  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2308  * the mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2309  * speedup (most likely from locality benefits). People with other processors
2310  * should check on theirs to see what happens. */
2312 if (call->stack_usage == 4) {
2313 /* we want to use registers that won't get used soon, so use
2314  * ecx, as eax will get allocated first. edx is used by long calls,
2315  * so we can't use that. */
2318 x86_pop_reg (code, X86_ECX);
2319 } else {
2320 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2323 code = emit_move_return_value (cfg, ins, code);
2324 break;
2325 case OP_FCALL_REG:
2326 case OP_LCALL_REG:
2327 case OP_VCALL_REG:
2328 case OP_VOIDCALL_REG:
2329 case OP_CALL_REG:
2330 call = (MonoCallInst*)ins;
2331 x86_call_reg (code, ins->sreg1);
2332 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2333 if (call->stack_usage == 4)
2334 x86_pop_reg (code, X86_ECX);
2335 else
2336 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2338 code = emit_move_return_value (cfg, ins, code);
2339 break;
2340 case OP_FCALL_MEMBASE:
2341 case OP_LCALL_MEMBASE:
2342 case OP_VCALL_MEMBASE:
2343 case OP_VOIDCALL_MEMBASE:
2344 case OP_CALL_MEMBASE:
2345 call = (MonoCallInst*)ins;
2346 x86_call_membase (code, ins->sreg1, ins->inst_offset);
2347 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2348 if (call->stack_usage == 4)
2349 x86_pop_reg (code, X86_ECX);
2350 else
2351 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2353 code = emit_move_return_value (cfg, ins, code);
2354 break;
2355 case OP_OUTARG:
2356 case OP_X86_PUSH:
2357 x86_push_reg (code, ins->sreg1);
2358 break;
2359 case OP_X86_PUSH_IMM:
2360 x86_push_imm (code, ins->inst_imm);
2361 break;
2362 case OP_X86_PUSH_MEMBASE:
2363 x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2364 break;
2365 case OP_X86_PUSH_OBJ:
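	/* Push a value type of inst_imm bytes: reserve the space on the stack,
	 * then copy it in 4-byte units with rep movsd (ESI = source, EDI = the
	 * reserved area at ESP+12 after the three pushes, ECX = size / 4).
	 * EDI/ESI/ECX are saved and restored around the copy. */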
2366 x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2367 x86_push_reg (code, X86_EDI);
2368 x86_push_reg (code, X86_ESI);
2369 x86_push_reg (code, X86_ECX);
2370 if (ins->inst_offset)
2371 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2372 else
2373 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2374 x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2375 x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2376 x86_cld (code);
2377 x86_prefix (code, X86_REP_PREFIX);
2378 x86_movsd (code);
2379 x86_pop_reg (code, X86_ECX);
2380 x86_pop_reg (code, X86_ESI);
2381 x86_pop_reg (code, X86_EDI);
2382 break;
2383 case OP_X86_LEA:
2384 x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2385 break;
2386 case OP_X86_LEA_MEMBASE:
2387 x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2388 break;
2389 case OP_X86_XCHG:
2390 x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2391 break;
2392 case OP_LOCALLOC:
2393 /* keep alignment */
2394 x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2395 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2396 code = mono_emit_stack_alloc (code, ins);
2397 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2398 break;
2399 case CEE_RET:
2400 x86_ret (code);
2401 break;
2402 case CEE_THROW: {
2403 x86_push_reg (code, ins->sreg1);
2404 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
2405 (gpointer)"mono_arch_throw_exception");
2406 break;
2408 case OP_RETHROW: {
2409 x86_push_reg (code, ins->sreg1);
2410 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
2411 (gpointer)"mono_arch_rethrow_exception");
2412 break;
2414 case OP_CALL_HANDLER:
2415 /* Align stack */
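	/* Darwin requires 16-byte stack alignment at call sites: 12 bytes of
	 * padding plus the 4-byte return address pushed by the call keep it. */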
2416 #ifdef __APPLE__
2417 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2418 #endif
2419 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2420 x86_call_imm (code, 0);
2421 #ifdef __APPLE__
2422 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2423 #endif
2424 break;
2425 case OP_LABEL:
2426 ins->inst_c0 = code - cfg->native_code;
2427 break;
2428 case CEE_BR:
2429 //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2430 //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2431 //break;
2432 if (ins->flags & MONO_INST_BRLABEL) {
2433 if (ins->inst_i0->inst_c0) {
2434 x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2435 } else {
2436 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2437 if ((cfg->opt & MONO_OPT_BRANCH) &&
2438 x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2439 x86_jump8 (code, 0);
2440 else
2441 x86_jump32 (code, 0);
2443 } else {
2444 if (ins->inst_target_bb->native_offset) {
2445 x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
2446 } else {
2447 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2448 if ((cfg->opt & MONO_OPT_BRANCH) &&
2449 x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2450 x86_jump8 (code, 0);
2451 else
2452 x86_jump32 (code, 0);
2455 break;
2456 case OP_BR_REG:
2457 x86_jump_reg (code, ins->sreg1);
2458 break;
2459 case OP_CEQ:
2460 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2461 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2462 break;
2463 case OP_CLT:
2464 x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2465 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2466 break;
2467 case OP_CLT_UN:
2468 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2469 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2470 break;
2471 case OP_CGT:
2472 x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2473 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2474 break;
2475 case OP_CGT_UN:
2476 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2477 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2478 break;
2479 case OP_CNE:
2480 x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2481 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2482 break;
2483 case OP_COND_EXC_EQ:
2484 case OP_COND_EXC_NE_UN:
2485 case OP_COND_EXC_LT:
2486 case OP_COND_EXC_LT_UN:
2487 case OP_COND_EXC_GT:
2488 case OP_COND_EXC_GT_UN:
2489 case OP_COND_EXC_GE:
2490 case OP_COND_EXC_GE_UN:
2491 case OP_COND_EXC_LE:
2492 case OP_COND_EXC_LE_UN:
2493 case OP_COND_EXC_OV:
2494 case OP_COND_EXC_NO:
2495 case OP_COND_EXC_C:
2496 case OP_COND_EXC_NC:
2497 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2498 break;
2499 case CEE_BEQ:
2500 case CEE_BNE_UN:
2501 case CEE_BLT:
2502 case CEE_BLT_UN:
2503 case CEE_BGT:
2504 case CEE_BGT_UN:
2505 case CEE_BGE:
2506 case CEE_BGE_UN:
2507 case CEE_BLE:
2508 case CEE_BLE_UN:
2509 EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2510 break;
2512 /* floating point opcodes */
2513 case OP_R8CONST: {
2514 double d = *(double *)ins->inst_p0;
2516 if ((d == 0.0) && (mono_signbit (d) == 0)) {
2517 x86_fldz (code);
2518 } else if (d == 1.0) {
2519 x86_fld1 (code);
2520 } else {
2521 if (cfg->compile_aot) {
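	/* Under AOT the constant is materialized inline instead of being
	 * patched in as an address: push the high half first so the two halves
	 * sit in little-endian order at ESP, then fld from there. */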
2522 guint32 *val = (guint32*)&d;
2523 x86_push_imm (code, val [1]);
2524 x86_push_imm (code, val [0]);
2525 x86_fld_membase (code, X86_ESP, 0, TRUE);
2526 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2528 else {
2529 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2530 x86_fld (code, NULL, TRUE);
2533 break;
2535 case OP_R4CONST: {
2536 float f = *(float *)ins->inst_p0;
2538 if ((f == 0.0) && (mono_signbit (f) == 0)) {
2539 x86_fldz (code);
2540 } else if (f == 1.0) {
2541 x86_fld1 (code);
2542 } else {
2543 if (cfg->compile_aot) {
2544 guint32 val = *(guint32*)&f;
2545 x86_push_imm (code, val);
2546 x86_fld_membase (code, X86_ESP, 0, FALSE);
2547 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2549 else {
2550 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2551 x86_fld (code, NULL, FALSE);
2554 break;
2556 case OP_STORER8_MEMBASE_REG:
2557 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2558 break;
2559 case OP_LOADR8_SPILL_MEMBASE:
2560 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2561 x86_fxch (code, 1);
2562 break;
2563 case OP_LOADR8_MEMBASE:
2564 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2565 break;
2566 case OP_STORER4_MEMBASE_REG:
2567 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2568 break;
2569 case OP_LOADR4_MEMBASE:
2570 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2571 break;
2572 case CEE_CONV_R4: /* FIXME: change precision */
2573 case CEE_CONV_R8:
2574 x86_push_reg (code, ins->sreg1);
2575 x86_fild_membase (code, X86_ESP, 0, FALSE);
2576 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2577 break;
2578 case OP_X86_FP_LOAD_I8:
2579 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2580 break;
2581 case OP_X86_FP_LOAD_I4:
2582 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2583 break;
2584 case OP_FCONV_TO_I1:
2585 code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2586 break;
2587 case OP_FCONV_TO_U1:
2588 code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2589 break;
2590 case OP_FCONV_TO_I2:
2591 code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2592 break;
2593 case OP_FCONV_TO_U2:
2594 code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2595 break;
2596 case OP_FCONV_TO_I4:
2597 case OP_FCONV_TO_I:
2598 code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2599 break;
2600 case OP_FCONV_TO_I8:
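	/* fist rounds according to the FPU control word, so force truncation:
	 * save the control word, set the rounding-control bits (0xc00 = round
	 * toward zero), convert, then restore the original control word. */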
2601 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2602 x86_fnstcw_membase(code, X86_ESP, 0);
2603 x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2604 x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2605 x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2606 x86_fldcw_membase (code, X86_ESP, 2);
2607 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2608 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2609 x86_pop_reg (code, ins->dreg);
2610 x86_pop_reg (code, ins->unused);
2611 x86_fldcw_membase (code, X86_ESP, 0);
2612 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2613 break;
2614 case OP_LCONV_TO_R_UN: {
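	/* fild always reads the 64-bit operand as signed; if the value is
	 * negative when viewed that way, adding 2^64 yields the unsigned value.
	 * The constant mn below is 2^64 encoded in 80-bit extended precision. */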
2615 static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2616 guint8 *br;
2618 /* load 64bit integer to FP stack */
2619 x86_push_imm (code, 0);
2620 x86_push_reg (code, ins->sreg2);
2621 x86_push_reg (code, ins->sreg1);
2622 x86_fild_membase (code, X86_ESP, 0, TRUE);
2623 /* store as 80bit FP value */
2624 x86_fst80_membase (code, X86_ESP, 0);
2626 /* test if lreg is negative */
2627 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2628 br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2630 /* add correction constant mn */
2631 x86_fld80_mem (code, mn);
2632 x86_fld80_membase (code, X86_ESP, 0);
2633 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2634 x86_fst80_membase (code, X86_ESP, 0);
2636 x86_patch (br, code);
2638 x86_fld80_membase (code, X86_ESP, 0);
2639 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2641 break;
2643 case OP_LCONV_TO_OVF_I: {
2644 guint8 *br [3], *label [1];
2645 MonoInst *tins;
2648 /* Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff */
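	/* i.e. the 64-bit value fits in 32 bits iff the high word is the sign
	 * extension of the low word: all ones with the low top bit set, or all
	 * zeros with it clear. */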
2650 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2652 /* If the low word top bit is set, see if we are negative */
2653 br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2654 /* We are not negative (no top bit set); check that our top word is zero */
2655 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2656 br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2657 label [0] = code;
2659 /* throw exception */
2660 tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2661 if (tins) {
2662 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2663 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2664 x86_jump8 (code, 0);
2665 else
2666 x86_jump32 (code, 0);
2667 } else {
2668 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2669 x86_jump32 (code, 0);
2673 x86_patch (br [0], code);
2674 /* our top bit is set, check that the top word is 0xffffffff */
2675 x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2677 x86_patch (br [1], code);
2678 /* nope, emit exception */
2679 br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2680 x86_patch (br [2], label [0]);
2682 if (ins->dreg != ins->sreg1)
2683 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2684 break;
2686 case OP_FADD:
2687 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2688 break;
2689 case OP_FSUB:
2690 x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2691 break;
2692 case OP_FMUL:
2693 x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2694 break;
2695 case OP_FDIV:
2696 x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2697 break;
2698 case OP_FNEG:
2699 x86_fchs (code);
2700 break;
2701 case OP_SIN:
2702 x86_fsin (code);
2703 x86_fldz (code);
2704 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2705 break;
2706 case OP_COS:
2707 x86_fcos (code);
2708 x86_fldz (code);
2709 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2710 break;
2711 case OP_ABS:
2712 x86_fabs (code);
2713 break;
2714 case OP_TAN: {
2716 /* it really doesn't make sense to inline all this code,
2717  * it's here just to show that things may not be as simple
2718  * as they appear. */
2720 guchar *check_pos, *end_tan, *pop_jump;
2721 x86_push_reg (code, X86_EAX);
2722 x86_fptan (code);
2723 x86_fnstsw (code);
2724 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2725 check_pos = code;
2726 x86_branch8 (code, X86_CC_NE, 0, FALSE);
2727 x86_fstp (code, 0); /* pop the 1.0 */
2728 end_tan = code;
2729 x86_jump8 (code, 0);
2730 x86_fldpi (code);
2731 x86_fp_op (code, X86_FADD, 0);
2732 x86_fxch (code, 1);
2733 x86_fprem1 (code);
2734 x86_fstsw (code);
2735 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2736 pop_jump = code;
2737 x86_branch8 (code, X86_CC_NE, 0, FALSE);
2738 x86_fstp (code, 1);
2739 x86_fptan (code);
2740 x86_patch (pop_jump, code);
2741 x86_fstp (code, 0); /* pop the 1.0 */
2742 x86_patch (check_pos, code);
2743 x86_patch (end_tan, code);
2744 x86_fldz (code);
2745 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2746 x86_pop_reg (code, X86_EAX);
2747 break;
2749 case OP_ATAN:
2750 x86_fld1 (code);
2751 x86_fpatan (code);
2752 x86_fldz (code);
2753 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2754 break;
2755 case OP_SQRT:
2756 x86_fsqrt (code);
2757 break;
2758 case OP_X86_FPOP:
2759 x86_fstp (code, 0);
2760 break;
2761 case OP_FREM: {
2762 guint8 *l1, *l2;
2764 x86_push_reg (code, X86_EAX);
2765 /* we need to exchange ST(0) with ST(1) */
2766 x86_fxch (code, 1);
2768 /* this requires a loop, because fprem sometimes
2769  * returns a partial remainder */
2770 l1 = code;
2771 /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2772 /* x86_fprem1 (code); */
2773 x86_fprem (code);
2774 x86_fnstsw (code);
2775 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
2776 l2 = code + 2;
2777 x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2779 /* pop result */
2780 x86_fstp (code, 1);
2782 x86_pop_reg (code, X86_EAX);
2783 break;
2785 case OP_FCOMPARE:
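	/* Two strategies for FP compares: on FCMOV-capable CPUs fcomip sets
	 * ZF/PF/CF directly; otherwise the FPU status word is fetched into AX
	 * with fnstsw and the C0/C2/C3 bits are tested by hand, which is why
	 * the non-FCMOV paths clobber EAX. */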
2786 if (cfg->opt & MONO_OPT_FCMOV) {
2787 x86_fcomip (code, 1);
2788 x86_fstp (code, 0);
2789 break;
2791 /* this overwrites EAX */
2792 EMIT_FPCOMPARE(code);
2793 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2794 break;
2795 case OP_FCEQ:
2796 if (cfg->opt & MONO_OPT_FCMOV) {
2797 /* zeroing the register at the start results in
2798  * shorter and faster code (we can also remove the widening op) */
2800 guchar *unordered_check;
2801 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2802 x86_fcomip (code, 1);
2803 x86_fstp (code, 0);
2804 unordered_check = code;
2805 x86_branch8 (code, X86_CC_P, 0, FALSE);
2806 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2807 x86_patch (unordered_check, code);
2808 break;
2810 if (ins->dreg != X86_EAX)
2811 x86_push_reg (code, X86_EAX);
2813 EMIT_FPCOMPARE(code);
2814 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2815 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2816 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2817 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2819 if (ins->dreg != X86_EAX)
2820 x86_pop_reg (code, X86_EAX);
2821 break;
2822 case OP_FCLT:
2823 case OP_FCLT_UN:
2824 if (cfg->opt & MONO_OPT_FCMOV) {
2825 /* zeroing the register at the start results in
2826  * shorter and faster code (we can also remove the widening op) */
2828 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2829 x86_fcomip (code, 1);
2830 x86_fstp (code, 0);
2831 if (ins->opcode == OP_FCLT_UN) {
2832 guchar *unordered_check = code;
2833 guchar *jump_to_end;
2834 x86_branch8 (code, X86_CC_P, 0, FALSE);
2835 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2836 jump_to_end = code;
2837 x86_jump8 (code, 0);
2838 x86_patch (unordered_check, code);
2839 x86_inc_reg (code, ins->dreg);
2840 x86_patch (jump_to_end, code);
2841 } else {
2842 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2844 break;
2846 if (ins->dreg != X86_EAX)
2847 x86_push_reg (code, X86_EAX);
2849 EMIT_FPCOMPARE(code);
2850 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2851 if (ins->opcode == OP_FCLT_UN) {
2852 guchar *is_not_zero_check, *end_jump;
2853 is_not_zero_check = code;
2854 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2855 end_jump = code;
2856 x86_jump8 (code, 0);
2857 x86_patch (is_not_zero_check, code);
2858 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2860 x86_patch (end_jump, code);
2862 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2863 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2865 if (ins->dreg != X86_EAX)
2866 x86_pop_reg (code, X86_EAX);
2867 break;
2868 case OP_FCGT:
2869 case OP_FCGT_UN:
2870 if (cfg->opt & MONO_OPT_FCMOV) {
2871 /* zeroing the register at the start results in
2872  * shorter and faster code (we can also remove the widening op) */
2874 guchar *unordered_check;
2875 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2876 x86_fcomip (code, 1);
2877 x86_fstp (code, 0);
2878 if (ins->opcode == OP_FCGT) {
2879 unordered_check = code;
2880 x86_branch8 (code, X86_CC_P, 0, FALSE);
2881 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2882 x86_patch (unordered_check, code);
2883 } else {
2884 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2886 break;
2888 if (ins->dreg != X86_EAX)
2889 x86_push_reg (code, X86_EAX);
2891 EMIT_FPCOMPARE(code);
2892 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2893 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2894 if (ins->opcode == OP_FCGT_UN) {
2895 guchar *is_not_zero_check, *end_jump;
2896 is_not_zero_check = code;
2897 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2898 end_jump = code;
2899 x86_jump8 (code, 0);
2900 x86_patch (is_not_zero_check, code);
2901 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2903 x86_patch (end_jump, code);
2905 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2906 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2908 if (ins->dreg != X86_EAX)
2909 x86_pop_reg (code, X86_EAX);
2910 break;
2911 case OP_FBEQ:
2912 if (cfg->opt & MONO_OPT_FCMOV) {
2913 guchar *jump = code;
2914 x86_branch8 (code, X86_CC_P, 0, TRUE);
2915 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2916 x86_patch (jump, code);
2917 break;
2919 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2920 EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2921 break;
2922 case OP_FBNE_UN:
2923 /* Branch if C013 != 100 */
2924 if (cfg->opt & MONO_OPT_FCMOV) {
2925 /* branch if !ZF or (PF|CF) */
2926 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2927 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2928 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
2929 break;
2931 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2932 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2933 break;
2934 case OP_FBLT:
2935 if (cfg->opt & MONO_OPT_FCMOV) {
2936 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2937 break;
2939 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2940 break;
2941 case OP_FBLT_UN:
2942 if (cfg->opt & MONO_OPT_FCMOV) {
2943 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2944 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2945 break;
2947 if (ins->opcode == OP_FBLT_UN) {
2948 guchar *is_not_zero_check, *end_jump;
2949 is_not_zero_check = code;
2950 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2951 end_jump = code;
2952 x86_jump8 (code, 0);
2953 x86_patch (is_not_zero_check, code);
2954 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2956 x86_patch (end_jump, code);
2958 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2959 break;
2960 case OP_FBGT:
2961 case OP_FBGT_UN:
2962 if (cfg->opt & MONO_OPT_FCMOV) {
2963 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
2964 break;
2966 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2967 if (ins->opcode == OP_FBGT_UN) {
2968 guchar *is_not_zero_check, *end_jump;
2969 is_not_zero_check = code;
2970 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2971 end_jump = code;
2972 x86_jump8 (code, 0);
2973 x86_patch (is_not_zero_check, code);
2974 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2976 x86_patch (end_jump, code);
2978 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2979 break;
2980 case OP_FBGE:
2981 /* Branch if C013 == 100 or 001 */
2982 if (cfg->opt & MONO_OPT_FCMOV) {
2983 guchar *br1;
2985 /* skip branch if C1=1 */
2986 br1 = code;
2987 x86_branch8 (code, X86_CC_P, 0, FALSE);
2988 /* branch if (C0 | C3) = 1 */
2989 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
2990 x86_patch (br1, code);
2991 break;
2993 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2994 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2995 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2996 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2997 break;
2998 case OP_FBGE_UN:
2999 /* Branch if C013 == 000 */
3000 if (cfg->opt & MONO_OPT_FCMOV) {
3001 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3002 break;
3004 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3005 break;
3006 case OP_FBLE:
3007 /* Branch if C013=000 or 100 */
3008 if (cfg->opt & MONO_OPT_FCMOV) {
3009 guchar *br1;
3011 /* skip branch if C1=1 */
3012 br1 = code;
3013 x86_branch8 (code, X86_CC_P, 0, FALSE);
3014 /* branch if C0=0 */
3015 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3016 x86_patch (br1, code);
3017 break;
3019 x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3020 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3021 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3022 break;
3023 case OP_FBLE_UN:
3024 /* Branch if C013 != 001 */
3025 if (cfg->opt & MONO_OPT_FCMOV) {
3026 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3027 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3028 break;
3030 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3031 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3032 break;
3033 case CEE_CKFINITE: {
3034 x86_push_reg (code, X86_EAX);
3035 x86_fxam (code);
3036 x86_fnstsw (code);
3037 x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3038 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3039 x86_pop_reg (code, X86_EAX);
3040 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3041 break;
3043 case OP_TLS_GET: {
3044 code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3045 break;
3047 case OP_MEMORY_BARRIER: {
3048 /* Not needed on x86 */
3049 break;
3051 case OP_ATOMIC_ADD_I4: {
3052 int dreg = ins->dreg;
3054 if (dreg == ins->inst_basereg) {
3055 x86_push_reg (code, ins->sreg2);
3056 dreg = ins->sreg2;
3059 if (dreg != ins->sreg2)
3060 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
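	/* lock xadd: atomically add the register into the memory word; the
	 * register receives the previous memory value back. */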
3062 x86_prefix (code, X86_LOCK_PREFIX);
3063 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3065 if (dreg != ins->dreg) {
3066 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3067 x86_pop_reg (code, dreg);
3070 break;
3072 case OP_ATOMIC_ADD_NEW_I4: {
3073 int dreg = ins->dreg;
3075 /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3076 if (ins->sreg2 == dreg) {
3077 if (dreg == X86_EBX) {
3078 dreg = X86_EDI;
3079 if (ins->inst_basereg == X86_EDI)
3080 dreg = X86_ESI;
3081 } else {
3082 dreg = X86_EBX;
3083 if (ins->inst_basereg == X86_EBX)
3084 dreg = X86_EDI;
3086 } else if (ins->inst_basereg == dreg) {
3087 if (dreg == X86_EBX) {
3088 dreg = X86_EDI;
3089 if (ins->sreg2 == X86_EDI)
3090 dreg = X86_ESI;
3091 } else {
3092 dreg = X86_EBX;
3093 if (ins->sreg2 == X86_EBX)
3094 dreg = X86_EDI;
3098 if (dreg != ins->dreg) {
3099 x86_push_reg (code, dreg);
3102 x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3103 x86_prefix (code, X86_LOCK_PREFIX);
3104 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3105 /* dreg contains the old value, add with sreg2 value */
3106 x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3108 if (ins->dreg != dreg) {
3109 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3110 x86_pop_reg (code, dreg);
3113 break;
3115 case OP_ATOMIC_EXCHANGE_I4: {
3116 guchar *br[2];
3117 int sreg2 = ins->sreg2;
3118 int breg = ins->inst_basereg;
3120 /* cmpxchg uses eax as the comparand, so we need to make sure we can use it;
3121  * hack to overcome limits in the x86 reg allocator
3122  * (req: dreg == eax and sreg2 != eax and breg != eax) */
3124 if (ins->dreg != X86_EAX)
3125 x86_push_reg (code, X86_EAX);
3127 /* We need the EAX reg for the cmpxchg */
3128 if (ins->sreg2 == X86_EAX) {
3129 x86_push_reg (code, X86_EDX);
3130 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3131 sreg2 = X86_EDX;
3134 if (breg == X86_EAX) {
3135 x86_push_reg (code, X86_ESI);
3136 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3137 breg = X86_ESI;
3140 x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
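	/* Retry loop: EAX holds the last value seen; lock cmpxchg stores sreg2
	 * only if the memory word still equals EAX, otherwise it reloads EAX
	 * with the current value and the branch retries. On success EAX is the
	 * old value, i.e. the exchange result. */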
3142 br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3143 x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3144 br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3145 x86_patch (br [1], br [0]);
3147 if (breg != ins->inst_basereg)
3148 x86_pop_reg (code, X86_ESI);
3150 if (ins->dreg != X86_EAX) {
3151 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3152 x86_pop_reg (code, X86_EAX);
3155 if (ins->sreg2 != sreg2)
3156 x86_pop_reg (code, X86_EDX);
3158 break;
3160 default:
3161 g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3162 g_assert_not_reached ();
3165 if ((code - cfg->native_code - offset) > max_len) {
3166 g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3167 mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3168 g_assert_not_reached ();
3171 cpos += max_len;
3173 last_ins = ins;
3174 last_offset = offset;
3176 ins = ins->next;
3179 cfg->code_len = code - cfg->native_code;
3182 void
3183 mono_arch_register_lowlevel_calls (void)
3187 void
3188 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3190 MonoJumpInfo *patch_info;
3191 gboolean compile_aot = !run_cctors;
3193 for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3194 unsigned char *ip = patch_info->ip.i + code;
3195 const unsigned char *target;
3197 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3199 if (compile_aot) {
3200 switch (patch_info->type) {
3201 case MONO_PATCH_INFO_BB:
3202 case MONO_PATCH_INFO_LABEL:
3203 break;
3204 default:
3205 /* No need to patch these */
3206 continue;
3210 switch (patch_info->type) {
3211 case MONO_PATCH_INFO_IP:
3212 *((gconstpointer *)(ip)) = target;
3213 break;
3214 case MONO_PATCH_INFO_CLASS_INIT: {
3215 guint8 *code = ip;
3216 /* Might already have been changed to a nop */
3217 x86_call_code (code, 0);
3218 x86_patch (ip, target);
3219 break;
3221 case MONO_PATCH_INFO_ABS:
3222 case MONO_PATCH_INFO_METHOD:
3223 case MONO_PATCH_INFO_METHOD_JUMP:
3224 case MONO_PATCH_INFO_INTERNAL_METHOD:
3225 case MONO_PATCH_INFO_BB:
3226 case MONO_PATCH_INFO_LABEL:
3227 x86_patch (ip, target);
3228 break;
3229 case MONO_PATCH_INFO_NONE:
3230 break;
3231 default: {
3232 guint32 offset = mono_arch_get_patch_offset (ip);
3233 *((gconstpointer *)(ip + offset)) = target;
3234 break;
3240 guint8 *
3241 mono_arch_emit_prolog (MonoCompile *cfg)
3243 MonoMethod *method = cfg->method;
3244 MonoBasicBlock *bb;
3245 MonoMethodSignature *sig;
3246 MonoInst *inst;
3247 int alloc_size, pos, max_offset, i;
3248 guint8 *code;
3250 cfg->code_size = MAX (mono_method_get_header (method)->code_size * 4, 256);
3251 code = cfg->native_code = g_malloc (cfg->code_size);
3253 x86_push_reg (code, X86_EBP);
3254 x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3256 alloc_size = cfg->stack_offset;
3257 pos = 0;
3259 if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3260 /* Might need to attach the thread to the JIT */
3261 if (lmf_tls_offset != -1) {
3262 guint8 *buf;
3264 code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3265 x86_test_reg_reg (code, X86_EAX, X86_EAX);
3266 buf = code;
3267 x86_branch8 (code, X86_CC_NE, 0, 0);
3268 x86_push_imm (code, cfg->domain);
3269 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3270 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3271 x86_patch (buf, code);
3272 #ifdef PLATFORM_WIN32
3273 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3274 /* FIXME: Add a separate key for LMF to avoid this */
3275 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3276 #endif
3277 } else {
3278 g_assert (!cfg->compile_aot);
3279 x86_push_imm (code, cfg->domain);
3280 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3281 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3285 if (method->save_lmf) {
3286 pos += sizeof (MonoLMF);
3288 /* save the current IP */
3289 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3290 x86_push_imm_template (code);
3292 /* save all caller saved regs */
3293 x86_push_reg (code, X86_EBP);
3294 x86_push_reg (code, X86_ESI);
3295 x86_push_reg (code, X86_EDI);
3296 x86_push_reg (code, X86_EBX);
3298 /* save method info */
3299 x86_push_imm (code, method);
3301 /* get the address of lmf for the current thread */
3303 /* This is performance critical so we try to use some tricks to make
3304  * it fast. */
3306 if (lmf_tls_offset != -1) {
3307 /* Load the lmf quickly using the GS register */
3308 code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
3309 #ifdef PLATFORM_WIN32
3310 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3311 /* FIXME: Add a separate key for LMF to avoid this */
3312 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3313 #endif
3314 } else {
3315 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3318 /* push lmf */
3319 x86_push_reg (code, X86_EAX);
3320 /* push *lmf (previous_lmf) */
3321 x86_push_membase (code, X86_EAX, 0);
3322 /* *(lmf) = ESP */
3323 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3324 } else {
3326 if (cfg->used_int_regs & (1 << X86_EBX)) {
3327 x86_push_reg (code, X86_EBX);
3328 pos += 4;
3331 if (cfg->used_int_regs & (1 << X86_EDI)) {
3332 x86_push_reg (code, X86_EDI);
3333 pos += 4;
3336 if (cfg->used_int_regs & (1 << X86_ESI)) {
3337 x86_push_reg (code, X86_ESI);
3338 pos += 4;
3342 alloc_size -= pos;
3344 #if __APPLE__
3345 /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3347 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3348 if (tot & 4) {
3349 tot += 4;
3350 alloc_size += 4;
3352 if (tot & 8) {
3353 alloc_size += 8;
3356 #endif
3358 if (alloc_size) {
3359 /* See mono_emit_stack_alloc */
3360 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
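	/* Grow the stack one page (0x1000 bytes) at a time, touching each new
	 * page with a read so the OS guard page is hit in order and the stack
	 * can be committed as we go. */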
3361 guint32 remaining_size = alloc_size;
3362 while (remaining_size >= 0x1000) {
3363 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3364 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3365 remaining_size -= 0x1000;
3367 if (remaining_size)
3368 x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3369 #else
3370 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3371 #endif
3374 #if __APPLE__
3375 /* check the stack is aligned */
3376 x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3377 x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3378 x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3379 x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3380 x86_breakpoint (code);
3381 #endif
3383 /* compute max_offset in order to use short forward jumps */
3384 max_offset = 0;
3385 if (cfg->opt & MONO_OPT_BRANCH) {
3386 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3387 MonoInst *ins = bb->code;
3388 bb->max_offset = max_offset;
3390 if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3391 max_offset += 6;
3392 /* max alignment for loops */
3393 if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3394 max_offset += LOOP_ALIGNMENT;
3396 while (ins) {
3397 if (ins->opcode == OP_LABEL)
3398 ins->inst_c1 = max_offset;
3400 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3401 ins = ins->next;
3406 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3407 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3409 /* load arguments allocated to registers from the stack */
3410 sig = mono_method_signature (method);
3411 pos = 0;
3413 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3414 inst = cfg->varinfo [pos];
3415 if (inst->opcode == OP_REGVAR) {
3416 x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3417 if (cfg->verbose_level > 2)
3418 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3420 pos++;
3423 cfg->code_len = code - cfg->native_code;
3425 return code;
3428 void
3429 mono_arch_emit_epilog (MonoCompile *cfg)
3431 MonoMethod *method = cfg->method;
3432 MonoMethodSignature *sig = mono_method_signature (method);
3433 int quad, pos;
3434 guint32 stack_to_pop;
3435 guint8 *code;
3436 int max_epilog_size = 16;
3437 CallInfo *cinfo;
3439 if (cfg->method->save_lmf)
3440 max_epilog_size += 128;
3442 if (mono_jit_trace_calls != NULL)
3443 max_epilog_size += 50;
3445 while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3446 cfg->code_size *= 2;
3447 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3448 mono_jit_stats.code_reallocs++;
3451 code = cfg->native_code + cfg->code_len;
3453 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3454 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3456 /* the code restoring the registers must be kept in sync with CEE_JMP */
3457 pos = 0;
3459 if (method->save_lmf) {
3460 gint32 prev_lmf_reg;
3461 gint32 lmf_offset = -sizeof (MonoLMF);
3463 /* Find a spare register */
3464 switch (sig->ret->type) {
3465 case MONO_TYPE_I8:
3466 case MONO_TYPE_U8:
3467 prev_lmf_reg = X86_EDI;
3468 cfg->used_int_regs |= (1 << X86_EDI);
3469 break;
3470 default:
3471 prev_lmf_reg = X86_EDX;
3472 break;
3475 /* reg = previous_lmf */
3476 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3478 /* ecx = lmf */
3479 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
3481 /* *(lmf) = previous_lmf */
3482 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3484 /* restore caller saved regs */
3485 if (cfg->used_int_regs & (1 << X86_EBX)) {
3486 x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
3489 if (cfg->used_int_regs & (1 << X86_EDI)) {
3490 x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
3492 if (cfg->used_int_regs & (1 << X86_ESI)) {
3493 x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
3496 /* EBP is restored by LEAVE */
3497 } else {
3498 if (cfg->used_int_regs & (1 << X86_EBX)) {
3499 pos -= 4;
3501 if (cfg->used_int_regs & (1 << X86_EDI)) {
3502 pos -= 4;
3504 if (cfg->used_int_regs & (1 << X86_ESI)) {
3505 pos -= 4;
3508 if (pos)
3509 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3511 if (cfg->used_int_regs & (1 << X86_ESI)) {
3512 x86_pop_reg (code, X86_ESI);
3514 if (cfg->used_int_regs & (1 << X86_EDI)) {
3515 x86_pop_reg (code, X86_EDI);
3517 if (cfg->used_int_regs & (1 << X86_EBX)) {
3518 x86_pop_reg (code, X86_EBX);
3522 /* Load returned vtypes into registers if needed */
3523 cinfo = get_call_info (sig, FALSE);
3524 if (cinfo->ret.storage == ArgValuetypeInReg) {
3525 for (quad = 0; quad < 2; quad ++) {
3526 switch (cinfo->ret.pair_storage [quad]) {
3527 case ArgInIReg:
3528 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
3529 break;
3530 case ArgOnFloatFpStack:
3531 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
3532 break;
3533 case ArgOnDoubleFpStack:
3534 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
3535 break;
3536 case ArgNone:
3537 break;
3538 default:
3539 g_assert_not_reached ();
3544 x86_leave (code);
3546 if (CALLCONV_IS_STDCALL (sig)) {
3547 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3549 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3550 } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
3551 stack_to_pop = 4;
3552 else
3553 stack_to_pop = 0;
3555 if (stack_to_pop)
3556 x86_ret_imm (code, stack_to_pop);
3557 else
3558 x86_ret (code);
3560 g_free (cinfo);
3562 cfg->code_len = code - cfg->native_code;
3564 g_assert (cfg->code_len < cfg->code_size);
3567 void
3568 mono_arch_emit_exceptions (MonoCompile *cfg)
3570 MonoJumpInfo *patch_info;
3571 int nthrows, i;
3572 guint8 *code;
3573 MonoClass *exc_classes [16];
3574 guint8 *exc_throw_start [16], *exc_throw_end [16];
3575 guint32 code_size;
3576 int exc_count = 0;
3578 /* Compute needed space */
3579 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3580 if (patch_info->type == MONO_PATCH_INFO_EXC)
3581 exc_count++;
3585 /* make sure we have enough space for exceptions;
3586  * 16 is the size of two push_imm instructions and a call */
3588 if (cfg->compile_aot)
3589 code_size = exc_count * 32;
3590 else
3591 code_size = exc_count * 16;
3593 while (cfg->code_len + code_size > (cfg->code_size - 16)) {
3594 cfg->code_size *= 2;
3595 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3596 mono_jit_stats.code_reallocs++;
3599 code = cfg->native_code + cfg->code_len;
3601 nthrows = 0;
3602 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3603 switch (patch_info->type) {
3604 case MONO_PATCH_INFO_EXC: {
3605 MonoClass *exc_class;
3606 guint8 *buf, *buf2;
3607 guint32 throw_ip;
3609 x86_patch (patch_info->ip.i + cfg->native_code, code);
3611 exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
3612 g_assert (exc_class);
3613 throw_ip = patch_info->ip.i;
3615 /* Find a throw sequence for the same exception class */
3616 for (i = 0; i < nthrows; ++i)
3617 if (exc_classes [i] == exc_class)
3618 break;
3619 if (i < nthrows) {
3620 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
3621 x86_jump_code (code, exc_throw_start [i]);
3622 patch_info->type = MONO_PATCH_INFO_NONE;
3624 else {
3625 guint32 size;
3627 /* Compute size of code following the push <OFFSET> */
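	/* 5 bytes for the push imm32 of the type token plus 5 for the call */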
3628 size = 5 + 5;
3630 if ((code - cfg->native_code) - throw_ip < 126 - size) {
3631 /* Use the shorter form */
3632 buf = buf2 = code;
3633 x86_push_imm (code, 0);
3635 else {
3636 buf = code;
3637 x86_push_imm (code, 0xf0f0f0f0);
3638 buf2 = code;
3641 if (nthrows < 16) {
3642 exc_classes [nthrows] = exc_class;
3643 exc_throw_start [nthrows] = code;
3646 x86_push_imm (code, exc_class->type_token);
3647 patch_info->data.name = "mono_arch_throw_corlib_exception";
3648 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
3649 patch_info->ip.i = code - cfg->native_code;
3650 x86_call_code (code, 0);
3651 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
3652 while (buf < buf2)
3653 x86_nop (buf);
3655 if (nthrows < 16) {
3656 exc_throw_end [nthrows] = code;
3657 nthrows ++;
3660 break;
3662 default:
3663 /* do nothing */
3664 break;
3668 cfg->code_len = code - cfg->native_code;
3670 g_assert (cfg->code_len < cfg->code_size);
3673 void
3674 mono_arch_flush_icache (guint8 *code, gint size)
3676 /* not needed */
3679 void
3680 mono_arch_flush_register_windows (void)
3685 /* Support for fast access to the thread-local lmf structure using the GS
3686  * segment register on NPTL + kernel 2.6.x. */
3689 static gboolean tls_offset_inited = FALSE;
3691 void
3692 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
3694 if (!tls_offset_inited) {
3695 if (!getenv ("MONO_NO_TLS")) {
3696 #ifdef PLATFORM_WIN32
3698 /* We need to init this multiple times, since when we are first called, the key might not
3699  * be initialized yet. */
3701 appdomain_tls_offset = mono_domain_get_tls_key ();
3702 lmf_tls_offset = mono_get_jit_tls_key ();
3703 thread_tls_offset = mono_thread_get_tls_key ();
3705 /* Only 64 tls entries can be accessed using inline code */
3706 if (appdomain_tls_offset >= 64)
3707 appdomain_tls_offset = -1;
3708 if (lmf_tls_offset >= 64)
3709 lmf_tls_offset = -1;
3710 if (thread_tls_offset >= 64)
3711 thread_tls_offset = -1;
3712 #else
3713 #if MONO_XEN_OPT
3714 optimize_for_xen = access ("/proc/xen", F_OK) == 0;
3715 #endif
3716 tls_offset_inited = TRUE;
3717 appdomain_tls_offset = mono_domain_get_tls_offset ();
3718 lmf_tls_offset = mono_get_lmf_tls_offset ();
3719 thread_tls_offset = mono_thread_get_tls_offset ();
3720 #endif
3725 void
3726 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
3730 void
3731 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
3733 MonoCallInst *call = (MonoCallInst*)inst;
3734 CallInfo *cinfo = get_call_info (inst->signature, FALSE);
3736 /* add the this argument */
3737 if (this_reg != -1) {
3738 if (cinfo->args [0].storage == ArgInIReg) {
3739 MonoInst *this;
3740 MONO_INST_NEW (cfg, this, OP_MOVE);
3741 this->type = this_type;
3742 this->sreg1 = this_reg;
3743 this->dreg = mono_regstate_next_int (cfg->rs);
3744 mono_bblock_add_inst (cfg->cbb, this);
3746 mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
3748 else {
3749 MonoInst *this;
3750 MONO_INST_NEW (cfg, this, OP_OUTARG);
3751 this->type = this_type;
3752 this->sreg1 = this_reg;
3753 mono_bblock_add_inst (cfg->cbb, this);
3757 if (vt_reg != -1) {
3758 MonoInst *vtarg;
3760 if (cinfo->ret.storage == ArgValuetypeInReg) {
3762 * The valuetype is in EAX:EDX after the call, needs to be copied to
3763 * the stack. Save the address here, so the call instruction can
3764 * access it.
3766 MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
3767 vtarg->inst_destbasereg = X86_ESP;
3768 vtarg->inst_offset = inst->stack_usage;
3769 vtarg->sreg1 = vt_reg;
3770 mono_bblock_add_inst (cfg->cbb, vtarg);
3772 else if (cinfo->ret.storage == ArgInIReg) {
3773 /* The return address is passed in a register */
3774 MONO_INST_NEW (cfg, vtarg, OP_MOVE);
3775 vtarg->sreg1 = vt_reg;
3776 vtarg->dreg = mono_regstate_next_int (cfg->rs);
3777 mono_bblock_add_inst (cfg->cbb, vtarg);
3779 mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
3780 } else {
3781 MonoInst *vtarg;
3782 MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
3783 vtarg->type = STACK_MP;
3784 vtarg->sreg1 = vt_reg;
3785 mono_bblock_add_inst (cfg->cbb, vtarg);
3789 g_free (cinfo);
3792 MonoInst*
3793 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3795 MonoInst *ins = NULL;
3797 if (cmethod->klass == mono_defaults.math_class) {
3798 if (strcmp (cmethod->name, "Sin") == 0) {
3799 MONO_INST_NEW (cfg, ins, OP_SIN);
3800 ins->inst_i0 = args [0];
3801 } else if (strcmp (cmethod->name, "Cos") == 0) {
3802 MONO_INST_NEW (cfg, ins, OP_COS);
3803 ins->inst_i0 = args [0];
3804 } else if (strcmp (cmethod->name, "Tan") == 0) {
3805 MONO_INST_NEW (cfg, ins, OP_TAN);
3806 ins->inst_i0 = args [0];
3807 } else if (strcmp (cmethod->name, "Atan") == 0) {
3808 MONO_INST_NEW (cfg, ins, OP_ATAN);
3809 ins->inst_i0 = args [0];
3810 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
3811 MONO_INST_NEW (cfg, ins, OP_SQRT);
3812 ins->inst_i0 = args [0];
3813 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
3814 MONO_INST_NEW (cfg, ins, OP_ABS);
3815 ins->inst_i0 = args [0];
3817 #if 0
3818 /* OP_FREM is not IEEE compatible */
3819 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
3820 MONO_INST_NEW (cfg, ins, OP_FREM);
3821 ins->inst_i0 = args [0];
3822 ins->inst_i1 = args [1];
3824 #endif
3825 } else if (cmethod->klass == mono_defaults.thread_class &&
3826 strcmp (cmethod->name, "MemoryBarrier") == 0) {
3827 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
3828 } else if(cmethod->klass->image == mono_defaults.corlib &&
3829 (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
3830 (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
3832 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3833 MonoInst *ins_iconst;
3835 MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3836 MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3837 ins_iconst->inst_c0 = 1;
3839 ins->inst_i0 = args [0];
3840 ins->inst_i1 = ins_iconst;
3841 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3842 MonoInst *ins_iconst;
3844 MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3845 MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3846 ins_iconst->inst_c0 = -1;
3848 ins->inst_i0 = args [0];
3849 ins->inst_i1 = ins_iconst;
3850 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3851 MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
3853 ins->inst_i0 = args [0];
3854 ins->inst_i1 = args [1];
3855 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3856 MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3858 ins->inst_i0 = args [0];
3859 ins->inst_i1 = args [1];
3863 return ins;
3867 gboolean
3868 mono_arch_print_tree (MonoInst *tree, int arity)
3870 return 0;
3873 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3875 MonoInst* ins;
3877 if (appdomain_tls_offset == -1)
3878 return NULL;
3880 MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3881 ins->inst_offset = appdomain_tls_offset;
3882 return ins;
3885 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3887 MonoInst* ins;
3889 if (thread_tls_offset == -1)
3890 return NULL;
3892 MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3893 ins->inst_offset = thread_tls_offset;
3894 return ins;
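/* Return the offset, within the instruction at CODE, of the 4-byte
 * immediate or displacement that patching must rewrite. */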
3897 guint32
3898 mono_arch_get_patch_offset (guint8 *code)
3900 if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
3901 return 2;
3902 else if ((code [0] == 0xba))
3903 return 1;
3904 else if ((code [0] == 0x68))
3905 /* push IMM */
3906 return 1;
3907 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
3908 /* push <OFFSET>(<REG>) */
3909 return 2;
3910 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
3911 /* call *<OFFSET>(<REG>) */
3912 return 2;
3913 else if ((code [0] == 0xdd) || (code [0] == 0xd9))
3914 /* fldl <ADDR> */
3915 return 2;
3916 else if ((code [0] == 0x58) && (code [1] == 0x05))
3917 /* pop %eax; add <OFFSET>, %eax */
3918 return 2;
3919 else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
3920 /* pop <REG>; add <OFFSET>, <REG> */
3921 return 3;
3922 else {
3923 g_assert_not_reached ();
3924 return -1;
3928 gpointer*
3929 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
3931 guint8 reg = 0;
3932 gint32 disp = 0;
3934 /* go to the start of the call instruction
3936  * address_byte = (m << 6) | (o << 3) | reg
3937  * call opcode: 0xff address_byte displacement
3938  * 0xff m=1,o=2 imm8
3939  * 0xff m=2,o=2 imm32 */
3941 code -= 6;
3944 /* A given byte sequence can match more than one case here, so we have to be
3945  * really careful about the ordering of the cases. Longer sequences
3946  * come first. */
3948 if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
3950 /* This is an interface call:
3951  * 8b 80 0c e8 ff ff mov 0xffffe80c(%eax),%eax
3952  * ff 10 call *(%eax) */
3954 reg = x86_modrm_rm (code [5]);
3955 disp = 0;
3956 } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
3957 reg = code [4] & 0x07;
3958 disp = (signed char)code [5];
3959 } else {
3960 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
3961 reg = code [1] & 0x07;
3962 disp = *((gint32*)(code + 2));
3963 } else if ((code [1] == 0xe8)) {
3964 return NULL;
3965 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
3967 /* This is an interface call:
3968  * 8b 40 30 mov 0x30(%eax),%eax
3969  * ff 10 call *(%eax) */
3971 disp = 0;
3972 reg = code [5] & 0x07;
3974 else
3975 return NULL;
3978 return (gpointer*)(((gint32)(regs [reg])) + disp);
3981 gpointer*
3982 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
3984 guint8 reg = 0;
3985 gint32 disp = 0;
3987 code -= 7;
3988 if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
3989 reg = x86_modrm_rm (code [1]);
3990 disp = code [4];
3992 if (reg == X86_EAX)
3993 return NULL;
3994 else
3995 return (gpointer*)(((gint32)(regs [reg])) + disp);
3998 return NULL;