Fix up bugs in x86-codegen for NaCl.
[mono-project/dkf.git] / mono / mini / mini-x86.c
blob 201ebc01d660da78749231f9e03d1781a52afb3b
/*
 * mini-x86.c: x86 backend for the Mono code generator
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/metadata/gc-internal.h>
#include <mono/utils/mono-math.h>
#include <mono/utils/mono-counters.h>
#include <mono/utils/mono-mmap.h>
#include <mono/utils/mono-memory-model.h>

#include "trace.h"
#include "mini-x86.h"
#include "cpu-x86.h"
#include "ir-emit.h"
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;

#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#ifdef TARGET_WIN32
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif

/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
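/* ALIGN_TO rounds val up to the next multiple of align; align must be a power of two, e.g. ALIGN_TO (13, 8) == 16. */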
#define ARGS_OFFSET 8

#ifdef TARGET_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

#define X86_IS_CALLEE_SAVED_REG(reg) (((reg) == X86_EBX) || ((reg) == X86_EDI) || ((reg) == X86_ESI))

MonoBreakpointInfo
mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];
#ifdef __native_client_codegen__
const guint kNaClAlignment = kNaClAlignmentX86;
const guint kNaClAlignmentMask = kNaClAlignmentMaskX86;

/* Default alignment for Native Client is 32-byte. */
gint8 nacl_align_byte = -32; /* signed version of 0xe0 */
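/* -32 == 0xe0: NaCl masks indirect branch targets with this byte (sign-extended to 0xffffffe0) to force them to the start of a 32-byte bundle. */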
/*
 * mono_arch_nacl_pad: add pad bytes of alignment instructions at code,
 * checking that the padding doesn't cross an alignment boundary.
 */
guint8 *
mono_arch_nacl_pad (guint8 *code, int pad)
{
	const int kMaxPadding = 7; /* see x86-codegen.h: x86_padding() */

	if (pad == 0) return code;
	/* assertion: alignment cannot cross a block boundary */
	g_assert (((uintptr_t)code & (~kNaClAlignmentMask)) ==
		(((uintptr_t)code + pad - 1) & (~kNaClAlignmentMask)));
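	/* Emit the padding in chunks of at most kMaxPadding bytes: e.g. pad == 20 becomes 7 + 7 + 6 byte nop sequences. */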
	while (pad >= kMaxPadding) {
		x86_padding (code, kMaxPadding);
		pad -= kMaxPadding;
	}
	if (pad != 0) x86_padding (code, pad);
	return code;
}

guint8 *
mono_arch_nacl_skip_nops (guint8 *code)
{
	x86_skip_nops (code);
	return code;
}

#endif /* __native_client_codegen__ */
/*
 * The code generated for sequence points reads from this location, which is
 * made read-only when single stepping is enabled.
 */
static gpointer ss_trigger_page;

/* Enabled breakpoints read from this trigger page */
static gpointer bp_trigger_page;
const char*
mono_arch_regname (int reg)
{
	switch (reg) {
	case X86_EAX: return "%eax";
	case X86_EBX: return "%ebx";
	case X86_ECX: return "%ecx";
	case X86_EDX: return "%edx";
	case X86_ESP: return "%esp";
	case X86_EBP: return "%ebp";
	case X86_EDI: return "%edi";
	case X86_ESI: return "%esi";
	}
	return "unknown";
}

const char*
mono_arch_fregname (int reg)
{
	switch (reg) {
	case 0:
		return "%fr0";
	case 1:
		return "%fr1";
	case 2:
		return "%fr2";
	case 3:
		return "%fr3";
	case 4:
		return "%fr4";
	case 5:
		return "%fr5";
	case 6:
		return "%fr6";
	case 7:
		return "%fr7";
	default:
		return "unknown";
	}
}

const char *
mono_arch_xregname (int reg)
{
	switch (reg) {
	case 0:
		return "%xmm0";
	case 1:
		return "%xmm1";
	case 2:
		return "%xmm2";
	case 3:
		return "%xmm3";
	case 4:
		return "%xmm4";
	case 5:
		return "%xmm5";
	case 6:
		return "%xmm6";
	case 7:
		return "%xmm7";
	default:
		return "unknown";
	}
}

void
mono_x86_patch (unsigned char* code, gpointer target)
{
	x86_patch (code, (unsigned char*)target);
}
typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgOnFloatFpStack,
	ArgOnDoubleFpStack,
	ArgNone
} ArgStorage;

typedef struct {
	gint16 offset;
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	guint32 stack_align_amount;
	gboolean vtype_retaddr;
	/* The index of the vret arg in the argument list */
	int vret_arg_index;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;
#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };
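/* On x86 every managed argument is passed on the stack (PARAM_REGS == 0); param_regs is only a placeholder so the shared add_general () code compiles. */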
#if defined(TARGET_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif

static void inline
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}

static void inline
add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	g_assert (PARAM_REGS == 0);

	ainfo->storage = ArgOnStack;
	(*stack_size) += sizeof (gpointer) * 2;
}

static void inline
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += is_double ? 8 : 4;
	}
	else {
		/* A double register */
		if (is_double)
			ainfo->storage = ArgInDoubleSSEReg;
		else
			ainfo->storage = ArgInFloatSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}
static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	size = mini_type_stack_size_full (gsctx, &klass->byval_arg, NULL, sig->pinvoke);

#ifdef SMALL_STRUCTS_IN_REGS
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
/*
 * get_call_info:
 *
 * Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 */
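/*
 * Example: with PARAM_REGS == 0, a static int f (int a, gint64 b) gets a
 * 4-byte stack slot for a (via add_general) and an 8-byte pair slot for b
 * (via add_general_pair), so stack_usage == 12 before frame alignment.
 */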
static CallInfo*
get_call_info_internal (MonoGenericSharingContext *gsctx, CallInfo *cinfo, MonoMethodSignature *sig)
{
	guint32 i, gr, fr, pstart;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	gboolean is_pinvoke = sig->pinvoke;

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mini_type_get_underlying_type (gsctx, sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (ret_type)) {
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = X86_EAX;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack) {
				cinfo->vtype_retaddr = TRUE;
				/* The caller passes the address where the value is stored */
			}
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 12 */
			cinfo->vtype_retaddr = TRUE;
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}
	pstart = 0;
	/*
	 * To simplify get_this_arg_reg () and LLVM integration, emit the vret arg after
	 * the first argument, allowing 'this' to be always passed in the first arg reg.
	 * Also do this if the first argument is a reference type, since virtual calls
	 * are sometimes made using calli without sig->hasthis set, like in the delegate
	 * invoke wrappers.
	 */
	if (cinfo->vtype_retaddr && !is_pinvoke && (sig->hasthis || (sig->param_count > 0 && MONO_TYPE_IS_REFERENCE (mini_type_get_underlying_type (gsctx, sig->params [0]))))) {
		if (sig->hasthis) {
			add_general (&gr, &stack_size, cinfo->args + 0);
		} else {
			add_general (&gr, &stack_size, &cinfo->args [sig->hasthis + 0]);
			pstart = 1;
		}
		add_general (&gr, &stack_size, &cinfo->ret);
		cinfo->vret_arg_index = 1;
	} else {
		/* this */
		if (sig->hasthis)
			add_general (&gr, &stack_size, cinfo->args + 0);

		if (cinfo->vtype_retaddr)
			add_general (&gr, &stack_size, &cinfo->ret);
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = pstart; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mini_type_get_underlying_type (gsctx, sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (ptype)) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	if (mono_do_x86_stack_align && (stack_size % MONO_ARCH_FRAME_ALIGNMENT) != 0) {
		cinfo->need_stack_align = TRUE;
		cinfo->stack_align_amount = MONO_ARCH_FRAME_ALIGNMENT - (stack_size % MONO_ARCH_FRAME_ALIGNMENT);
		stack_size += cinfo->stack_align_amount;
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
static CallInfo*
get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig)
{
	int n = sig->hasthis + sig->param_count;
	CallInfo *cinfo;

	if (mp)
		cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
	else
		cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	return get_call_info_internal (gsctx, cinfo, sig);
}
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 * This should be signal safe, since it is called from
 * mono_arch_find_jit_info ().
 * FIXME: The metadata calls might not be signal safe.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int len, k, args_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;
	CallInfo *cinfo;

	/* Avoid g_malloc as it is not signal safe */
	len = sizeof (CallInfo) + (sizeof (ArgInfo) * (csig->param_count + 1));
	cinfo = (CallInfo*)g_newa (guint8, len);
	memset (cinfo, 0, len);

	cinfo = get_call_info_internal (NULL, cinfo, csig);

	arg_info [0].offset = offset;

	if (cinfo->vtype_retaddr && cinfo->vret_arg_index == 0) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	if (csig->hasthis) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	if (cinfo->vtype_retaddr && cinfo->vret_arg_index == 1 && csig->hasthis) {
		/* Emitted after this */
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].size = args_size;

	for (k = 0; k < param_count; k++) {
		size = mini_type_stack_size_full (NULL, csig->params [k], &align, csig->pinvoke);

		/* ignore alignment for now */
		align = 1;
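		/* pad is the number of bytes needed to round args_size up to the next multiple of align (a power of two, so the mask trick works). */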
		args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
		arg_info [k].pad = pad;
		args_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;

		if (k == 0 && cinfo->vtype_retaddr && cinfo->vret_arg_index == 1 && !csig->hasthis) {
			/* Emitted after the first arg */
			args_size += sizeof (gpointer);
			offset += 4;
		}
	}

	if (mono_do_x86_stack_align && !CALLCONV_IS_STDCALL (csig))
		align = MONO_ARCH_FRAME_ALIGNMENT;
	else
		align = 4;
	args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	return args_size;
}
gboolean
mono_x86_tail_call_supported (MonoMethodSignature *caller_sig, MonoMethodSignature *callee_sig)
{
	CallInfo *c1, *c2;
	gboolean res;

	c1 = get_call_info (NULL, NULL, caller_sig);
	c2 = get_call_info (NULL, NULL, callee_sig);
	res = c1->stack_usage >= c2->stack_usage;
	if (callee_sig->ret && MONO_TYPE_ISSTRUCT (callee_sig->ret) && c2->ret.storage != ArgValuetypeInReg)
		/* An address on the callee's stack is passed as the first argument */
		res = FALSE;

	g_free (c1);
	g_free (c2);

	return res;
}
static const guchar cpuid_impl [] = {
	0x55,				/* push %ebp */
	0x89, 0xe5,			/* mov %esp,%ebp */
	0x53,				/* push %ebx */
	0x8b, 0x45, 0x08,		/* mov 0x8(%ebp),%eax */
	0x0f, 0xa2,			/* cpuid */
	0x50,				/* push %eax */
	0x8b, 0x45, 0x10,		/* mov 0x10(%ebp),%eax */
	0x89, 0x18,			/* mov %ebx,(%eax) */
	0x8b, 0x45, 0x14,		/* mov 0x14(%ebp),%eax */
	0x89, 0x08,			/* mov %ecx,(%eax) */
	0x8b, 0x45, 0x18,		/* mov 0x18(%ebp),%eax */
	0x89, 0x10,			/* mov %edx,(%eax) */
	0x58,				/* pop %eax */
	0x8b, 0x55, 0x0c,		/* mov 0xc(%ebp),%edx */
	0x89, 0x02,			/* mov %eax,(%edx) */
	0x5b,				/* pop %ebx */
	0xc9,				/* leave */
	0xc3,				/* ret */
};
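/*
 * cpuid_impl is a tiny hand-assembled cdecl function matching CpuidFunc;
 * it is copied into executable memory at runtime (see cpuid () below)
 * rather than written as inline asm, to work around WinXP DEP and the
 * gcc PIC issues mentioned further down.
 */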
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);

static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
#if defined(__native_client__)
	/* Taken from below, the bug listed in the comment is */
	/* only valid for non-static cases. */
	__asm__ __volatile__ ("cpuid"
		: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
		: "a" (id));
	return 1;
#else
	int have_cpuid = 0;
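	/* Detect CPUID support by toggling bit 21 (the ID flag) in EFLAGS: if the change sticks, the CPU implements CPUID. */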
#ifndef _MSC_VER
	__asm__ __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		static CpuidFunc func = NULL;
		void *ptr;
		if (!func) {
			ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
			memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
			func = (CpuidFunc)ptr;
		}
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
#endif
}
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__ __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	_control87 (_PC_53, MCW_PC);
#endif
}
/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
	InitializeCriticalSection (&mini_arch_mutex);

	ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ);
	bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
	mono_mprotect (bp_trigger_page, mono_pagesize (), 0);

	mono_aot_register_jit_icall ("mono_x86_throw_exception", mono_x86_throw_exception);
	mono_aot_register_jit_icall ("mono_x86_throw_corlib_exception", mono_x86_throw_corlib_exception);
}

/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
	DeleteCriticalSection (&mini_arch_mutex);
}
/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
#if !defined(__native_client__)
	int eax, ebx, ecx, edx;
	guint32 opts = 0;

	*exclude_mask = 0;

	if (mono_aot_only)
		/* The cpuid function allocates from the global codeman */
		return opts;

	/* Feature Flags function, flags returned in EDX. */
	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 15)) {
			opts |= MONO_OPT_CMOV;
			if (edx & 1)
				opts |= MONO_OPT_FCMOV;
			else
				*exclude_mask |= MONO_OPT_FCMOV;
		} else
			*exclude_mask |= MONO_OPT_CMOV;
		if (edx & (1 << 26))
			opts |= MONO_OPT_SSE2;
		else
			*exclude_mask |= MONO_OPT_SSE2;

#ifdef MONO_ARCH_SIMD_INTRINSICS
		/* SIMD intrinsics require at least SSE2. */
		if (!(opts & MONO_OPT_SSE2))
			*exclude_mask |= MONO_OPT_SIMD;
#endif
	}
	return opts;
#else
	return MONO_OPT_CMOV | MONO_OPT_FCMOV | MONO_OPT_SSE2;
#endif
}
/*
 * This function tests for all supported SSE versions.
 *
 * Returns a bitmask corresponding to all supported versions.
 */
guint32
mono_arch_cpu_enumerate_simd_versions (void)
{
	int eax, ebx, ecx, edx;
	guint32 sse_opts = 0;

	if (mono_aot_only)
		/* The cpuid function allocates from the global codeman */
		return sse_opts;

	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 25))
			sse_opts |= SIMD_VERSION_SSE1;
		if (edx & (1 << 26))
			sse_opts |= SIMD_VERSION_SSE2;
		if (ecx & (1 << 0))
			sse_opts |= SIMD_VERSION_SSE3;
		if (ecx & (1 << 9))
			sse_opts |= SIMD_VERSION_SSSE3;
		if (ecx & (1 << 19))
			sse_opts |= SIMD_VERSION_SSE41;
		if (ecx & (1 << 20))
			sse_opts |= SIMD_VERSION_SSE42;
	}

	/*
	 * Yes, all this needs to be done to check for sse4a.
	 * See: "Amd: CPUID Specification"
	 */
	if (cpuid (0x80000000, &eax, &ebx, &ecx, &edx)) {
		/* eax greater or equal to 0x80000001, ebx = 'htuA', ecx = 'DMAc', edx = 'itne' */
		if ((((unsigned int) eax) >= 0x80000001) && (ebx == 0x68747541) && (ecx == 0x444D4163) && (edx == 0x69746E65)) {
			cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
			if (ecx & (1 << 6))
				sse_opts |= SIMD_VERSION_SSE4a;
		}
	}

	return sse_opts;
}
/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		if (reg == -1)
			return TRUE;
	}

	return FALSE;
}
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we don't allocate I1 to registers because there is no simple way to sign extend
		 * 8bit quantities in caller saved registers on x86 */
		if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	/* we can use 3 registers for global allocation */
	regs = g_list_prepend (regs, (gpointer)X86_EBX);
	regs = g_list_prepend (regs, (gpointer)X86_ESI);
	regs = g_list_prepend (regs, (gpointer)X86_EDI);

	return regs;
}
/*
 * mono_arch_regalloc_cost:
 *
 * Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		return (ins->opcode == OP_ARG) ? 1 : 0;
	else
		/* push+pop+possible load if it is an argument */
		return (ins->opcode == OP_ARG) ? 3 : 2;
}
static void
set_needs_stack_frame (MonoCompile *cfg, gboolean flag)
{
	static int inited = FALSE;
	static int count = 0;

	if (cfg->arch.need_stack_frame_inited) {
		g_assert (cfg->arch.need_stack_frame == flag);
		return;
	}

	cfg->arch.need_stack_frame = flag;
	cfg->arch.need_stack_frame_inited = TRUE;

	if (flag)
		return;

	if (!inited) {
		mono_counters_register ("Could eliminate stack frame", MONO_COUNTER_INT|MONO_COUNTER_JIT, &count);
		inited = TRUE;
	}
	++count;

	//g_print ("will eliminate %s.%s.%s\n", cfg->method->klass->name_space, cfg->method->klass->name, cfg->method->name);
}
static gboolean
needs_stack_frame (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	gboolean result = FALSE;

#if defined(__APPLE__)
	/* OSX requires stack frame code to have the correct alignment. */
	return TRUE;
#endif

	if (cfg->arch.need_stack_frame_inited)
		return cfg->arch.need_stack_frame;

	header = cfg->header;
	sig = mono_method_signature (cfg->method);

	if (cfg->disable_omit_fp)
		result = TRUE;
	else if (cfg->flags & MONO_CFG_HAS_ALLOCA)
		result = TRUE;
	else if (cfg->method->save_lmf)
		result = TRUE;
	else if (cfg->stack_offset)
		result = TRUE;
	else if (cfg->param_area)
		result = TRUE;
	else if (cfg->flags & (MONO_CFG_HAS_CALLS | MONO_CFG_HAS_ALLOCA | MONO_CFG_HAS_TAIL))
		result = TRUE;
	else if (header->num_clauses)
		result = TRUE;
	else if (sig->param_count + sig->hasthis)
		result = TRUE;
	else if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		result = TRUE;
	else if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
		 (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
		result = TRUE;

	set_needs_stack_frame (cfg, result);

	return cfg->arch.need_stack_frame;
}
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split into another method.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = cfg->header;
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);

	cfg->frame_reg = X86_EBP;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, TRUE, &locals_stack_size, &locals_stack_align);
	if (locals_stack_size > MONO_ARCH_MAX_FRAME_SIZE) {
		char *mname = mono_method_full_name (cfg->method, TRUE);
		cfg->exception_type = MONO_EXCEPTION_INVALID_PROGRAM;
		cfg->exception_message = g_strdup_printf ("Method %s stack is too big.", mname);
		g_free (mname);
		return;
	}
	if (locals_stack_align) {
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	cfg->locals_min_stack_offset = - (offset + locals_stack_size);
	cfg->locals_max_stack_offset = - offset;
	/*
	 * EBP is at alignment 8 % MONO_ARCH_FRAME_ALIGNMENT, so if we
	 * have locals larger than 8 bytes we need to make sure that
	 * they have the appropriate offset.
	 */
	if (MONO_ARCH_FRAME_ALIGNMENT > 8 && locals_stack_align > 8)
		offset += MONO_ARCH_FRAME_ALIGNMENT - sizeof (gpointer) * 2;
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		if (MONO_TYPE_ISSTRUCT (sig->ret)) {
			/*
			 * In the new IR, the cfg->vret_addr variable represents the
			 * vtype return value.
			 */
			cfg->vret_addr->opcode = OP_REGOFFSET;
			cfg->vret_addr->inst_basereg = cfg->frame_reg;
			cfg->vret_addr->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
			if (G_UNLIKELY (cfg->verbose_level > 1)) {
				printf ("vret_addr =");
				mono_print_ins (cfg->vret_addr);
			}
		} else {
			cfg->ret->opcode = OP_REGOFFSET;
			cfg->ret->inst_basereg = X86_EBP;
			cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		}
		break;
	case ArgValuetypeInReg:
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		cfg->ret->dreg = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->args [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	cfg->stack_offset = offset;
}
void
mono_arch_create_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	CallInfo *cinfo;

	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);

	if (cinfo->ret.storage == ArgValuetypeInReg)
		cfg->ret_var_is_local = TRUE;
	if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig->ret)) {
		cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
	}
}
/*
 * It is expensive to adjust esp for each individual fp argument pushed on the stack
 * so we try to do it just once when we have multiple fp arguments in a row.
 * We don't use this mechanism generally because for int arguments the generated code
 * is slightly bigger and new generation cpus optimize away the dependency chains
 * created by push instructions on the esp value.
 * fp_arg_setup is the first argument in the execution sequence where the esp register
 * is modified.
 */
static G_GNUC_UNUSED int
collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
{
	int fp_space = 0;
	MonoType *t;

	for (; start_arg < sig->param_count; ++start_arg) {
		t = mini_type_get_underlying_type (NULL, sig->params [start_arg]);
		if (!t->byref && t->type == MONO_TYPE_R8) {
			fp_space += sizeof (double);
			*fp_arg_setup = start_arg;
		} else {
			break;
		}
	}
	return fp_space;
}
static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
	MonoMethodSignature *tmp_sig;

	/* FIXME: Add support for signature tokens to AOT */
	cfg->disable_aot = TRUE;

	/*
	 * mono_ArgIterator_Setup assumes the signature cookie is
	 * passed first and all the arguments which were before it are
	 * passed on the stack after the signature. So compensate by
	 * passing a different signature.
	 */
	tmp_sig = mono_metadata_signature_dup (call->signature);
	tmp_sig->param_count -= call->signature->sentinelpos;
	tmp_sig->sentinelpos = 0;
	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

	MONO_EMIT_NEW_BIALU_IMM (cfg, OP_X86_PUSH_IMM, -1, -1, tmp_sig);
}
#ifdef ENABLE_LLVM
LLVMCallInfo*
mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
{
	int i, n;
	CallInfo *cinfo;
	ArgInfo *ainfo;
	LLVMCallInfo *linfo;
	MonoType *t;

	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);

	linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));

	/*
	 * LLVM always uses the native ABI while we use our own ABI, the
	 * only difference is the handling of vtypes:
	 * - we only pass/receive them in registers in some cases, and only
	 *   in 1 or 2 integer registers.
	 */
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		if (sig->pinvoke) {
			cfg->exception_message = g_strdup ("pinvoke + vtypes");
			cfg->disable_llvm = TRUE;
			return linfo;
		}

		cfg->exception_message = g_strdup ("vtype ret in call");
		cfg->disable_llvm = TRUE;
		/*
		linfo->ret.storage = LLVMArgVtypeInReg;
		for (j = 0; j < 2; ++j)
			linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, cinfo->ret.pair_storage [j]);
		*/
	}

	if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage == ArgInIReg) {
		/* Vtype returned using a hidden argument */
		linfo->ret.storage = LLVMArgVtypeRetAddr;
		linfo->vret_arg_index = cinfo->vret_arg_index;
	}

	if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage != ArgInIReg) {
		// FIXME:
		cfg->exception_message = g_strdup ("vtype ret in call");
		cfg->disable_llvm = TRUE;
	}

	for (i = 0; i < n; ++i) {
		ainfo = cinfo->args + i;

		if (i >= sig->hasthis)
			t = sig->params [i - sig->hasthis];
		else
			t = &mono_defaults.int_class->byval_arg;

		linfo->args [i].storage = LLVMArgNone;

		switch (ainfo->storage) {
		case ArgInIReg:
			linfo->args [i].storage = LLVMArgInIReg;
			break;
		case ArgInDoubleSSEReg:
		case ArgInFloatSSEReg:
			linfo->args [i].storage = LLVMArgInFPReg;
			break;
		case ArgOnStack:
			if (MONO_TYPE_ISSTRUCT (t)) {
				if (mono_class_value_size (mono_class_from_mono_type (t), NULL) == 0)
					/* LLVM seems to allocate argument space for empty structures too */
					linfo->args [i].storage = LLVMArgNone;
				else
					linfo->args [i].storage = LLVMArgVtypeByVal;
			} else {
				linfo->args [i].storage = LLVMArgInIReg;
				if (t->byref) {
					if (t->type == MONO_TYPE_R4)
						linfo->args [i].storage = LLVMArgInFPReg;
					else if (t->type == MONO_TYPE_R8)
						linfo->args [i].storage = LLVMArgInFPReg;
				}
			}
			break;
		case ArgValuetypeInReg:
			if (sig->pinvoke) {
				cfg->exception_message = g_strdup ("pinvoke + vtypes");
				cfg->disable_llvm = TRUE;
				return linfo;
			}

			cfg->exception_message = g_strdup ("vtype arg");
			cfg->disable_llvm = TRUE;
			/*
			linfo->args [i].storage = LLVMArgVtypeInReg;
			for (j = 0; j < 2; ++j)
				linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
			*/
			break;
		default:
			cfg->exception_message = g_strdup ("ainfo->storage");
			cfg->disable_llvm = TRUE;
			break;
		}
	}

	return linfo;
}
#endif
void
mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n;
	CallInfo *cinfo;
	int sentinelpos = 0;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		sentinelpos = sig->sentinelpos + (sig->hasthis ? 1 : 0);

	if (cinfo->need_stack_align) {
		MONO_INST_NEW (cfg, arg, OP_SUB_IMM);
		arg->dreg = X86_ESP;
		arg->sreg1 = X86_ESP;
		arg->inst_imm = cinfo->stack_align_amount;
		MONO_ADD_INS (cfg->cbb, arg);
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/*
			 * Tell the JIT to use a more efficient calling convention: call using
			 * OP_CALL, compute the result location after the call, and save the
			 * result there.
			 */
			call->vret_in_reg = TRUE;
			if (call->vret_var)
				NULLIFY_INS (call->vret_var);
		}
	}

	/* Handle the case where there are no implicit arguments */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
		emit_sig_cookie (cfg, call, cinfo);
	}

	/* Arguments are pushed in the reverse order */
	for (i = n - 1; i >= 0; i --) {
		ArgInfo *ainfo = cinfo->args + i;
		MonoType *t;

		if (cinfo->vtype_retaddr && cinfo->vret_arg_index == 1 && i == 0) {
			/* Push the vret arg before the first argument */
			MonoInst *vtarg;
			MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
			vtarg->type = STACK_MP;
			vtarg->sreg1 = call->vret_var->dreg;
			MONO_ADD_INS (cfg->cbb, vtarg);
		}

		if (i >= sig->hasthis)
			t = sig->params [i - sig->hasthis];
		else
			t = &mono_defaults.int_class->byval_arg;
		t = mini_type_get_underlying_type (cfg->generic_sharing_context, t);

		MONO_INST_NEW (cfg, arg, OP_X86_PUSH);

		in = call->args [i];
		arg->cil_code = in->cil_code;
		arg->sreg1 = in->dreg;
		arg->type = in->type;

		g_assert (in->dreg != -1);

		if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
			guint32 align;
			guint32 size;

			g_assert (in->klass);

			if (t->type == MONO_TYPE_TYPEDBYREF) {
				size = sizeof (MonoTypedRef);
				align = sizeof (gpointer);
			}
			else {
				size = mini_type_stack_size_full (cfg->generic_sharing_context, &in->klass->byval_arg, &align, sig->pinvoke);
			}

			if (size > 0) {
				arg->opcode = OP_OUTARG_VT;
				arg->sreg1 = in->dreg;
				arg->klass = in->klass;
				arg->backend.size = size;

				MONO_ADD_INS (cfg->cbb, arg);
			}
		}
		else {
			switch (ainfo->storage) {
			case ArgOnStack:
				arg->opcode = OP_X86_PUSH;
				if (!t->byref) {
					if (t->type == MONO_TYPE_R4) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 4);
						arg->opcode = OP_STORER4_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_R8) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
						arg->opcode = OP_STORER8_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_I8 || t->type == MONO_TYPE_U8) {
						arg->sreg1 ++;
						MONO_EMIT_NEW_UNALU (cfg, OP_X86_PUSH, -1, in->dreg + 2);
					}
				}
				break;
			default:
				g_assert_not_reached ();
			}

			MONO_ADD_INS (cfg->cbb, arg);
		}

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
			/* Emit the signature cookie just before the implicit arguments */
			emit_sig_cookie (cfg, call, cinfo);
		}
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		MonoInst *vtarg;

		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/* Already done */
		}
		else if (cinfo->ret.storage == ArgInIReg) {
			NOT_IMPLEMENTED;
			/* The return address is passed in a register */
			MONO_INST_NEW (cfg, vtarg, OP_MOVE);
			vtarg->sreg1 = call->inst.dreg;
			vtarg->dreg = mono_alloc_ireg (cfg);
			MONO_ADD_INS (cfg->cbb, vtarg);

			mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
		} else if (cinfo->vtype_retaddr && cinfo->vret_arg_index == 0) {
			MonoInst *vtarg;
			MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
			vtarg->type = STACK_MP;
			vtarg->sreg1 = call->vret_var->dreg;
			MONO_ADD_INS (cfg->cbb, vtarg);
		}

		/* if the function returns a struct on stack, the called method already does a ret $0x4 */
		if (cinfo->ret.storage != ArgValuetypeInReg)
			cinfo->stack_usage -= 4;
	}

	call->stack_usage = cinfo->stack_usage;
}
void
mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
{
	MonoInst *arg;
	int size = ins->backend.size;
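	/* Small vtypes (<= 4 bytes) are pushed straight from memory; mid-sized ones (<= 20 bytes) get an inlined memcpy into freshly reserved stack; anything larger goes through OP_X86_PUSH_OBJ. */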
	if (size <= 4) {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
		arg->sreg1 = src->dreg;

		MONO_ADD_INS (cfg->cbb, arg);
	} else if (size <= 20) {
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 4));
		mini_emit_memcpy (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
	} else {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
		arg->inst_basereg = src->dreg;
		arg->inst_offset = 0;
		arg->inst_imm = size;

		MONO_ADD_INS (cfg->cbb, arg);
	}
}
void
mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
{
	MonoType *ret = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);

	if (!ret->byref) {
		if (ret->type == MONO_TYPE_R4) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_R8) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_LMOVE, cfg->ret->dreg, val->dreg);
			else {
				MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EAX, val->dreg + 1);
				MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EDX, val->dreg + 2);
			}
			return;
		}
	}

	MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
}
/*
 * Allow tracing to work with this interface (with an optional argument)
 */
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;

	g_assert (MONO_ARCH_FRAME_ALIGNMENT >= 8);
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 8);

	/* if some args are passed in registers, we need to save them here */
	x86_push_reg (code, X86_EBP);

	if (cfg->compile_aot) {
		x86_push_imm (code, cfg->method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
		x86_push_imm (code, cfg->method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT);

	return code;
}
enum {
	SAVE_NONE,
	SAVE_STRUCT,
	SAVE_EAX,
	SAVE_EAX_EDX,
	SAVE_FP
};
void*
mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
{
	guchar *code = p;
	int arg_size = 0, stack_usage = 0, save_mode = SAVE_NONE;
	MonoMethod *method = cfg->method;
	MonoType *ret_type = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);

	switch (ret_type->type) {
	case MONO_TYPE_VOID:
		/* special case string .ctor icall */
		if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class) {
			save_mode = SAVE_EAX;
			stack_usage = enable_arguments ? 8 : 4;
		} else
			save_mode = SAVE_NONE;
		break;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		save_mode = SAVE_EAX_EDX;
		stack_usage = enable_arguments ? 16 : 8;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		save_mode = SAVE_FP;
		stack_usage = enable_arguments ? 16 : 8;
		break;
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (ret_type)) {
			save_mode = SAVE_EAX;
			stack_usage = enable_arguments ? 8 : 4;
			break;
		}
		/* Fall through */
	case MONO_TYPE_VALUETYPE:
		// FIXME: Handle SMALL_STRUCT_IN_REG here for proper alignment on darwin-x86
		save_mode = SAVE_STRUCT;
		stack_usage = enable_arguments ? 4 : 0;
		break;
	default:
		save_mode = SAVE_EAX;
		stack_usage = enable_arguments ? 8 : 4;
		break;
	}

	x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage - 4);
	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_push_reg (code, X86_EDX);
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EDX);
			x86_push_reg (code, X86_EAX);
			arg_size = 8;
		}
		break;
	case SAVE_EAX:
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EAX);
			arg_size = 4;
		}
		break;
	case SAVE_FP:
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		if (enable_arguments) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
			arg_size = 8;
		}
		break;
	case SAVE_STRUCT:
		if (enable_arguments) {
			x86_push_membase (code, X86_EBP, 8);
			arg_size = 4;
		}
		break;
	case SAVE_NONE:
	default:
		break;
	}

	if (cfg->compile_aot) {
		x86_push_imm (code, method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
		x86_push_imm (code, method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}

	x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_pop_reg (code, X86_EAX);
		x86_pop_reg (code, X86_EDX);
		break;
	case SAVE_EAX:
		x86_pop_reg (code, X86_EAX);
		break;
	case SAVE_FP:
		x86_fld_membase (code, X86_ESP, 0, TRUE);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		break;
	case SAVE_NONE:
	default:
		break;
	}

	x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage);

	return code;
}
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->inst_true_bb->native_offset) { \
	x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
} else { \
	mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
	if ((cfg->opt & MONO_OPT_BRANCH) && \
	    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
		x86_branch8 (code, cond, 0, sign); \
	else \
		x86_branch32 (code, cond, 0, sign); \
}
/*
 * Emit an exception if the condition fails and, if possible,
 * branch directly to the target.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
	do { \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, signed); \
		} else { \
			EMIT_COND_BRANCH (tins, cond, signed); \
		} \
	} while (0);
#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0);
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	gboolean needs_paddings = TRUE;
	guint32 pad_size;

	if (cfg->abs_patches && g_hash_table_lookup (cfg->abs_patches, data)) {
	} else {
		MonoJitICallInfo *info = mono_find_jit_icall_by_addr (data);
		if (info) {
			if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && strstr (cfg->method->name, info->name))
				needs_paddings = FALSE; /* A call to the wrapped function */
		}
	}

	if (cfg->compile_aot)
		needs_paddings = FALSE;
	/*
	 * The address must be 4 bytes aligned to avoid spanning multiple cache lines.
	 * This is required for code patching to be safe on SMP machines.
	 */
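	/* code + 1 is where the call's 4-byte displacement will start (one byte for the 0xe8 opcode), so pad_size is its offset within a 4-byte line. */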
	pad_size = (guint32)(code + 1 - cfg->native_code) & 0x3;
#ifndef __native_client_codegen__
	if (needs_paddings && pad_size)
		x86_padding (code, 4 - pad_size);
#endif

	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}
#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
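/* The ADC/SBB opcodes listed above consume the carry flag, so peephole substitutions that clobber EFLAGS (e.g. replacing mov reg,0 with xor reg,reg in pass 2 below) must not be made right before them. */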
/*
 * mono_peephole_pass_1:
 *
 * Perform peephole opts which should/can be performed before local regalloc
 */
void
mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		MonoInst *last_ins = ins->prev;

		switch (ins->opcode) {
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				/*
				 * X86_LEA is like ADD, but doesn't have the
				 * sreg1==dreg restriction.
				 */
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_SUB_IMM:
		case OP_ISUB_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
				ins->inst_imm = -ins->inst_imm;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		case OP_COMPARE_IMM:
		case OP_ICOMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0)
			 * -->
			 * OP_X86_TEST_NULL (reg)
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/*
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_COMPARE_IMM;
				ins->sreg1 = last_ins->sreg1;

				/* check if we can remove cmp reg,0 with test null */
				if (!ins->inst_imm)
					ins->opcode = OP_X86_TEST_NULL;
			}

			break;
		case OP_X86_PUSH_MEMBASE:
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
			    last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_X86_PUSH;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
void
mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we can't do it always */
			if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
				MonoInst *ins2;

				ins->opcode = OP_IXOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;

				/*
				 * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG
				 * since it takes 3 bytes instead of 7.
				 */
				for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STORE_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STOREI4_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
						/* Continue iteration */
					}
					else
						break;
				}
			}
			break;
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_ISUB_IMM:
		case OP_SUB_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
/*
 * mono_arch_lowering_pass:
 *
 * Converts complex opcodes into simpler ones so that each IR instruction
 * corresponds to one machine instruction.
 */
void
mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *next;

	/*
	 * FIXME: Need to add more instructions, but the current machine
	 * description can't model some parts of the composite instructions like
	 * cdq.
	 */
	MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) {
		switch (ins->opcode) {
		case OP_IREM_IMM:
		case OP_IDIV_IMM:
		case OP_IDIV_UN_IMM:
		case OP_IREM_UN_IMM:
			/*
			 * Keep the cases where we could generate optimized code, otherwise convert
			 * to the non-imm variant.
			 */
			if ((ins->opcode == OP_IREM_IMM) && mono_is_power_of_two (ins->inst_imm) >= 0)
				break;
			mono_decompose_op_imm (cfg, bb, ins);
			break;
		default:
			break;
		}
	}

	bb->max_vreg = cfg->next_vreg;
}
static const int
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Maps CMP_... constants to X86_CC_... constants */
static const int
cc_table [] = {
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};

static const int
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};
2031 static unsigned char*
2032 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
2034 #define XMM_TEMP_REG 0
2035 /*This SSE2 optimization must not be done which OPT_SIMD in place as it clobbers xmm0.*/
2036 /*The xmm pass decomposes OP_FCONV_ ops anyway anyway.*/
2037 if (cfg->opt & MONO_OPT_SSE2 && size < 8 && !(cfg->opt & MONO_OPT_SIMD)) {
2038 /* optimize by assigning a local var for this use so we avoid
2039 * the stack manipulations */
2040 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2041 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
2042 x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
2043 x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
2044 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2045 if (size == 1)
2046 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
2047 else if (size == 2)
2048 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
2049 return code;
2051 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
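/* fnstcw saves the current FPU control word; or-ing in 0xc00 sets the
 * rounding-control bits (10-11) to 11b = truncate toward zero, the
 * behaviour C-style float->int conversion requires. */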
2052 x86_fnstcw_membase(code, X86_ESP, 0);
2053 x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
2054 x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
2055 x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
2056 x86_fldcw_membase (code, X86_ESP, 2);
2057 if (size == 8) {
2058 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2059 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2060 x86_pop_reg (code, dreg);
/* FIXME: need the high register
 * x86_pop_reg (code, dreg_high);
 */
2064 } else {
2065 x86_push_reg (code, X86_EAX); // SP = SP - 4
2066 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
2067 x86_pop_reg (code, dreg);
2069 x86_fldcw_membase (code, X86_ESP, 0);
2070 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2072 if (size == 1)
2073 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
2074 else if (size == 2)
2075 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
2076 return code;
2079 static unsigned char*
2080 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2082 int sreg = tree->sreg1;
2083 int need_touch = FALSE;
2085 #if defined(TARGET_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
2086 need_touch = TRUE;
2087 #endif
2089 if (need_touch) {
2090 guint8* br[5];
/*
 * Under Windows:
 * If the requested stack size is larger than one page,
 * perform a stack-touch operation.
 *
 * Generate stack probe code.
 * Under Windows, it is necessary to allocate one page at a time,
 * "touching" the stack after each successful sub-allocation. This is
 * because of the way stack growth is implemented - there is a
 * guard page before the lowest stack page that is currently committed.
 * The stack normally grows sequentially, so the OS traps access to the
 * guard page and commits more pages when needed.
 */
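/* Sizes of at most one page (sreg & ~0xFFF == 0) can skip the probe loop. */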
2106 x86_test_reg_imm (code, sreg, ~0xFFF);
2107 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2109 br[2] = code; /* loop */
2110 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
2111 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
/*
 * By the end of the loop, sreg is smaller than 0x1000, so the init routine
 * that follows only initializes the last part of the area.
 */
/* Same as the init code below with size==0x1000 */
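/* ecx = number of dwords to clear, eax = 0, edi = start of the freshly
 * committed page (the lea skips the three saved registers);
 * rep stosl then zeroes it. */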
2118 if (tree->flags & MONO_INST_INIT) {
2119 x86_push_reg (code, X86_EAX);
2120 x86_push_reg (code, X86_ECX);
2121 x86_push_reg (code, X86_EDI);
2122 x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
2123 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
2124 x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2125 x86_cld (code);
2126 x86_prefix (code, X86_REP_PREFIX);
2127 x86_stosl (code);
2128 x86_pop_reg (code, X86_EDI);
2129 x86_pop_reg (code, X86_ECX);
2130 x86_pop_reg (code, X86_EAX);
2133 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2134 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2135 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2136 x86_patch (br[3], br[2]);
2137 x86_test_reg_reg (code, sreg, sreg);
2138 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2139 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2141 br[1] = code; x86_jump8 (code, 0);
2143 x86_patch (br[0], code);
2144 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2145 x86_patch (br[1], code);
2146 x86_patch (br[4], code);
2148 else
2149 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
2151 if (tree->flags & MONO_INST_INIT) {
2152 int offset = 0;
2153 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
2154 x86_push_reg (code, X86_EAX);
2155 offset += 4;
2157 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
2158 x86_push_reg (code, X86_ECX);
2159 offset += 4;
2161 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
2162 x86_push_reg (code, X86_EDI);
2163 offset += 4;
2166 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
2167 if (sreg != X86_ECX)
2168 x86_mov_reg_reg (code, X86_ECX, sreg, 4);
2169 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
2171 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
2172 x86_cld (code);
2173 x86_prefix (code, X86_REP_PREFIX);
2174 x86_stosl (code);
2176 if (tree->dreg != X86_EDI && sreg != X86_EDI)
2177 x86_pop_reg (code, X86_EDI);
2178 if (tree->dreg != X86_ECX && sreg != X86_ECX)
2179 x86_pop_reg (code, X86_ECX);
2180 if (tree->dreg != X86_EAX && sreg != X86_EAX)
2181 x86_pop_reg (code, X86_EAX);
2183 return code;
2187 static guint8*
2188 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2190 /* Move return value to the target register */
2191 switch (ins->opcode) {
2192 case OP_CALL:
2193 case OP_CALL_REG:
2194 case OP_CALL_MEMBASE:
2195 if (ins->dreg != X86_EAX)
2196 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2197 break;
2198 default:
2199 break;
2202 return code;
2205 #ifdef __APPLE__
2206 static int tls_gs_offset;
2207 #endif
2209 gboolean
2210 mono_x86_have_tls_get (void)
2212 #ifdef __APPLE__
2213 static gboolean have_tls_get = FALSE;
2214 static gboolean inited = FALSE;
2216 if (inited)
2217 return have_tls_get;
2219 guint32 *ins = (guint32*)pthread_getspecific;
/*
 * We're looking for these two instructions:
 *
 * mov 0x4(%esp),%eax
 * mov %gs:[offset](,%eax,4),%eax
 */
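/* ins [0] and ins [1] are the little-endian dwords of those encodings:
 * 8b 44 24 04 -> 0x0424448b and 65 8b 04 85 -> 0x85048b65. */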
2226 have_tls_get = ins [0] == 0x0424448b && ins [1] == 0x85048b65;
2227 tls_gs_offset = ins [2];
2229 inited = TRUE;
2231 return have_tls_get;
2232 #else
2233 return TRUE;
2234 #endif
/*
 * mono_x86_emit_tls_get:
 * @code: buffer to store code to
 * @dreg: hard register where to place the result
 * @tls_offset: offset info
 *
 * mono_x86_emit_tls_get emits in @code the native code that puts in
 * the dreg register the item in the thread local storage identified
 * by tls_offset.
 *
 * Returns: a pointer to the end of the stored code
 */
2249 guint8*
2250 mono_x86_emit_tls_get (guint8* code, int dreg, int tls_offset)
2252 #if defined(__APPLE__)
2253 x86_prefix (code, X86_GS_PREFIX);
2254 x86_mov_reg_mem (code, dreg, tls_gs_offset + (tls_offset * 4), 4);
2255 #elif defined(TARGET_WIN32)
/*
 * See the Under the Hood article in the May 1996 issue of Microsoft Systems
 * Journal and/or a disassembly of the TlsGetValue () function.
 */
2260 g_assert (tls_offset < 64);
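/* fs:[0x18] is the TEB self pointer; the TlsSlots array starts at
 * TEB offset 0xE10 (3600). */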
2261 x86_prefix (code, X86_FS_PREFIX);
2262 x86_mov_reg_mem (code, dreg, 0x18, 4);
/* Clear the TEB LastErrorValue field (offset 0x34), as TlsGetValue () does */
2264 x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
2265 x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
2266 #else
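/* Under Xen, %gs-relative accesses at non-zero offsets can trap into the
 * hypervisor, so load the TLS block address from %gs:0 first and then do
 * a plain indexed load. */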
2267 if (optimize_for_xen) {
2268 x86_prefix (code, X86_GS_PREFIX);
2269 x86_mov_reg_mem (code, dreg, 0, 4);
2270 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
2271 } else {
2272 x86_prefix (code, X86_GS_PREFIX);
2273 x86_mov_reg_mem (code, dreg, tls_offset, 4);
2275 #endif
2276 return code;
/*
 * emit_load_volatile_arguments:
 *
 * Load volatile arguments from the stack to the original input registers.
 * Required before a tail call.
 */
2285 static guint8*
2286 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2288 MonoMethod *method = cfg->method;
2289 MonoMethodSignature *sig;
2290 MonoInst *inst;
2291 CallInfo *cinfo;
2292 guint32 i;
2294 /* FIXME: Generate intermediate code instead */
2296 sig = mono_method_signature (method);
2298 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);
2300 /* This is the opposite of the code in emit_prolog */
2302 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2303 ArgInfo *ainfo = cinfo->args + i;
2304 MonoType *arg_type;
2305 inst = cfg->args [i];
2307 if (sig->hasthis && (i == 0))
2308 arg_type = &mono_defaults.object_class->byval_arg;
2309 else
2310 arg_type = sig->params [i - sig->hasthis];
/*
 * On x86, the arguments are either in their original stack locations, or in
 * global regs.
 */
2316 if (inst->opcode == OP_REGVAR) {
2317 g_assert (ainfo->storage == ArgOnStack);
2319 x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
2323 return code;
2326 #define REAL_PRINT_REG(text,reg) \
2327 mono_assert (reg >= 0); \
2328 x86_push_reg (code, X86_EAX); \
2329 x86_push_reg (code, X86_EDX); \
2330 x86_push_reg (code, X86_ECX); \
2331 x86_push_reg (code, reg); \
2332 x86_push_imm (code, reg); \
2333 x86_push_imm (code, text " %d %p\n"); \
2334 x86_mov_reg_imm (code, X86_EAX, printf); \
2335 x86_call_reg (code, X86_EAX); \
2336 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
2337 x86_pop_reg (code, X86_ECX); \
2338 x86_pop_reg (code, X86_EDX); \
2339 x86_pop_reg (code, X86_EAX);
2341 /* REAL_PRINT_REG does not appear to be used, and was not adapted to work with Native Client. */
#ifdef __native_client_codegen__
#undef REAL_PRINT_REG
#define REAL_PRINT_REG(text, reg) g_assert_not_reached()
#endif
2346 /* benchmark and set based on cpu */
2347 #define LOOP_ALIGNMENT 8
2348 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2350 #ifndef DISABLE_JIT
2352 #if defined(__native_client__) || defined(__native_client_codegen__)
2353 void
2354 mono_nacl_gc()
2356 #ifdef __native_client_gc__
2357 __nacl_suspend_thread_if_needed();
2358 #endif
2360 #endif
2362 void
2363 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2365 MonoInst *ins;
2366 MonoCallInst *call;
2367 guint offset;
2368 guint8 *code = cfg->native_code + cfg->code_len;
2369 int max_len, cpos;
2371 if (cfg->opt & MONO_OPT_LOOP) {
2372 int pad, align = LOOP_ALIGNMENT;
2373 /* set alignment depending on cpu */
2374 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2375 pad = align - pad;
2376 /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2377 x86_padding (code, pad);
2378 cfg->code_len += pad;
2379 bb->native_offset = cfg->code_len;
2382 #ifdef __native_client_codegen__
2384 /* For Native Client, all indirect call/jump targets must be */
2385 /* 32-byte aligned. Exception handler blocks are jumped to */
2386 /* indirectly as well. */
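/* For example, with kNaClAlignment == 32, a block ending at offset 0x25 */
/* receives 27 bytes of padding so the aligned target starts at 0x40. */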
2387 gboolean bb_needs_alignment = (bb->flags & BB_INDIRECT_JUMP_TARGET) ||
2388 (bb->flags & BB_EXCEPTION_HANDLER);
2390 /* if ((cfg->code_len & kNaClAlignmentMask) != 0) { */
2391 if ( bb_needs_alignment && ((cfg->code_len & kNaClAlignmentMask) != 0)) {
2392 int pad = kNaClAlignment - (cfg->code_len & kNaClAlignmentMask);
2393 if (pad != kNaClAlignment) code = mono_arch_nacl_pad(code, pad);
2394 cfg->code_len += pad;
2395 bb->native_offset = cfg->code_len;
2398 #endif /* __native_client_codegen__ */
2399 if (cfg->verbose_level > 2)
2400 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2402 cpos = bb->max_offset;
2404 if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2405 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2406 g_assert (!cfg->compile_aot);
2407 cpos += 6;
2409 cov->data [bb->dfn].cil_code = bb->cil_code;
/* this is not thread safe, but good enough */
2411 x86_inc_mem (code, &cov->data [bb->dfn].count);
2414 offset = code - cfg->native_code;
2416 mono_debug_open_block (cfg, bb, offset);
2418 if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num)
2419 x86_breakpoint (code);
2421 MONO_BB_FOR_EACH_INS (bb, ins) {
2422 offset = code - cfg->native_code;
2424 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2426 #define EXTRA_CODE_SPACE (NACL_SIZE (16, 16 + kNaClAlignment))
2428 if (G_UNLIKELY (offset > (cfg->code_size - max_len - EXTRA_CODE_SPACE))) {
2429 cfg->code_size *= 2;
2430 cfg->native_code = mono_realloc_native_code(cfg);
2431 code = cfg->native_code + offset;
2432 cfg->stat_code_reallocs++;
2435 if (cfg->debug_info)
2436 mono_debug_record_line_number (cfg, ins, offset);
2438 switch (ins->opcode) {
2439 case OP_BIGMUL:
2440 x86_mul_reg (code, ins->sreg2, TRUE);
2441 break;
2442 case OP_BIGMUL_UN:
2443 x86_mul_reg (code, ins->sreg2, FALSE);
2444 break;
2445 case OP_X86_SETEQ_MEMBASE:
2446 case OP_X86_SETNE_MEMBASE:
2447 x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2448 ins->inst_basereg, ins->inst_offset, TRUE);
2449 break;
2450 case OP_STOREI1_MEMBASE_IMM:
2451 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2452 break;
2453 case OP_STOREI2_MEMBASE_IMM:
2454 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2455 break;
2456 case OP_STORE_MEMBASE_IMM:
2457 case OP_STOREI4_MEMBASE_IMM:
2458 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2459 break;
2460 case OP_STOREI1_MEMBASE_REG:
2461 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2462 break;
2463 case OP_STOREI2_MEMBASE_REG:
2464 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2465 break;
2466 case OP_STORE_MEMBASE_REG:
2467 case OP_STOREI4_MEMBASE_REG:
2468 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2469 break;
2470 case OP_STORE_MEM_IMM:
2471 x86_mov_mem_imm (code, ins->inst_p0, ins->inst_c0, 4);
2472 break;
2473 case OP_LOADU4_MEM:
2474 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2475 break;
2476 case OP_LOAD_MEM:
2477 case OP_LOADI4_MEM:
2478 /* These are created by the cprop pass so they use inst_imm as the source */
2479 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2480 break;
2481 case OP_LOADU1_MEM:
2482 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, FALSE);
2483 break;
2484 case OP_LOADU2_MEM:
2485 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, TRUE);
2486 break;
2487 case OP_LOAD_MEMBASE:
2488 case OP_LOADI4_MEMBASE:
2489 case OP_LOADU4_MEMBASE:
2490 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2491 break;
2492 case OP_LOADU1_MEMBASE:
2493 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2494 break;
2495 case OP_LOADI1_MEMBASE:
2496 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2497 break;
2498 case OP_LOADU2_MEMBASE:
2499 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2500 break;
2501 case OP_LOADI2_MEMBASE:
2502 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2503 break;
2504 case OP_ICONV_TO_I1:
2505 case OP_SEXT_I1:
2506 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2507 break;
2508 case OP_ICONV_TO_I2:
2509 case OP_SEXT_I2:
2510 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2511 break;
2512 case OP_ICONV_TO_U1:
2513 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2514 break;
2515 case OP_ICONV_TO_U2:
2516 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2517 break;
2518 case OP_COMPARE:
2519 case OP_ICOMPARE:
2520 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2521 break;
2522 case OP_COMPARE_IMM:
2523 case OP_ICOMPARE_IMM:
2524 x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2525 break;
2526 case OP_X86_COMPARE_MEMBASE_REG:
2527 x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2528 break;
2529 case OP_X86_COMPARE_MEMBASE_IMM:
2530 x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2531 break;
2532 case OP_X86_COMPARE_MEMBASE8_IMM:
2533 x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2534 break;
2535 case OP_X86_COMPARE_REG_MEMBASE:
2536 x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2537 break;
2538 case OP_X86_COMPARE_MEM_IMM:
2539 x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2540 break;
2541 case OP_X86_TEST_NULL:
2542 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2543 break;
2544 case OP_X86_ADD_MEMBASE_IMM:
2545 x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2546 break;
2547 case OP_X86_ADD_REG_MEMBASE:
2548 x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2549 break;
2550 case OP_X86_SUB_MEMBASE_IMM:
2551 x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2552 break;
2553 case OP_X86_SUB_REG_MEMBASE:
2554 x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2555 break;
2556 case OP_X86_AND_MEMBASE_IMM:
2557 x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2558 break;
2559 case OP_X86_OR_MEMBASE_IMM:
2560 x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2561 break;
2562 case OP_X86_XOR_MEMBASE_IMM:
2563 x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2564 break;
2565 case OP_X86_ADD_MEMBASE_REG:
2566 x86_alu_membase_reg (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2567 break;
2568 case OP_X86_SUB_MEMBASE_REG:
2569 x86_alu_membase_reg (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2570 break;
2571 case OP_X86_AND_MEMBASE_REG:
2572 x86_alu_membase_reg (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2573 break;
2574 case OP_X86_OR_MEMBASE_REG:
2575 x86_alu_membase_reg (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2576 break;
2577 case OP_X86_XOR_MEMBASE_REG:
2578 x86_alu_membase_reg (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2579 break;
2580 case OP_X86_INC_MEMBASE:
2581 x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2582 break;
2583 case OP_X86_INC_REG:
2584 x86_inc_reg (code, ins->dreg);
2585 break;
2586 case OP_X86_DEC_MEMBASE:
2587 x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2588 break;
2589 case OP_X86_DEC_REG:
2590 x86_dec_reg (code, ins->dreg);
2591 break;
2592 case OP_X86_MUL_REG_MEMBASE:
2593 x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2594 break;
2595 case OP_X86_AND_REG_MEMBASE:
2596 x86_alu_reg_membase (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset);
2597 break;
2598 case OP_X86_OR_REG_MEMBASE:
2599 x86_alu_reg_membase (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset);
2600 break;
2601 case OP_X86_XOR_REG_MEMBASE:
2602 x86_alu_reg_membase (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset);
2603 break;
2604 case OP_BREAK:
2605 x86_breakpoint (code);
2606 break;
2607 case OP_RELAXED_NOP:
2608 x86_prefix (code, X86_REP_PREFIX);
2609 x86_nop (code);
2610 break;
2611 case OP_HARD_NOP:
2612 x86_nop (code);
2613 break;
2614 case OP_NOP:
2615 case OP_DUMMY_USE:
2616 case OP_DUMMY_STORE:
2617 case OP_NOT_REACHED:
2618 case OP_NOT_NULL:
2619 break;
2620 case OP_SEQ_POINT: {
2621 int i;
2623 if (cfg->compile_aot)
2624 NOT_IMPLEMENTED;
/*
 * Read from the single stepping trigger page. This will cause a
 * SIGSEGV when single stepping is enabled.
 * We do this _before_ the breakpoint, so single stepping after
 * a breakpoint is hit will step to the next IL offset.
 */
2632 if (ins->flags & MONO_INST_SINGLE_STEP_LOC)
2633 x86_alu_reg_mem (code, X86_CMP, X86_EAX, (guint32)ss_trigger_page);
2635 mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
/*
 * A placeholder for a possible breakpoint inserted by
 * mono_arch_set_breakpoint ().
 */
2641 for (i = 0; i < 6; ++i)
2642 x86_nop (code);
2643 break;
2645 case OP_ADDCC:
2646 case OP_IADDCC:
2647 case OP_IADD:
2648 x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2649 break;
2650 case OP_ADC:
2651 case OP_IADC:
2652 x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2653 break;
2654 case OP_ADDCC_IMM:
2655 case OP_ADD_IMM:
2656 case OP_IADD_IMM:
2657 x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2658 break;
2659 case OP_ADC_IMM:
2660 case OP_IADC_IMM:
2661 x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2662 break;
2663 case OP_SUBCC:
2664 case OP_ISUBCC:
2665 case OP_ISUB:
2666 x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2667 break;
2668 case OP_SBB:
2669 case OP_ISBB:
2670 x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2671 break;
2672 case OP_SUBCC_IMM:
2673 case OP_SUB_IMM:
2674 case OP_ISUB_IMM:
2675 x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2676 break;
2677 case OP_SBB_IMM:
2678 case OP_ISBB_IMM:
2679 x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2680 break;
2681 case OP_IAND:
2682 x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2683 break;
2684 case OP_AND_IMM:
2685 case OP_IAND_IMM:
2686 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2687 break;
2688 case OP_IDIV:
2689 case OP_IREM:
/*
 * The code is the same for div/rem; the allocator will allocate dreg
 * to EAX/EDX as appropriate.
 */
2694 if (ins->sreg2 == X86_EDX) {
2695 /* cdq clobbers this */
2696 x86_push_reg (code, ins->sreg2);
2697 x86_cdq (code);
2698 x86_div_membase (code, X86_ESP, 0, TRUE);
2699 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2700 } else {
2701 x86_cdq (code);
2702 x86_div_reg (code, ins->sreg2, TRUE);
2704 break;
2705 case OP_IDIV_UN:
2706 case OP_IREM_UN:
2707 if (ins->sreg2 == X86_EDX) {
2708 x86_push_reg (code, ins->sreg2);
2709 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2710 x86_div_membase (code, X86_ESP, 0, FALSE);
2711 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2712 } else {
2713 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2714 x86_div_reg (code, ins->sreg2, FALSE);
2716 break;
2717 case OP_DIV_IMM:
2718 x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2719 x86_cdq (code);
2720 x86_div_reg (code, ins->sreg2, TRUE);
2721 break;
2722 case OP_IREM_IMM: {
2723 int power = mono_is_power_of_two (ins->inst_imm);
2725 g_assert (ins->sreg1 == X86_EAX);
2726 g_assert (ins->dreg == X86_EAX);
2727 g_assert (power >= 0);
2729 if (power == 1) {
2730 /* Based on http://compilers.iecc.com/comparch/article/93-04-079 */
2731 x86_cdq (code);
2732 x86_alu_reg_imm (code, X86_AND, X86_EAX, 1);
/*
 * If the dividend is >= 0, this does nothing. If it is negative, it
 * transforms %eax=0 into %eax=0, and %eax=1 into %eax=-1.
 */
2737 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EDX);
2738 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
2739 } else if (power == 0) {
2740 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2741 } else {
2742 /* Based on gcc code */
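/* Worked example for power==3 (divisor 8), %eax==-5: cdq gives %edx=-1,
 * shr 29 makes %edx=7, add gives %eax=2, and gives 2, and the final sub
 * gives -5, matching C semantics (-5 % 8 == -5). */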
/* Add compensation for negative dividends */
2745 x86_cdq (code);
2746 x86_shift_reg_imm (code, X86_SHR, X86_EDX, 32 - power);
2747 x86_alu_reg_reg (code, X86_ADD, X86_EAX, X86_EDX);
2748 /* Compute remainder */
2749 x86_alu_reg_imm (code, X86_AND, X86_EAX, (1 << power) - 1);
2750 /* Remove compensation */
2751 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
2753 break;
2755 case OP_IOR:
2756 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2757 break;
2758 case OP_OR_IMM:
2759 case OP_IOR_IMM:
2760 x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2761 break;
2762 case OP_IXOR:
2763 x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2764 break;
2765 case OP_XOR_IMM:
2766 case OP_IXOR_IMM:
2767 x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2768 break;
2769 case OP_ISHL:
2770 g_assert (ins->sreg2 == X86_ECX);
2771 x86_shift_reg (code, X86_SHL, ins->dreg);
2772 break;
2773 case OP_ISHR:
2774 g_assert (ins->sreg2 == X86_ECX);
2775 x86_shift_reg (code, X86_SAR, ins->dreg);
2776 break;
2777 case OP_SHR_IMM:
2778 case OP_ISHR_IMM:
2779 x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2780 break;
2781 case OP_SHR_UN_IMM:
2782 case OP_ISHR_UN_IMM:
2783 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2784 break;
2785 case OP_ISHR_UN:
2786 g_assert (ins->sreg2 == X86_ECX);
2787 x86_shift_reg (code, X86_SHR, ins->dreg);
2788 break;
2789 case OP_SHL_IMM:
2790 case OP_ISHL_IMM:
2791 x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2792 break;
2793 case OP_LSHL: {
2794 guint8 *jump_to_end;
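/* x86 shifts mask the count to 5 bits, so the 64-bit shift is built from
 * shld plus a fix-up branch taken when bit 5 of ECX (count >= 32) is set. */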
2796 /* handle shifts below 32 bits */
2797 x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2798 x86_shift_reg (code, X86_SHL, ins->sreg1);
2800 x86_test_reg_imm (code, X86_ECX, 32);
2801 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
/* handle shifts of 32 bits or more */
2804 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2805 x86_clear_reg (code, ins->sreg1);
2807 x86_patch (jump_to_end, code);
2809 break;
2810 case OP_LSHR: {
2811 guint8 *jump_to_end;
2813 /* handle shifts below 32 bits */
2814 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2815 x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2817 x86_test_reg_imm (code, X86_ECX, 32);
2818 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2820 /* handle shifts over 31 bits */
2821 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2822 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2824 x86_patch (jump_to_end, code);
2826 break;
2827 case OP_LSHR_UN: {
2828 guint8 *jump_to_end;
2830 /* handle shifts below 32 bits */
2831 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2832 x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2834 x86_test_reg_imm (code, X86_ECX, 32);
2835 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2837 /* handle shifts over 31 bits */
2838 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2839 x86_clear_reg (code, ins->backend.reg3);
2841 x86_patch (jump_to_end, code);
2843 break;
2844 case OP_LSHL_IMM:
2845 if (ins->inst_imm >= 32) {
2846 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2847 x86_clear_reg (code, ins->sreg1);
2848 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2849 } else {
2850 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2851 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2853 break;
2854 case OP_LSHR_IMM:
2855 if (ins->inst_imm >= 32) {
2856 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2857 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2858 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2859 } else {
2860 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2861 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2863 break;
2864 case OP_LSHR_UN_IMM:
2865 if (ins->inst_imm >= 32) {
2866 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2867 x86_clear_reg (code, ins->backend.reg3);
2868 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2869 } else {
2870 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2871 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2873 break;
2874 case OP_INOT:
2875 x86_not_reg (code, ins->sreg1);
2876 break;
2877 case OP_INEG:
2878 x86_neg_reg (code, ins->sreg1);
2879 break;
2881 case OP_IMUL:
2882 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2883 break;
2884 case OP_MUL_IMM:
2885 case OP_IMUL_IMM:
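/* Strength-reduce multiplication by small constants: lea computes
 * base + index*scale in a single instruction, so these constants
 * decompose into short lea/add/shl sequences instead of an imul. */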
2886 switch (ins->inst_imm) {
2887 case 2:
2888 /* MOV r1, r2 */
2889 /* ADD r1, r1 */
2890 if (ins->dreg != ins->sreg1)
2891 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2892 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2893 break;
2894 case 3:
2895 /* LEA r1, [r2 + r2*2] */
2896 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2897 break;
2898 case 5:
2899 /* LEA r1, [r2 + r2*4] */
2900 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2901 break;
2902 case 6:
2903 /* LEA r1, [r2 + r2*2] */
2904 /* ADD r1, r1 */
2905 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2906 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2907 break;
2908 case 9:
2909 /* LEA r1, [r2 + r2*8] */
2910 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2911 break;
2912 case 10:
2913 /* LEA r1, [r2 + r2*4] */
2914 /* ADD r1, r1 */
2915 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2916 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2917 break;
2918 case 12:
2919 /* LEA r1, [r2 + r2*2] */
2920 /* SHL r1, 2 */
2921 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2922 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2923 break;
2924 case 25:
2925 /* LEA r1, [r2 + r2*4] */
2926 /* LEA r1, [r1 + r1*4] */
2927 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2928 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2929 break;
2930 case 100:
2931 /* LEA r1, [r2 + r2*4] */
2932 /* SHL r1, 2 */
2933 /* LEA r1, [r1 + r1*4] */
2934 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2935 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2936 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2937 break;
2938 default:
2939 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2940 break;
2942 break;
2943 case OP_IMUL_OVF:
2944 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2945 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2946 break;
2947 case OP_IMUL_OVF_UN: {
2948 /* the mul operation and the exception check should most likely be split */
2949 int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2950 /*g_assert (ins->sreg2 == X86_EAX);
2951 g_assert (ins->dreg == X86_EAX);*/
2952 if (ins->sreg2 == X86_EAX) {
2953 non_eax_reg = ins->sreg1;
2954 } else if (ins->sreg1 == X86_EAX) {
2955 non_eax_reg = ins->sreg2;
2956 } else {
2957 /* no need to save since we're going to store to it anyway */
2958 if (ins->dreg != X86_EAX) {
2959 saved_eax = TRUE;
2960 x86_push_reg (code, X86_EAX);
2962 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2963 non_eax_reg = ins->sreg2;
2965 if (ins->dreg == X86_EDX) {
2966 if (!saved_eax) {
2967 saved_eax = TRUE;
2968 x86_push_reg (code, X86_EAX);
2970 } else if (ins->dreg != X86_EAX) {
2971 saved_edx = TRUE;
2972 x86_push_reg (code, X86_EDX);
2974 x86_mul_reg (code, non_eax_reg, FALSE);
2975 /* save before the check since pop and mov don't change the flags */
2976 if (ins->dreg != X86_EAX)
2977 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2978 if (saved_edx)
2979 x86_pop_reg (code, X86_EDX);
2980 if (saved_eax)
2981 x86_pop_reg (code, X86_EAX);
2982 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2983 break;
2985 case OP_ICONST:
2986 x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2987 break;
2988 case OP_AOTCONST:
2989 g_assert_not_reached ();
2990 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2991 x86_mov_reg_imm (code, ins->dreg, 0);
2992 break;
2993 case OP_JUMP_TABLE:
2994 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2995 x86_mov_reg_imm (code, ins->dreg, 0);
2996 break;
2997 case OP_LOAD_GOTADDR:
2998 g_assert (ins->dreg == MONO_ARCH_GOT_REG);
2999 code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL);
3000 break;
3001 case OP_GOT_ENTRY:
3002 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3003 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
3004 break;
3005 case OP_X86_PUSH_GOT_ENTRY:
3006 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3007 x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
3008 break;
3009 case OP_MOVE:
3010 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3011 break;
3012 case OP_JMP: {
/*
 * Note: this 'frame destruction' logic is useful for tail calls, too.
 * Keep in sync with the code in emit_epilog.
 */
3017 int pos = 0;
3019 /* FIXME: no tracing support... */
3020 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3021 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3022 /* reset offset to make max_len work */
3023 offset = code - cfg->native_code;
3025 g_assert (!cfg->method->save_lmf);
3027 code = emit_load_volatile_arguments (cfg, code);
3029 if (cfg->used_int_regs & (1 << X86_EBX))
3030 pos -= 4;
3031 if (cfg->used_int_regs & (1 << X86_EDI))
3032 pos -= 4;
3033 if (cfg->used_int_regs & (1 << X86_ESI))
3034 pos -= 4;
3035 if (pos)
3036 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3038 if (cfg->used_int_regs & (1 << X86_ESI))
3039 x86_pop_reg (code, X86_ESI);
3040 if (cfg->used_int_regs & (1 << X86_EDI))
3041 x86_pop_reg (code, X86_EDI);
3042 if (cfg->used_int_regs & (1 << X86_EBX))
3043 x86_pop_reg (code, X86_EBX);
3045 /* restore ESP/EBP */
3046 x86_leave (code);
3047 offset = code - cfg->native_code;
3048 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3049 x86_jump32 (code, 0);
3051 cfg->disable_aot = TRUE;
3052 break;
3054 case OP_TAILCALL: {
3055 MonoCallInst *call = (MonoCallInst*)ins;
3056 int pos = 0, i;
3058 /* FIXME: no tracing support... */
3059 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3060 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3061 /* reset offset to make max_len work */
3062 offset = code - cfg->native_code;
3064 g_assert (!cfg->method->save_lmf);
3066 //code = emit_load_volatile_arguments (cfg, code);
3068 /* restore callee saved registers */
3069 for (i = 0; i < X86_NREG; ++i)
3070 if (X86_IS_CALLEE_SAVED_REG (i) && cfg->used_int_regs & (1 << i))
3071 pos -= 4;
3072 if (cfg->used_int_regs & (1 << X86_ESI)) {
3073 x86_mov_reg_membase (code, X86_ESI, X86_EBP, pos, 4);
3074 pos += 4;
3076 if (cfg->used_int_regs & (1 << X86_EDI)) {
3077 x86_mov_reg_membase (code, X86_EDI, X86_EBP, pos, 4);
3078 pos += 4;
3080 if (cfg->used_int_regs & (1 << X86_EBX)) {
3081 x86_mov_reg_membase (code, X86_EBX, X86_EBP, pos, 4);
3082 pos += 4;
3085 /* Copy arguments on the stack to our argument area */
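/* EBP + 8 skips the saved EBP and the return address, landing on the
 * incoming argument area (ARGS_OFFSET). */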
3086 for (i = 0; i < call->stack_usage; i += 4) {
3087 x86_mov_reg_membase (code, X86_EAX, X86_ESP, i, 4);
3088 x86_mov_membase_reg (code, X86_EBP, 8 + i, X86_EAX, 4);
3091 /* restore ESP/EBP */
3092 x86_leave (code);
3093 offset = code - cfg->native_code;
3094 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3095 x86_jump32 (code, 0);
3097 cfg->disable_aot = TRUE;
3098 break;
3100 case OP_CHECK_THIS:
/* ensure ins->sreg1 is not NULL
 * note that cmp DWORD PTR [eax], eax is one byte shorter than
 * cmp DWORD PTR [eax], 0
 */
3105 x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
3106 break;
3107 case OP_ARGLIST: {
3108 int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
3109 x86_push_reg (code, hreg);
3110 x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
3111 x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
3112 x86_pop_reg (code, hreg);
3113 break;
3115 case OP_FCALL:
3116 case OP_LCALL:
3117 case OP_VCALL:
3118 case OP_VCALL2:
3119 case OP_VOIDCALL:
3120 case OP_CALL:
3121 call = (MonoCallInst*)ins;
3122 if (ins->flags & MONO_INST_HAS_METHOD)
3123 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
3124 else
3125 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
3126 ins->flags |= MONO_INST_GC_CALLSITE;
3127 ins->backend.pc_offset = code - cfg->native_code;
3128 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
/* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
 * bytes to pop, we want to use pops. GCC does this (note it won't happen
 * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
 * smart enough to do that optimization yet).
 *
 * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
 * the mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a
 * small but noticeable speedup (most likely from locality benefits). People
 * with other processors should check on theirs to see what happens.
 */
3139 if (call->stack_usage == 4) {
/* we want to use registers that won't get used soon, so use
 * ecx, as eax will get allocated first. edx is used by long calls,
 * so we can't use that.
 */
3145 x86_pop_reg (code, X86_ECX);
3146 } else {
3147 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3150 code = emit_move_return_value (cfg, ins, code);
3151 break;
3152 case OP_FCALL_REG:
3153 case OP_LCALL_REG:
3154 case OP_VCALL_REG:
3155 case OP_VCALL2_REG:
3156 case OP_VOIDCALL_REG:
3157 case OP_CALL_REG:
3158 call = (MonoCallInst*)ins;
3159 x86_call_reg (code, ins->sreg1);
3160 ins->flags |= MONO_INST_GC_CALLSITE;
3161 ins->backend.pc_offset = code - cfg->native_code;
3162 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3163 if (call->stack_usage == 4)
3164 x86_pop_reg (code, X86_ECX);
3165 else
3166 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3168 code = emit_move_return_value (cfg, ins, code);
3169 break;
3170 case OP_FCALL_MEMBASE:
3171 case OP_LCALL_MEMBASE:
3172 case OP_VCALL_MEMBASE:
3173 case OP_VCALL2_MEMBASE:
3174 case OP_VOIDCALL_MEMBASE:
3175 case OP_CALL_MEMBASE:
3176 call = (MonoCallInst*)ins;
3178 x86_call_membase (code, ins->sreg1, ins->inst_offset);
3179 ins->flags |= MONO_INST_GC_CALLSITE;
3180 ins->backend.pc_offset = code - cfg->native_code;
3181 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3182 if (call->stack_usage == 4)
3183 x86_pop_reg (code, X86_ECX);
3184 else
3185 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3187 code = emit_move_return_value (cfg, ins, code);
3188 break;
3189 case OP_X86_PUSH:
3190 x86_push_reg (code, ins->sreg1);
3191 break;
3192 case OP_X86_PUSH_IMM:
3193 x86_push_imm (code, ins->inst_imm);
3194 break;
3195 case OP_X86_PUSH_MEMBASE:
3196 x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
3197 break;
3198 case OP_X86_PUSH_OBJ:
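/* Push a value type: reserve inst_imm bytes on the stack, then copy the
 * object into the reserved area dword-by-dword with rep movsd. */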
3199 x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
3200 x86_push_reg (code, X86_EDI);
3201 x86_push_reg (code, X86_ESI);
3202 x86_push_reg (code, X86_ECX);
3203 if (ins->inst_offset)
3204 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
3205 else
3206 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
3207 x86_lea_membase (code, X86_EDI, X86_ESP, 12);
3208 x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
3209 x86_cld (code);
3210 x86_prefix (code, X86_REP_PREFIX);
3211 x86_movsd (code);
3212 x86_pop_reg (code, X86_ECX);
3213 x86_pop_reg (code, X86_ESI);
3214 x86_pop_reg (code, X86_EDI);
3215 break;
3216 case OP_X86_LEA:
3217 x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
3218 break;
3219 case OP_X86_LEA_MEMBASE:
3220 x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
3221 break;
3222 case OP_X86_XCHG:
3223 x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
3224 break;
3225 case OP_LOCALLOC:
3226 /* keep alignment */
3227 x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
3228 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
3229 code = mono_emit_stack_alloc (code, ins);
3230 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
3231 break;
3232 case OP_LOCALLOC_IMM: {
3233 guint32 size = ins->inst_imm;
3234 size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
3236 if (ins->flags & MONO_INST_INIT) {
3237 /* FIXME: Optimize this */
3238 x86_mov_reg_imm (code, ins->dreg, size);
3239 ins->sreg1 = ins->dreg;
3241 code = mono_emit_stack_alloc (code, ins);
3242 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
3243 } else {
3244 x86_alu_reg_imm (code, X86_SUB, X86_ESP, size);
3245 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
3247 break;
3249 case OP_THROW: {
3250 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
3251 x86_push_reg (code, ins->sreg1);
3252 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
3253 (gpointer)"mono_arch_throw_exception");
3254 ins->flags |= MONO_INST_GC_CALLSITE;
3255 ins->backend.pc_offset = code - cfg->native_code;
3256 break;
3258 case OP_RETHROW: {
3259 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
3260 x86_push_reg (code, ins->sreg1);
3261 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
3262 (gpointer)"mono_arch_rethrow_exception");
3263 ins->flags |= MONO_INST_GC_CALLSITE;
3264 ins->backend.pc_offset = code - cfg->native_code;
3265 break;
3267 case OP_CALL_HANDLER:
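/* The sub keeps ESP aligned to MONO_ARCH_FRAME_ALIGNMENT across the call:
 * with 16-byte alignment, 12 bytes of padding plus the 4-byte return
 * address pushed by the call make up one full alignment unit. */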
3268 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
3269 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3270 x86_call_imm (code, 0);
3271 mono_cfg_add_try_hole (cfg, ins->inst_eh_block, code, bb);
3272 x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
3273 break;
3274 case OP_START_HANDLER: {
3275 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3276 x86_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, X86_ESP, 4);
3277 break;
3279 case OP_ENDFINALLY: {
3280 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3281 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
3282 x86_ret (code);
3283 break;
3285 case OP_ENDFILTER: {
3286 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3287 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
3288 /* The local allocator will put the result into EAX */
3289 x86_ret (code);
3290 break;
3293 case OP_LABEL:
3294 ins->inst_c0 = code - cfg->native_code;
3295 break;
3296 case OP_BR:
3297 if (ins->inst_target_bb->native_offset) {
3298 x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
3299 } else {
3300 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3301 if ((cfg->opt & MONO_OPT_BRANCH) &&
3302 x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3303 x86_jump8 (code, 0);
3304 else
3305 x86_jump32 (code, 0);
3307 break;
3308 case OP_BR_REG:
3309 x86_jump_reg (code, ins->sreg1);
3310 break;
3311 case OP_CEQ:
3312 case OP_CLT:
3313 case OP_CLT_UN:
3314 case OP_CGT:
3315 case OP_CGT_UN:
3316 case OP_CNE:
3317 case OP_ICEQ:
3318 case OP_ICLT:
3319 case OP_ICLT_UN:
3320 case OP_ICGT:
3321 case OP_ICGT_UN:
3322 x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
3323 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3324 break;
3325 case OP_COND_EXC_EQ:
3326 case OP_COND_EXC_NE_UN:
3327 case OP_COND_EXC_LT:
3328 case OP_COND_EXC_LT_UN:
3329 case OP_COND_EXC_GT:
3330 case OP_COND_EXC_GT_UN:
3331 case OP_COND_EXC_GE:
3332 case OP_COND_EXC_GE_UN:
3333 case OP_COND_EXC_LE:
3334 case OP_COND_EXC_LE_UN:
3335 case OP_COND_EXC_IEQ:
3336 case OP_COND_EXC_INE_UN:
3337 case OP_COND_EXC_ILT:
3338 case OP_COND_EXC_ILT_UN:
3339 case OP_COND_EXC_IGT:
3340 case OP_COND_EXC_IGT_UN:
3341 case OP_COND_EXC_IGE:
3342 case OP_COND_EXC_IGE_UN:
3343 case OP_COND_EXC_ILE:
3344 case OP_COND_EXC_ILE_UN:
3345 EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
3346 break;
3347 case OP_COND_EXC_OV:
3348 case OP_COND_EXC_NO:
3349 case OP_COND_EXC_C:
3350 case OP_COND_EXC_NC:
3351 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3352 break;
3353 case OP_COND_EXC_IOV:
3354 case OP_COND_EXC_INO:
3355 case OP_COND_EXC_IC:
3356 case OP_COND_EXC_INC:
3357 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
3358 break;
3359 case OP_IBEQ:
3360 case OP_IBNE_UN:
3361 case OP_IBLT:
3362 case OP_IBLT_UN:
3363 case OP_IBGT:
3364 case OP_IBGT_UN:
3365 case OP_IBGE:
3366 case OP_IBGE_UN:
3367 case OP_IBLE:
3368 case OP_IBLE_UN:
3369 EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
3370 break;
3372 case OP_CMOV_IEQ:
3373 case OP_CMOV_IGE:
3374 case OP_CMOV_IGT:
3375 case OP_CMOV_ILE:
3376 case OP_CMOV_ILT:
3377 case OP_CMOV_INE_UN:
3378 case OP_CMOV_IGE_UN:
3379 case OP_CMOV_IGT_UN:
3380 case OP_CMOV_ILE_UN:
3381 case OP_CMOV_ILT_UN:
3382 g_assert (ins->dreg == ins->sreg1);
3383 x86_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
3384 break;
3386 /* floating point opcodes */
3387 case OP_R8CONST: {
3388 double d = *(double *)ins->inst_p0;
3390 if ((d == 0.0) && (mono_signbit (d) == 0)) {
3391 x86_fldz (code);
3392 } else if (d == 1.0) {
3393 x86_fld1 (code);
3394 } else {
3395 if (cfg->compile_aot) {
3396 guint32 *val = (guint32*)&d;
3397 x86_push_imm (code, val [1]);
3398 x86_push_imm (code, val [0]);
3399 x86_fld_membase (code, X86_ESP, 0, TRUE);
3400 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3402 else {
3403 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
3404 x86_fld (code, NULL, TRUE);
3407 break;
3409 case OP_R4CONST: {
3410 float f = *(float *)ins->inst_p0;
3412 if ((f == 0.0) && (mono_signbit (f) == 0)) {
3413 x86_fldz (code);
3414 } else if (f == 1.0) {
3415 x86_fld1 (code);
3416 } else {
3417 if (cfg->compile_aot) {
3418 guint32 val = *(guint32*)&f;
3419 x86_push_imm (code, val);
3420 x86_fld_membase (code, X86_ESP, 0, FALSE);
3421 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3423 else {
3424 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
3425 x86_fld (code, NULL, FALSE);
3428 break;
3430 case OP_STORER8_MEMBASE_REG:
3431 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3432 break;
3433 case OP_LOADR8_MEMBASE:
3434 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3435 break;
3436 case OP_STORER4_MEMBASE_REG:
3437 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3438 break;
3439 case OP_LOADR4_MEMBASE:
3440 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3441 break;
3442 case OP_ICONV_TO_R4:
3443 x86_push_reg (code, ins->sreg1);
3444 x86_fild_membase (code, X86_ESP, 0, FALSE);
3445 /* Change precision */
3446 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3447 x86_fld_membase (code, X86_ESP, 0, FALSE);
3448 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3449 break;
3450 case OP_ICONV_TO_R8:
3451 x86_push_reg (code, ins->sreg1);
3452 x86_fild_membase (code, X86_ESP, 0, FALSE);
3453 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3454 break;
3455 case OP_ICONV_TO_R_UN:
3456 x86_push_imm (code, 0);
3457 x86_push_reg (code, ins->sreg1);
3458 x86_fild_membase (code, X86_ESP, 0, TRUE);
3459 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3460 break;
3461 case OP_X86_FP_LOAD_I8:
3462 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3463 break;
3464 case OP_X86_FP_LOAD_I4:
3465 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3466 break;
3467 case OP_FCONV_TO_R4:
3468 /* Change precision */
3469 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3470 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3471 x86_fld_membase (code, X86_ESP, 0, FALSE);
3472 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3473 break;
3474 case OP_FCONV_TO_I1:
3475 code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3476 break;
3477 case OP_FCONV_TO_U1:
3478 code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3479 break;
3480 case OP_FCONV_TO_I2:
3481 code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3482 break;
3483 case OP_FCONV_TO_U2:
3484 code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3485 break;
3486 case OP_FCONV_TO_I4:
3487 case OP_FCONV_TO_I:
3488 code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3489 break;
3490 case OP_FCONV_TO_I8:
3491 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3492 x86_fnstcw_membase(code, X86_ESP, 0);
3493 x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
3494 x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
3495 x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
3496 x86_fldcw_membase (code, X86_ESP, 2);
3497 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3498 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
3499 x86_pop_reg (code, ins->dreg);
3500 x86_pop_reg (code, ins->backend.reg3);
3501 x86_fldcw_membase (code, X86_ESP, 0);
3502 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3503 break;
3504 case OP_LCONV_TO_R8_2:
3505 x86_push_reg (code, ins->sreg2);
3506 x86_push_reg (code, ins->sreg1);
3507 x86_fild_membase (code, X86_ESP, 0, TRUE);
3508 /* Change precision */
3509 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
3510 x86_fld_membase (code, X86_ESP, 0, TRUE);
3511 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3512 break;
3513 case OP_LCONV_TO_R4_2:
3514 x86_push_reg (code, ins->sreg2);
3515 x86_push_reg (code, ins->sreg1);
3516 x86_fild_membase (code, X86_ESP, 0, TRUE);
3517 /* Change precision */
3518 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3519 x86_fld_membase (code, X86_ESP, 0, FALSE);
3520 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3521 break;
3522 case OP_LCONV_TO_R_UN_2: {
3523 static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
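/* mn is 2^64 in 80-bit extended precision (significand 0x8000000000000000,
 * biased exponent 0x403f); adding it to a negative fild result recovers
 * the unsigned value of the 64-bit integer. */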
3524 guint8 *br;
3526 /* load 64bit integer to FP stack */
3527 x86_push_reg (code, ins->sreg2);
3528 x86_push_reg (code, ins->sreg1);
3529 x86_fild_membase (code, X86_ESP, 0, TRUE);
3531 /* test if lreg is negative */
3532 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3533 br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3535 /* add correction constant mn */
3536 x86_fld80_mem (code, mn);
3537 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3539 x86_patch (br, code);
3541 /* Change precision */
3542 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
3543 x86_fld_membase (code, X86_ESP, 0, TRUE);
3545 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3547 break;
3549 case OP_LCONV_TO_OVF_I:
3550 case OP_LCONV_TO_OVF_I4_2: {
3551 guint8 *br [3], *label [1];
3552 MonoInst *tins;
/*
 * Valid ints: 0xFFFFFFFF:0x80000000 to 0x00000000:0x7FFFFFFF
 */
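/* i.e. the value fits in an int32 iff the high word is the sign
 * extension of the low word. */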
3557 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3559 /* If the low word top bit is set, see if we are negative */
3560 br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
/* We are not negative (no top bit set); check that our top word is zero */
3562 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3563 br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3564 label [0] = code;
3566 /* throw exception */
3567 tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
3568 if (tins) {
3569 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
3570 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
3571 x86_jump8 (code, 0);
3572 else
3573 x86_jump32 (code, 0);
3574 } else {
3575 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3576 x86_jump32 (code, 0);
3580 x86_patch (br [0], code);
/* our top bit is set, check that the top word is 0xffffffff */
3582 x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3584 x86_patch (br [1], code);
3585 /* nope, emit exception */
3586 br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3587 x86_patch (br [2], label [0]);
3589 if (ins->dreg != ins->sreg1)
3590 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3591 break;
3593 case OP_FMOVE:
3594 /* Not needed on the fp stack */
3595 break;
3596 case OP_FADD:
3597 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3598 break;
3599 case OP_FSUB:
3600 x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3601 break;
3602 case OP_FMUL:
3603 x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3604 break;
3605 case OP_FDIV:
3606 x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3607 break;
3608 case OP_FNEG:
3609 x86_fchs (code);
3610 break;
3611 case OP_SIN:
3612 x86_fsin (code);
3613 x86_fldz (code);
3614 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3615 break;
3616 case OP_COS:
3617 x86_fcos (code);
3618 x86_fldz (code);
3619 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3620 break;
3621 case OP_ABS:
3622 x86_fabs (code);
3623 break;
3624 case OP_TAN: {
/*
 * it really doesn't make sense to inline all this code,
 * it's here just to show that things may not be as simple
 * as they appear.
 */
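/* fptan sets the C2 status flag when the operand is out of range
 * (|x| >= 2^63); in that case the argument is reduced modulo 2*pi
 * with fprem1 and fptan is retried. */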
3630 guchar *check_pos, *end_tan, *pop_jump;
3631 x86_push_reg (code, X86_EAX);
3632 x86_fptan (code);
3633 x86_fnstsw (code);
3634 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3635 check_pos = code;
3636 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3637 x86_fstp (code, 0); /* pop the 1.0 */
3638 end_tan = code;
3639 x86_jump8 (code, 0);
3640 x86_fldpi (code);
3641 x86_fp_op (code, X86_FADD, 0);
3642 x86_fxch (code, 1);
3643 x86_fprem1 (code);
3644 x86_fstsw (code);
3645 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3646 pop_jump = code;
3647 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3648 x86_fstp (code, 1);
3649 x86_fptan (code);
3650 x86_patch (pop_jump, code);
3651 x86_fstp (code, 0); /* pop the 1.0 */
3652 x86_patch (check_pos, code);
3653 x86_patch (end_tan, code);
3654 x86_fldz (code);
3655 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3656 x86_pop_reg (code, X86_EAX);
3657 break;
3659 case OP_ATAN:
3660 x86_fld1 (code);
3661 x86_fpatan (code);
3662 x86_fldz (code);
3663 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3664 break;
3665 case OP_SQRT:
3666 x86_fsqrt (code);
3667 break;
3668 case OP_ROUND:
3669 x86_frndint (code);
3670 break;
3671 case OP_IMIN:
3672 g_assert (cfg->opt & MONO_OPT_CMOV);
3673 g_assert (ins->dreg == ins->sreg1);
3674 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3675 x86_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
3676 break;
3677 case OP_IMIN_UN:
3678 g_assert (cfg->opt & MONO_OPT_CMOV);
3679 g_assert (ins->dreg == ins->sreg1);
3680 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3681 x86_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
3682 break;
3683 case OP_IMAX:
3684 g_assert (cfg->opt & MONO_OPT_CMOV);
3685 g_assert (ins->dreg == ins->sreg1);
3686 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3687 x86_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
3688 break;
3689 case OP_IMAX_UN:
3690 g_assert (cfg->opt & MONO_OPT_CMOV);
3691 g_assert (ins->dreg == ins->sreg1);
3692 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3693 x86_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
3694 break;
3695 case OP_X86_FPOP:
3696 x86_fstp (code, 0);
3697 break;
3698 case OP_X86_FXCH:
3699 x86_fxch (code, ins->inst_imm);
3700 break;
3701 case OP_FREM: {
3702 guint8 *l1, *l2;
3704 x86_push_reg (code, X86_EAX);
3705 /* we need to exchange ST(0) with ST(1) */
3706 x86_fxch (code, 1);
/* this requires a loop, because fprem sometimes
 * returns a partial remainder */
3710 l1 = code;
3711 /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3712 /* x86_fprem1 (code); */
3713 x86_fprem (code);
3714 x86_fnstsw (code);
3715 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
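/* x86_branch8 emits 2 bytes, so code + 2 is the address the 8-bit
 * displacement is relative to; loop back to l1 while C2 (partial
 * remainder) is still set. */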
3716 l2 = code + 2;
3717 x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3719 /* pop result */
3720 x86_fstp (code, 1);
3722 x86_pop_reg (code, X86_EAX);
3723 break;
3725 case OP_FCOMPARE:
3726 if (cfg->opt & MONO_OPT_FCMOV) {
3727 x86_fcomip (code, 1);
3728 x86_fstp (code, 0);
3729 break;
3731 /* this overwrites EAX */
3732 EMIT_FPCOMPARE(code);
3733 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3734 break;
3735 case OP_FCEQ:
3736 if (cfg->opt & MONO_OPT_FCMOV) {
/* zeroing the register at the start results in
 * shorter and faster code (we can also remove the widening op)
 */
3740 guchar *unordered_check;
3741 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3742 x86_fcomip (code, 1);
3743 x86_fstp (code, 0);
3744 unordered_check = code;
3745 x86_branch8 (code, X86_CC_P, 0, FALSE);
3746 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3747 x86_patch (unordered_check, code);
3748 break;
3750 if (ins->dreg != X86_EAX)
3751 x86_push_reg (code, X86_EAX);
3753 EMIT_FPCOMPARE(code);
3754 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3755 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3756 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3757 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3759 if (ins->dreg != X86_EAX)
3760 x86_pop_reg (code, X86_EAX);
3761 break;
3762 case OP_FCLT:
3763 case OP_FCLT_UN:
3764 if (cfg->opt & MONO_OPT_FCMOV) {
/* zeroing the register at the start results in
 * shorter and faster code (we can also remove the widening op)
 */
3768 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3769 x86_fcomip (code, 1);
3770 x86_fstp (code, 0);
3771 if (ins->opcode == OP_FCLT_UN) {
3772 guchar *unordered_check = code;
3773 guchar *jump_to_end;
3774 x86_branch8 (code, X86_CC_P, 0, FALSE);
3775 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3776 jump_to_end = code;
3777 x86_jump8 (code, 0);
3778 x86_patch (unordered_check, code);
3779 x86_inc_reg (code, ins->dreg);
3780 x86_patch (jump_to_end, code);
3781 } else {
3782 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3784 break;
3786 if (ins->dreg != X86_EAX)
3787 x86_push_reg (code, X86_EAX);
3789 EMIT_FPCOMPARE(code);
3790 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3791 if (ins->opcode == OP_FCLT_UN) {
3792 guchar *is_not_zero_check, *end_jump;
3793 is_not_zero_check = code;
3794 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3795 end_jump = code;
3796 x86_jump8 (code, 0);
3797 x86_patch (is_not_zero_check, code);
3798 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3800 x86_patch (end_jump, code);
3802 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3803 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3805 if (ins->dreg != X86_EAX)
3806 x86_pop_reg (code, X86_EAX);
3807 break;
3808 case OP_FCGT:
3809 case OP_FCGT_UN:
3810 if (cfg->opt & MONO_OPT_FCMOV) {
3811 /* zeroing the register at the start results in
3812 * shorter and faster code (we can also remove the widening op)
3814 guchar *unordered_check;
3815 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3816 x86_fcomip (code, 1);
3817 x86_fstp (code, 0);
3818 if (ins->opcode == OP_FCGT) {
3819 unordered_check = code;
3820 x86_branch8 (code, X86_CC_P, 0, FALSE);
3821 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3822 x86_patch (unordered_check, code);
3823 } else {
3824 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3826 break;
3828 if (ins->dreg != X86_EAX)
3829 x86_push_reg (code, X86_EAX);
3831 EMIT_FPCOMPARE(code);
3832 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3833 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3834 if (ins->opcode == OP_FCGT_UN) {
3835 guchar *is_not_zero_check, *end_jump;
3836 is_not_zero_check = code;
3837 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3838 end_jump = code;
3839 x86_jump8 (code, 0);
3840 x86_patch (is_not_zero_check, code);
3841 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3843 x86_patch (end_jump, code);
3845 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3846 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3848 if (ins->dreg != X86_EAX)
3849 x86_pop_reg (code, X86_EAX);
3850 break;
3851 case OP_FBEQ:
3852 if (cfg->opt & MONO_OPT_FCMOV) {
3853 guchar *jump = code;
3854 x86_branch8 (code, X86_CC_P, 0, TRUE);
3855 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3856 x86_patch (jump, code);
3857 break;
3859 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3860 EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3861 break;
3862 case OP_FBNE_UN:
3863 /* Branch if C013 != 100 */
3864 if (cfg->opt & MONO_OPT_FCMOV) {
3865 /* branch if !ZF or (PF|CF) */
3866 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3867 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3868 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3869 break;
3871 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3872 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3873 break;
3874 case OP_FBLT:
3875 if (cfg->opt & MONO_OPT_FCMOV) {
3876 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3877 break;
3879 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3880 break;
3881 case OP_FBLT_UN:
3882 if (cfg->opt & MONO_OPT_FCMOV) {
3883 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3884 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3885 break;
3887 if (ins->opcode == OP_FBLT_UN) {
3888 guchar *is_not_zero_check, *end_jump;
3889 is_not_zero_check = code;
3890 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3891 end_jump = code;
3892 x86_jump8 (code, 0);
3893 x86_patch (is_not_zero_check, code);
3894 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3896 x86_patch (end_jump, code);
3898 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3899 break;
3900 case OP_FBGT:
3901 case OP_FBGT_UN:
3902 if (cfg->opt & MONO_OPT_FCMOV) {
3903 if (ins->opcode == OP_FBGT) {
3904 guchar *br1;
3906 /* skip branch if C1=1 */
3907 br1 = code;
3908 x86_branch8 (code, X86_CC_P, 0, FALSE);
3909 /* branch if (C0 | C3) = 1 */
3910 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3911 x86_patch (br1, code);
3912 } else {
3913 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3915 break;
3917 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3918 if (ins->opcode == OP_FBGT_UN) {
3919 guchar *is_not_zero_check, *end_jump;
3920 is_not_zero_check = code;
3921 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3922 end_jump = code;
3923 x86_jump8 (code, 0);
3924 x86_patch (is_not_zero_check, code);
3925 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3927 x86_patch (end_jump, code);
3929 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3930 break;
3931 case OP_FBGE:
3932 /* Branch if C013 == 100 or 001 */
3933 if (cfg->opt & MONO_OPT_FCMOV) {
3934 guchar *br1;
3936 /* skip branch if C1=1 */
3937 br1 = code;
3938 x86_branch8 (code, X86_CC_P, 0, FALSE);
3939 /* branch if (C0 | C3) = 1 */
3940 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3941 x86_patch (br1, code);
3942 break;
3944 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3945 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3946 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3947 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3948 break;
3949 case OP_FBGE_UN:
3950 /* Branch if C013 == 000 */
3951 if (cfg->opt & MONO_OPT_FCMOV) {
3952 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3953 break;
3955 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3956 break;
3957 case OP_FBLE:
3958 /* Branch if C013=000 or 100 */
3959 if (cfg->opt & MONO_OPT_FCMOV) {
3960 guchar *br1;
3962 /* skip branch if C1=1 */
3963 br1 = code;
3964 x86_branch8 (code, X86_CC_P, 0, FALSE);
3965 /* branch if C0=0 */
3966 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3967 x86_patch (br1, code);
3968 break;
3970 x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3971 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3972 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3973 break;
3974 case OP_FBLE_UN:
3975 /* Branch if C013 != 001 */
3976 if (cfg->opt & MONO_OPT_FCMOV) {
3977 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3978 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3979 break;
3981 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3982 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3983 break;
3984 case OP_CKFINITE: {
3985 guchar *br1;
3986 x86_push_reg (code, X86_EAX);
3987 x86_fxam (code);
3988 x86_fnstsw (code);
3989 x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3990 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3991 x86_pop_reg (code, X86_EAX);
3993 /* Have to clean up the fp stack before throwing the exception */
3994 br1 = code;
3995 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3997 x86_fstp (code, 0);
3998 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
4000 x86_patch (br1, code);
4001 break;
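/* FXAM reference for the check above: C0 is set for NaNs and infinities,
 * C3 for zeros, denormals and empty registers.  After masking the status
 * word with 0x4100 (C3|C0), only the non-finite classes compare equal to
 * X86_FP_C0, so only they reach the throw path. */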
4003 case OP_TLS_GET: {
4004 code = mono_x86_emit_tls_get (code, ins->dreg, ins->inst_offset);
4005 break;
4007 case OP_MEMORY_BARRIER: {
4008 /* x86 only needs barrier for StoreLoad and FullBarrier */
4009 switch (ins->backend.memory_barrier_kind) {
4010 case StoreLoadBarrier:
4011 case FullBarrier:
4012 /* http://blogs.sun.com/dave/resource/NHM-Pipeline-Blog-V2.txt */
4013 x86_prefix (code, X86_LOCK_PREFIX);
4014 x86_alu_membase_imm (code, X86_ADD, X86_ESP, 0, 0);
4015 break;
4017 break;
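/* Note: the LOCK'ed ADD of zero to [ESP] is the classic cheap full fence:
 * any locked read-modify-write drains the store buffer just like MFENCE,
 * while adding 0 to the top-of-stack slot has no visible effect.  A hedged
 * one-line C equivalent, assuming GCC-style atomic builtins are available:
 *
 *     __sync_synchronize ();   // compiles down to a full memory fence
 */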
4019 case OP_ATOMIC_ADD_I4: {
4020 int dreg = ins->dreg;
4022 if (dreg == ins->inst_basereg) {
4023 x86_push_reg (code, ins->sreg2);
4024 dreg = ins->sreg2;
4027 if (dreg != ins->sreg2)
4028 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
4030 x86_prefix (code, X86_LOCK_PREFIX);
4031 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4033 if (dreg != ins->dreg) {
4034 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4035 x86_pop_reg (code, dreg);
4038 break;
4040 case OP_ATOMIC_ADD_NEW_I4: {
4041 int dreg = ins->dreg;
4043 /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
4044 if (ins->sreg2 == dreg) {
4045 if (dreg == X86_EBX) {
4046 dreg = X86_EDI;
4047 if (ins->inst_basereg == X86_EDI)
4048 dreg = X86_ESI;
4049 } else {
4050 dreg = X86_EBX;
4051 if (ins->inst_basereg == X86_EBX)
4052 dreg = X86_EDI;
4054 } else if (ins->inst_basereg == dreg) {
4055 if (dreg == X86_EBX) {
4056 dreg = X86_EDI;
4057 if (ins->sreg2 == X86_EDI)
4058 dreg = X86_ESI;
4059 } else {
4060 dreg = X86_EBX;
4061 if (ins->sreg2 == X86_EBX)
4062 dreg = X86_EDI;
4066 if (dreg != ins->dreg) {
4067 x86_push_reg (code, dreg);
4070 x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
4071 x86_prefix (code, X86_LOCK_PREFIX);
4072 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4073 /* dreg contains the old value, add with sreg2 value */
4074 x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
4076 if (ins->dreg != dreg) {
4077 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4078 x86_pop_reg (code, dreg);
4081 break;
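/* Both atomic-add opcodes above are built on LOCK XADD; the register
 * shuffling only works around local register-allocator constraints.
 * Hedged C sketch of the semantics, assuming GCC-style builtins:
 *
 *     old_val = __sync_fetch_and_add (ptr, value);   // OP_ATOMIC_ADD_I4
 *     new_val = __sync_add_and_fetch (ptr, value);   // OP_ATOMIC_ADD_NEW_I4
 */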
4083 case OP_ATOMIC_EXCHANGE_I4: {
4084 guchar *br[2];
4085 int sreg2 = ins->sreg2;
4086 int breg = ins->inst_basereg;
4088 /* cmpxchg uses eax as the comparand, so we need to make sure we can use it
4089 * hack to overcome limits in x86 reg allocator
4090 * (req: dreg == eax and sreg2 != eax and breg != eax)
4092 g_assert (ins->dreg == X86_EAX);
4094 /* We need the EAX reg for the cmpxchg */
4095 if (ins->sreg2 == X86_EAX) {
4096 sreg2 = (breg == X86_EDX) ? X86_EBX : X86_EDX;
4097 x86_push_reg (code, sreg2);
4098 x86_mov_reg_reg (code, sreg2, X86_EAX, 4);
4101 if (breg == X86_EAX) {
4102 breg = (sreg2 == X86_ESI) ? X86_EDI : X86_ESI;
4103 x86_push_reg (code, breg);
4104 x86_mov_reg_reg (code, breg, X86_EAX, 4);
4107 x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
4109 br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
4110 x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
4111 br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
4112 x86_patch (br [1], br [0]);
4114 if (breg != ins->inst_basereg)
4115 x86_pop_reg (code, breg);
4117 if (ins->sreg2 != sreg2)
4118 x86_pop_reg (code, sreg2);
4120 break;
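/* The CMPXCHG retry loop above implements an atomic exchange; CMPXCHG
 * conveniently leaves the old value in EAX, matching the dreg == X86_EAX
 * constraint asserted earlier.  Hedged C sketch, assuming GCC-style
 * builtins:
 *
 *     int old;
 *     do {
 *         old = *ptr;
 *     } while (!__sync_bool_compare_and_swap (ptr, old, new_value));
 *     return old;   // the previous contents of *ptr
 */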
4122 case OP_ATOMIC_CAS_I4: {
4123 g_assert (ins->dreg == X86_EAX);
4124 g_assert (ins->sreg3 == X86_EAX);
4125 g_assert (ins->sreg1 != X86_EAX);
4126 g_assert (ins->sreg1 != ins->sreg2);
4128 x86_prefix (code, X86_LOCK_PREFIX);
4129 x86_cmpxchg_membase_reg (code, ins->sreg1, ins->inst_offset, ins->sreg2);
4130 break;
4132 case OP_CARD_TABLE_WBARRIER: {
4133 int ptr = ins->sreg1;
4134 int value = ins->sreg2;
4135 guchar *br;
4136 int nursery_shift, card_table_shift;
4137 gpointer card_table_mask;
4138 size_t nursery_size;
4139 gulong card_table = (gulong)mono_gc_get_card_table (&card_table_shift, &card_table_mask);
4140 gulong nursery_start = (gulong)mono_gc_get_nursery (&nursery_shift, &nursery_size);
4143 * We need one register we can clobber: we choose EDX and pin sreg1
4144 * to EAX to work around limitations in the local register allocator.
4145 * sreg2 might get allocated to EDX, but that is not a problem since
4146 * we use it before clobbering EDX.
4148 g_assert (ins->sreg1 == X86_EAX);
4151 * This is the code we produce:
4153 * edx = value
4154 * edx >>= nursery_shift
4155 * cmp edx, (nursery_start >> nursery_shift)
4156 * jne done
4157 * edx = ptr
4158 * edx >>= card_table_shift
4159 * card_table[edx] = 1
4160 * done:
4163 if (value != X86_EDX)
4164 x86_mov_reg_reg (code, X86_EDX, value, 4);
4165 x86_shift_reg_imm (code, X86_SHR, X86_EDX, nursery_shift);
4166 x86_alu_reg_imm (code, X86_CMP, X86_EDX, nursery_start >> nursery_shift);
4167 br = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
4168 x86_mov_reg_reg (code, X86_EDX, ptr, 4);
4169 x86_shift_reg_imm (code, X86_SHR, X86_EDX, card_table_shift);
4170 if (card_table_mask)
4171 x86_alu_reg_imm (code, X86_AND, X86_EDX, (int)card_table_mask);
4172 x86_mov_membase_imm (code, X86_EDX, card_table, 1, 1);
4173 x86_patch (br, code);
4174 break;
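/* Hedged C rendering of the barrier emitted above, using the variables
 * declared at the top of this case:
 *
 *     if (((gulong)value >> nursery_shift) == (nursery_start >> nursery_shift)) {
 *         gulong card = (gulong)ptr >> card_table_shift;
 *         if (card_table_mask)
 *             card &= (gulong)card_table_mask;
 *         *(guint8 *)(card_table + card) = 1;   // mark the card dirty
 *     }
 */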
4176 #ifdef MONO_ARCH_SIMD_INTRINSICS
4177 case OP_ADDPS:
4178 x86_sse_alu_ps_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
4179 break;
4180 case OP_DIVPS:
4181 x86_sse_alu_ps_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
4182 break;
4183 case OP_MULPS:
4184 x86_sse_alu_ps_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
4185 break;
4186 case OP_SUBPS:
4187 x86_sse_alu_ps_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
4188 break;
4189 case OP_MAXPS:
4190 x86_sse_alu_ps_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
4191 break;
4192 case OP_MINPS:
4193 x86_sse_alu_ps_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
4194 break;
4195 case OP_COMPPS:
4196 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
4197 x86_sse_alu_ps_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
4198 break;
4199 case OP_ANDPS:
4200 x86_sse_alu_ps_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
4201 break;
4202 case OP_ANDNPS:
4203 x86_sse_alu_ps_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
4204 break;
4205 case OP_ORPS:
4206 x86_sse_alu_ps_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
4207 break;
4208 case OP_XORPS:
4209 x86_sse_alu_ps_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
4210 break;
4211 case OP_SQRTPS:
4212 x86_sse_alu_ps_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
4213 break;
4214 case OP_RSQRTPS:
4215 x86_sse_alu_ps_reg_reg (code, X86_SSE_RSQRT, ins->dreg, ins->sreg1);
4216 break;
4217 case OP_RCPPS:
4218 x86_sse_alu_ps_reg_reg (code, X86_SSE_RCP, ins->dreg, ins->sreg1);
4219 break;
4220 case OP_ADDSUBPS:
4221 x86_sse_alu_sd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
4222 break;
4223 case OP_HADDPS:
4224 x86_sse_alu_sd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
4225 break;
4226 case OP_HSUBPS:
4227 x86_sse_alu_sd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
4228 break;
4229 case OP_DUPPS_HIGH:
4230 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSHDUP, ins->dreg, ins->sreg1);
4231 break;
4232 case OP_DUPPS_LOW:
4233 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSLDUP, ins->dreg, ins->sreg1);
4234 break;
4236 case OP_PSHUFLEW_HIGH:
4237 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
4238 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 1);
4239 break;
4240 case OP_PSHUFLEW_LOW:
4241 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
4242 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 0);
4243 break;
4244 case OP_PSHUFLED:
4245 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
4246 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->sreg1, ins->inst_c0);
4247 break;
4248 case OP_SHUFPS:
4249 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
4250 x86_sse_alu_reg_reg_imm8 (code, X86_SSE_SHUFP, ins->sreg1, ins->sreg2, ins->inst_c0);
4251 break;
4252 case OP_SHUFPD:
4253 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0x3);
4254 x86_sse_alu_pd_reg_reg_imm8 (code, X86_SSE_SHUFP, ins->sreg1, ins->sreg2, ins->inst_c0);
4255 break;
4257 case OP_ADDPD:
4258 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
4259 break;
4260 case OP_DIVPD:
4261 x86_sse_alu_pd_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
4262 break;
4263 case OP_MULPD:
4264 x86_sse_alu_pd_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
4265 break;
4266 case OP_SUBPD:
4267 x86_sse_alu_pd_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
4268 break;
4269 case OP_MAXPD:
4270 x86_sse_alu_pd_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
4271 break;
4272 case OP_MINPD:
4273 x86_sse_alu_pd_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
4274 break;
4275 case OP_COMPPD:
4276 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
4277 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
4278 break;
4279 case OP_ANDPD:
4280 x86_sse_alu_pd_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
4281 break;
4282 case OP_ANDNPD:
4283 x86_sse_alu_pd_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
4284 break;
4285 case OP_ORPD:
4286 x86_sse_alu_pd_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
4287 break;
4288 case OP_XORPD:
4289 x86_sse_alu_pd_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
4290 break;
4291 case OP_SQRTPD:
4292 x86_sse_alu_pd_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
4293 break;
4294 case OP_ADDSUBPD:
4295 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
4296 break;
4297 case OP_HADDPD:
4298 x86_sse_alu_pd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
4299 break;
4300 case OP_HSUBPD:
4301 x86_sse_alu_pd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
4302 break;
4303 case OP_DUPPD:
4304 x86_sse_alu_sd_reg_reg (code, X86_SSE_MOVDDUP, ins->dreg, ins->sreg1);
4305 break;
4307 case OP_EXTRACT_MASK:
4308 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMOVMSKB, ins->dreg, ins->sreg1);
4309 break;
4311 case OP_PAND:
4312 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAND, ins->sreg1, ins->sreg2);
4313 break;
4314 case OP_POR:
4315 x86_sse_alu_pd_reg_reg (code, X86_SSE_POR, ins->sreg1, ins->sreg2);
4316 break;
4317 case OP_PXOR:
4318 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->sreg1, ins->sreg2);
4319 break;
4321 case OP_PADDB:
4322 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDB, ins->sreg1, ins->sreg2);
4323 break;
4324 case OP_PADDW:
4325 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDW, ins->sreg1, ins->sreg2);
4326 break;
4327 case OP_PADDD:
4328 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDD, ins->sreg1, ins->sreg2);
4329 break;
4330 case OP_PADDQ:
4331 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDQ, ins->sreg1, ins->sreg2);
4332 break;
4334 case OP_PSUBB:
4335 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBB, ins->sreg1, ins->sreg2);
4336 break;
4337 case OP_PSUBW:
4338 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBW, ins->sreg1, ins->sreg2);
4339 break;
4340 case OP_PSUBD:
4341 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBD, ins->sreg1, ins->sreg2);
4342 break;
4343 case OP_PSUBQ:
4344 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBQ, ins->sreg1, ins->sreg2);
4345 break;
4347 case OP_PMAXB_UN:
4348 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXUB, ins->sreg1, ins->sreg2);
4349 break;
4350 case OP_PMAXW_UN:
4351 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUW, ins->sreg1, ins->sreg2);
4352 break;
4353 case OP_PMAXD_UN:
4354 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUD, ins->sreg1, ins->sreg2);
4355 break;
4357 case OP_PMAXB:
4358 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSB, ins->sreg1, ins->sreg2);
4359 break;
4360 case OP_PMAXW:
4361 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXSW, ins->sreg1, ins->sreg2);
4362 break;
4363 case OP_PMAXD:
4364 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSD, ins->sreg1, ins->sreg2);
4365 break;
4367 case OP_PAVGB_UN:
4368 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGB, ins->sreg1, ins->sreg2);
4369 break;
4370 case OP_PAVGW_UN:
4371 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGW, ins->sreg1, ins->sreg2);
4372 break;
4374 case OP_PMINB_UN:
4375 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINUB, ins->sreg1, ins->sreg2);
4376 break;
4377 case OP_PMINW_UN:
4378 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUW, ins->sreg1, ins->sreg2);
4379 break;
4380 case OP_PMIND_UN:
4381 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUD, ins->sreg1, ins->sreg2);
4382 break;
4384 case OP_PMINB:
4385 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSB, ins->sreg1, ins->sreg2);
4386 break;
4387 case OP_PMINW:
4388 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINSW, ins->sreg1, ins->sreg2);
4389 break;
4390 case OP_PMIND:
4391 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSD, ins->sreg1, ins->sreg2);
4392 break;
4394 case OP_PCMPEQB:
4395 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQB, ins->sreg1, ins->sreg2);
4396 break;
4397 case OP_PCMPEQW:
4398 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQW, ins->sreg1, ins->sreg2);
4399 break;
4400 case OP_PCMPEQD:
4401 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2);
4402 break;
4403 case OP_PCMPEQQ:
4404 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPEQQ, ins->sreg1, ins->sreg2);
4405 break;
4407 case OP_PCMPGTB:
4408 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2);
4409 break;
4410 case OP_PCMPGTW:
4411 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTW, ins->sreg1, ins->sreg2);
4412 break;
4413 case OP_PCMPGTD:
4414 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2);
4415 break;
4416 case OP_PCMPGTQ:
4417 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPGTQ, ins->sreg1, ins->sreg2);
4418 break;
4420 case OP_PSUM_ABS_DIFF:
4421 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2);
4422 break;
4424 case OP_UNPACK_LOWB:
4425 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLBW, ins->sreg1, ins->sreg2);
4426 break;
4427 case OP_UNPACK_LOWW:
4428 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLWD, ins->sreg1, ins->sreg2);
4429 break;
4430 case OP_UNPACK_LOWD:
4431 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLDQ, ins->sreg1, ins->sreg2);
4432 break;
4433 case OP_UNPACK_LOWQ:
4434 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLQDQ, ins->sreg1, ins->sreg2);
4435 break;
4436 case OP_UNPACK_LOWPS:
4437 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4438 break;
4439 case OP_UNPACK_LOWPD:
4440 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4441 break;
4443 case OP_UNPACK_HIGHB:
4444 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHBW, ins->sreg1, ins->sreg2);
4445 break;
4446 case OP_UNPACK_HIGHW:
4447 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHWD, ins->sreg1, ins->sreg2);
4448 break;
4449 case OP_UNPACK_HIGHD:
4450 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHDQ, ins->sreg1, ins->sreg2);
4451 break;
4452 case OP_UNPACK_HIGHQ:
4453 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHQDQ, ins->sreg1, ins->sreg2);
4454 break;
4455 case OP_UNPACK_HIGHPS:
4456 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4457 break;
4458 case OP_UNPACK_HIGHPD:
4459 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4460 break;
4462 case OP_PACKW:
4463 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSWB, ins->sreg1, ins->sreg2);
4464 break;
4465 case OP_PACKD:
4466 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSDW, ins->sreg1, ins->sreg2);
4467 break;
4468 case OP_PACKW_UN:
4469 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKUSWB, ins->sreg1, ins->sreg2);
4470 break;
4471 case OP_PACKD_UN:
4472 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PACKUSDW, ins->sreg1, ins->sreg2);
4473 break;
4475 case OP_PADDB_SAT_UN:
4476 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSB, ins->sreg1, ins->sreg2);
4477 break;
4478 case OP_PSUBB_SAT_UN:
4479 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSB, ins->sreg1, ins->sreg2);
4480 break;
4481 case OP_PADDW_SAT_UN:
4482 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSW, ins->sreg1, ins->sreg2);
4483 break;
4484 case OP_PSUBW_SAT_UN:
4485 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSW, ins->sreg1, ins->sreg2);
4486 break;
4488 case OP_PADDB_SAT:
4489 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSB, ins->sreg1, ins->sreg2);
4490 break;
4491 case OP_PSUBB_SAT:
4492 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSB, ins->sreg1, ins->sreg2);
4493 break;
4494 case OP_PADDW_SAT:
4495 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSW, ins->sreg1, ins->sreg2);
4496 break;
4497 case OP_PSUBW_SAT:
4498 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSW, ins->sreg1, ins->sreg2);
4499 break;
4501 case OP_PMULW:
4502 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULLW, ins->sreg1, ins->sreg2);
4503 break;
4504 case OP_PMULD:
4505 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMULLD, ins->sreg1, ins->sreg2);
4506 break;
4507 case OP_PMULQ:
4508 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULUDQ, ins->sreg1, ins->sreg2);
4509 break;
4510 case OP_PMULW_HIGH_UN:
4511 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHUW, ins->sreg1, ins->sreg2);
4512 break;
4513 case OP_PMULW_HIGH:
4514 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHW, ins->sreg1, ins->sreg2);
4515 break;
4517 case OP_PSHRW:
4518 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4519 break;
4520 case OP_PSHRW_REG:
4521 x86_sse_shift_reg_reg (code, X86_SSE_PSRLW_REG, ins->dreg, ins->sreg2);
4522 break;
4524 case OP_PSARW:
4525 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4526 break;
4527 case OP_PSARW_REG:
4528 x86_sse_shift_reg_reg (code, X86_SSE_PSRAW_REG, ins->dreg, ins->sreg2);
4529 break;
4531 case OP_PSHLW:
4532 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4533 break;
4534 case OP_PSHLW_REG:
4535 x86_sse_shift_reg_reg (code, X86_SSE_PSLLW_REG, ins->dreg, ins->sreg2);
4536 break;
4538 case OP_PSHRD:
4539 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4540 break;
4541 case OP_PSHRD_REG:
4542 x86_sse_shift_reg_reg (code, X86_SSE_PSRLD_REG, ins->dreg, ins->sreg2);
4543 break;
4545 case OP_PSARD:
4546 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4547 break;
4548 case OP_PSARD_REG:
4549 x86_sse_shift_reg_reg (code, X86_SSE_PSRAD_REG, ins->dreg, ins->sreg2);
4550 break;
4552 case OP_PSHLD:
4553 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4554 break;
4555 case OP_PSHLD_REG:
4556 x86_sse_shift_reg_reg (code, X86_SSE_PSLLD_REG, ins->dreg, ins->sreg2);
4557 break;
4559 case OP_PSHRQ:
4560 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4561 break;
4562 case OP_PSHRQ_REG:
4563 x86_sse_shift_reg_reg (code, X86_SSE_PSRLQ_REG, ins->dreg, ins->sreg2);
4564 break;
4566 case OP_PSHLQ:
4567 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4568 break;
4569 case OP_PSHLQ_REG:
4570 x86_sse_shift_reg_reg (code, X86_SSE_PSLLQ_REG, ins->dreg, ins->sreg2);
4571 break;
4573 case OP_ICONV_TO_X:
4574 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4575 break;
4576 case OP_EXTRACT_I4:
4577 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4578 break;
4579 case OP_EXTRACT_I1:
4580 case OP_EXTRACT_U1:
4581 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4582 if (ins->inst_c0)
4583 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
4584 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
4585 break;
4586 case OP_EXTRACT_I2:
4587 case OP_EXTRACT_U2:
4588 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4589 if (ins->inst_c0)
4590 x86_shift_reg_imm (code, X86_SHR, ins->dreg, 16);
4591 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE);
4592 break;
4593 case OP_EXTRACT_R8:
4594 if (ins->inst_c0)
4595 x86_sse_alu_pd_membase_reg (code, X86_SSE_MOVHPD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4596 else
4597 x86_sse_alu_sd_membase_reg (code, X86_SSE_MOVSD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4598 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE);
4599 break;
4601 case OP_INSERT_I2:
4602 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->sreg1, ins->sreg2, ins->inst_c0);
4603 break;
4604 case OP_EXTRACTX_U2:
4605 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PEXTRW, ins->dreg, ins->sreg1, ins->inst_c0);
4606 break;
4607 case OP_INSERTX_U1_SLOW:
4608 /* sreg1 is the extracted ireg (scratch),
4609 * sreg2 is the ireg to be inserted (scratch),
4610 * dreg is the xreg that receives the value */
4612 /*clear the bits from the extracted word*/
4613 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
4614 /*shift the value to insert if needed*/
4615 if (ins->inst_c0 & 1)
4616 x86_shift_reg_imm (code, X86_SHL, ins->sreg2, 8);
4617 /*join them together*/
4618 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
4619 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
4620 break;
4621 case OP_INSERTX_I4_SLOW:
4622 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
4623 x86_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
4624 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
4625 break;
4627 case OP_INSERTX_R4_SLOW:
4628 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4629 /*TODO if inst_c0 == 0 use movss*/
4630 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 0, ins->inst_c0 * 2);
4631 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 2, ins->inst_c0 * 2 + 1);
4632 break;
4633 case OP_INSERTX_R8_SLOW:
4634 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4635 if (cfg->verbose_level)
4636 printf ("CONVERTING a OP_INSERTX_R8_SLOW %d offset %x\n", ins->inst_c0, offset);
4637 if (ins->inst_c0)
4638 x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVHPD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4639 else
4640 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4641 break;
4643 case OP_STOREX_MEMBASE_REG:
4644 case OP_STOREX_MEMBASE:
4645 x86_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4646 break;
4647 case OP_LOADX_MEMBASE:
4648 x86_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4649 break;
4650 case OP_LOADX_ALIGNED_MEMBASE:
4651 x86_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4652 break;
4653 case OP_STOREX_ALIGNED_MEMBASE_REG:
4654 x86_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4655 break;
4656 case OP_STOREX_NTA_MEMBASE_REG:
4657 x86_sse_alu_reg_membase (code, X86_SSE_MOVNTPS, ins->dreg, ins->sreg1, ins->inst_offset);
4658 break;
4659 case OP_PREFETCH_MEMBASE:
4660 x86_sse_alu_reg_membase (code, X86_SSE_PREFETCH, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
4662 break;
4663 case OP_XMOVE:
4664 /*FIXME the peephole pass should have killed this*/
4665 if (ins->dreg != ins->sreg1)
4666 x86_movaps_reg_reg (code, ins->dreg, ins->sreg1);
4667 break;
4668 case OP_XZERO:
4669 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->dreg, ins->dreg);
4670 break;
4671 case OP_ICONV_TO_R8_RAW:
4672 x86_mov_membase_reg (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1, 4);
4673 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE);
4674 break;
4676 case OP_FCONV_TO_R8_X:
4677 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4678 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4679 break;
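/* There is no direct x87 <-> XMM move on x86, so this conversion (and its
 * inverses above) bounces through the pre-allocated spill slot; roughly:
 *
 *     fstp  qword [spill]        ; x87 stack top -> memory
 *     movsd xmm_dreg, [spill]    ; memory -> XMM register
 */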
4681 case OP_XCONV_R8_TO_I4:
4682 x86_cvttsd2si (code, ins->dreg, ins->sreg1);
4683 switch (ins->backend.source_opcode) {
4684 case OP_FCONV_TO_I1:
4685 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
4686 break;
4687 case OP_FCONV_TO_U1:
4688 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4689 break;
4690 case OP_FCONV_TO_I2:
4691 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
4692 break;
4693 case OP_FCONV_TO_U2:
4694 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
4695 break;
4697 break;
4699 case OP_EXPAND_I1:
4700 /*FIXME this causes a partial register stall, maybe it would not be that bad to use shift + mask + or*/
4701 /*The +4 is to get a mov ?h, ?l over the same reg.*/
4702 x86_mov_reg_reg (code, ins->sreg1 + 4, ins->sreg1, 1);
4703 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4704 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4705 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4706 break;
4707 case OP_EXPAND_I2:
4708 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4709 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4710 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4711 break;
4712 case OP_EXPAND_I4:
4713 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4714 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4715 break;
4716 case OP_EXPAND_R4:
4717 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4718 x86_movd_xreg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4719 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4720 break;
4721 case OP_EXPAND_R8:
4722 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4723 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4724 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0x44);
4725 break;
4727 case OP_CVTDQ2PD:
4728 x86_sse_alu_ss_reg_reg (code, X86_SSE_CVTDQ2PD, ins->dreg, ins->sreg1);
4729 break;
4730 case OP_CVTDQ2PS:
4731 x86_sse_alu_ps_reg_reg (code, X86_SSE_CVTDQ2PS, ins->dreg, ins->sreg1);
4732 break;
4733 case OP_CVTPD2DQ:
4734 x86_sse_alu_sd_reg_reg (code, X86_SSE_CVTPD2DQ, ins->dreg, ins->sreg1);
4735 break;
4736 case OP_CVTPD2PS:
4737 x86_sse_alu_pd_reg_reg (code, X86_SSE_CVTPD2PS, ins->dreg, ins->sreg1);
4738 break;
4739 case OP_CVTPS2DQ:
4740 x86_sse_alu_pd_reg_reg (code, X86_SSE_CVTPS2DQ, ins->dreg, ins->sreg1);
4741 break;
4742 case OP_CVTPS2PD:
4743 x86_sse_alu_ps_reg_reg (code, X86_SSE_CVTPS2PD, ins->dreg, ins->sreg1);
4744 break;
4745 case OP_CVTTPD2DQ:
4746 x86_sse_alu_pd_reg_reg (code, X86_SSE_CVTTPD2DQ, ins->dreg, ins->sreg1);
4747 break;
4748 case OP_CVTTPS2DQ:
4749 x86_sse_alu_ss_reg_reg (code, X86_SSE_CVTTPS2DQ, ins->dreg, ins->sreg1);
4750 break;
4752 #endif
4753 case OP_LIVERANGE_START: {
4754 if (cfg->verbose_level > 1)
4755 printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4756 MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
4757 break;
4759 case OP_LIVERANGE_END: {
4760 if (cfg->verbose_level > 1)
4761 printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4762 MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
4763 break;
4765 case OP_NACL_GC_SAFE_POINT: {
4766 #if defined(__native_client_codegen__)
4767 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)mono_nacl_gc);
4768 #endif
4769 break;
4771 case OP_GC_LIVENESS_DEF:
4772 case OP_GC_LIVENESS_USE:
4773 case OP_GC_PARAM_SLOT_LIVENESS_DEF:
4774 ins->backend.pc_offset = code - cfg->native_code;
4775 break;
4776 case OP_GC_SPILL_SLOT_LIVENESS_DEF:
4777 ins->backend.pc_offset = code - cfg->native_code;
4778 bb->spill_slot_defs = g_slist_prepend_mempool (cfg->mempool, bb->spill_slot_defs, ins);
4779 break;
4780 default:
4781 g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
4782 g_assert_not_reached ();
4785 if (G_UNLIKELY ((code - cfg->native_code - offset) > max_len)) {
4786 #ifndef __native_client_codegen__
4787 g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4788 mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4789 g_assert_not_reached ();
4790 #endif /* __native_client_codegen__ */
4793 cpos += max_len;
4796 cfg->code_len = code - cfg->native_code;
4799 #endif /* DISABLE_JIT */
4801 void
4802 mono_arch_register_lowlevel_calls (void)
4806 void
4807 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, MonoCodeManager *dyn_code_mp, gboolean run_cctors)
4809 MonoJumpInfo *patch_info;
4810 gboolean compile_aot = !run_cctors;
4812 for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4813 unsigned char *ip = patch_info->ip.i + code;
4814 const unsigned char *target;
4816 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4818 if (compile_aot) {
4819 switch (patch_info->type) {
4820 case MONO_PATCH_INFO_BB:
4821 case MONO_PATCH_INFO_LABEL:
4822 break;
4823 default:
4824 /* No need to patch these */
4825 continue;
4829 switch (patch_info->type) {
4830 case MONO_PATCH_INFO_IP:
4831 *((gconstpointer *)(ip)) = target;
4832 break;
4833 case MONO_PATCH_INFO_CLASS_INIT: {
4834 guint8 *code = ip;
4835 /* Might already have been changed to a nop */
4836 x86_call_code (code, 0);
4837 x86_patch (ip, target);
4838 break;
4840 case MONO_PATCH_INFO_ABS:
4841 case MONO_PATCH_INFO_METHOD:
4842 case MONO_PATCH_INFO_METHOD_JUMP:
4843 case MONO_PATCH_INFO_INTERNAL_METHOD:
4844 case MONO_PATCH_INFO_BB:
4845 case MONO_PATCH_INFO_LABEL:
4846 case MONO_PATCH_INFO_RGCTX_FETCH:
4847 case MONO_PATCH_INFO_GENERIC_CLASS_INIT:
4848 case MONO_PATCH_INFO_MONITOR_ENTER:
4849 case MONO_PATCH_INFO_MONITOR_EXIT:
4850 #if defined(__native_client_codegen__) && defined(__native_client__)
4851 if (nacl_is_code_address (code)) {
4852 /* For tail calls, code is patched after being installed */
4853 /* but not through the normal "patch callsite" method. */
4854 unsigned char buf[kNaClAlignment];
4855 unsigned char *aligned_code = (unsigned char *)((uintptr_t)code & ~(uintptr_t)kNaClAlignmentMask);
4856 unsigned char *_target = (unsigned char *)target;
4857 int ret;
4858 /* All patch targets modified in x86_patch */
4859 /* are IP relative. */
4860 _target = _target + (uintptr_t)buf - (uintptr_t)aligned_code;
4861 memcpy (buf, aligned_code, kNaClAlignment);
4862 /* Patch a temp buffer of bundle size, */
4863 /* then install to actual location. */
4864 x86_patch (buf + ((uintptr_t)code - (uintptr_t)aligned_code), _target);
4865 ret = nacl_dyncode_modify (aligned_code, buf, kNaClAlignment);
4866 g_assert (ret == 0);
4868 else {
4869 x86_patch (ip, target);
4871 #else
4872 x86_patch (ip, target);
4873 #endif
4874 break;
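/* The NaCl branch above cannot write to code pages in place: it copies
 * the bundle containing the call site into a scratch buffer, rebases the
 * (IP-relative) target so the displacement computed against the buffer
 * matches the one needed at the real address, patches the copy, and has
 * the service runtime install it.  In outline:
 *
 *     _target += (uintptr_t)buf - (uintptr_t)aligned_code;
 *     x86_patch (buf + (code - aligned_code), _target);
 *     nacl_dyncode_modify (aligned_code, buf, kNaClAlignment);
 */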
4875 case MONO_PATCH_INFO_NONE:
4876 break;
4877 case MONO_PATCH_INFO_R4:
4878 case MONO_PATCH_INFO_R8: {
4879 guint32 offset = mono_arch_get_patch_offset (ip);
4880 *((gconstpointer *)(ip + offset)) = target;
4881 break;
4883 default: {
4884 guint32 offset = mono_arch_get_patch_offset (ip);
4885 #if !defined(__native_client__)
4886 *((gconstpointer *)(ip + offset)) = target;
4887 #else
4888 *((gconstpointer *)(ip + offset)) = nacl_modify_patch_target (target);
4889 #endif
4890 break;
4896 guint8 *
4897 mono_arch_emit_prolog (MonoCompile *cfg)
4899 MonoMethod *method = cfg->method;
4900 MonoBasicBlock *bb;
4901 MonoMethodSignature *sig;
4902 MonoInst *inst;
4903 int alloc_size, pos, max_offset, i, cfa_offset;
4904 guint8 *code;
4905 gboolean need_stack_frame;
4906 #ifdef __native_client_codegen__
4907 guint alignment_check;
4908 #endif
4910 cfg->code_size = MAX (cfg->header->code_size * 4, 10240);
4912 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4913 cfg->code_size += 512;
4915 #if defined(__default_codegen__)
4916 code = cfg->native_code = g_malloc (cfg->code_size);
4917 #elif defined(__native_client_codegen__)
4918 /* native_code_alloc is not 32-byte aligned, native_code is. */
4919 cfg->native_code_alloc = g_malloc (cfg->code_size + kNaClAlignment);
4921 /* Align native_code up to the next kNaClAlignment boundary. */
4922 cfg->native_code = (guint)cfg->native_code_alloc + kNaClAlignment;
4923 cfg->native_code = (guint)cfg->native_code & ~kNaClAlignmentMask;
4925 code = cfg->native_code;
4927 alignment_check = (guint)cfg->native_code & kNaClAlignmentMask;
4928 g_assert(alignment_check == 0);
4929 #endif
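/* The two statements above are the usual align-up idiom; a sketch with an
 * explicit pointer-sized type (guintptr would be more portable than guint,
 * though they coincide on 32-bit x86):
 *
 *     addr = ((guintptr)cfg->native_code_alloc + kNaClAlignment)
 *            & ~(guintptr)kNaClAlignmentMask;
 *
 * Adding the full kNaClAlignment (instead of kNaClAlignment - 1) can waste
 * one bundle, but the buffer was over-allocated by exactly that amount, so
 * the aligned region always fits.
 */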
4931 /* Offset between ESP and the CFA */
4932 cfa_offset = 0;
4934 // CFA = sp + 4
4935 cfa_offset = sizeof (gpointer);
4936 mono_emit_unwind_op_def_cfa (cfg, code, X86_ESP, sizeof (gpointer));
4937 // IP saved at CFA - 4
4938 /* There is no IP reg on x86 */
4939 mono_emit_unwind_op_offset (cfg, code, X86_NREG, -cfa_offset);
4941 need_stack_frame = needs_stack_frame (cfg);
4943 if (need_stack_frame) {
4944 x86_push_reg (code, X86_EBP);
4945 cfa_offset += sizeof (gpointer);
4946 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
4947 mono_emit_unwind_op_offset (cfg, code, X86_EBP, - cfa_offset);
4948 x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
4949 mono_emit_unwind_op_def_cfa_reg (cfg, code, X86_EBP);
4950 } else {
4951 cfg->frame_reg = X86_ESP;
4954 alloc_size = cfg->stack_offset;
4955 pos = 0;
4957 if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
4958 /* Might need to attach the thread to the JIT or change the domain for the callback */
4959 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
4960 guint8 *buf, *no_domain_branch;
4962 code = mono_x86_emit_tls_get (code, X86_EAX, appdomain_tls_offset);
4963 x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
4964 no_domain_branch = code;
4965 x86_branch8 (code, X86_CC_NE, 0, 0);
4966 code = mono_x86_emit_tls_get ( code, X86_EAX, lmf_tls_offset);
4967 x86_test_reg_reg (code, X86_EAX, X86_EAX);
4968 buf = code;
4969 x86_branch8 (code, X86_CC_NE, 0, 0);
4970 x86_patch (no_domain_branch, code);
4971 x86_push_imm (code, cfg->domain);
4972 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4973 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4974 x86_patch (buf, code);
4975 #ifdef TARGET_WIN32
4976 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4977 /* FIXME: Add a separate key for LMF to avoid this */
4978 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4979 #endif
4981 else {
4982 if (cfg->compile_aot) {
4984 * This goes before the saving of callee saved regs, so save the got reg
4985 * ourselves.
4987 x86_push_reg (code, MONO_ARCH_GOT_REG);
4988 code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL);
4989 x86_push_imm (code, 0);
4990 } else {
4991 x86_push_imm (code, cfg->domain);
4993 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4994 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4995 if (cfg->compile_aot)
4996 x86_pop_reg (code, MONO_ARCH_GOT_REG);
5000 if (method->save_lmf) {
5001 pos += sizeof (MonoLMF);
5003 /* save the current IP */
5004 if (cfg->compile_aot) {
5005 /* This pushes the current ip */
5006 x86_call_imm (code, 0);
5007 } else {
5008 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
5009 x86_push_imm_template (code);
5011 cfa_offset += sizeof (gpointer);
5013 /* save all caller saved regs */
5014 x86_push_reg (code, X86_EBP);
5015 cfa_offset += sizeof (gpointer);
5016 x86_push_reg (code, X86_ESI);
5017 cfa_offset += sizeof (gpointer);
5018 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
5019 x86_push_reg (code, X86_EDI);
5020 cfa_offset += sizeof (gpointer);
5021 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
5022 x86_push_reg (code, X86_EBX);
5023 cfa_offset += sizeof (gpointer);
5024 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
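/* What follows links the freshly built stack MonoLMF into the per-thread
 * LMF chain.  The slow path boils down to (sketch, using the
 * previous_lmf/lmf_addr fields also referenced in the epilog below):
 *
 *     lmf->previous_lmf = *lmf_addr;
 *     *lmf_addr = lmf;
 *
 * The fast path keeps the chain head directly in the mono_lmf TLS slot,
 * avoiding the indirection through mono_lmf_addr.
 */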
5026 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
5028 * Optimized version which uses the mono_lmf TLS variable instead of indirection
5029 * through the mono_lmf_addr TLS variable.
5031 /* %eax = previous_lmf */
5032 x86_prefix (code, X86_GS_PREFIX);
5033 x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
5034 /* skip esp + method_info + lmf */
5035 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
5036 /* push previous_lmf */
5037 x86_push_reg (code, X86_EAX);
5038 /* new lmf = ESP */
5039 x86_prefix (code, X86_GS_PREFIX);
5040 x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
5041 } else {
5042 /* get the address of lmf for the current thread */
5044 * This is performance critical so we try to use some tricks to make
5045 * it fast.
5048 if (lmf_addr_tls_offset != -1) {
5049 /* Load the lmf address quickly using the GS register */
5050 code = mono_x86_emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
5051 #ifdef TARGET_WIN32
5052 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
5053 /* FIXME: Add a separate key for LMF to avoid this */
5054 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
5055 #endif
5056 } else {
5057 if (cfg->compile_aot)
5058 code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL);
5059 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
5062 /* Skip esp + method info */
5063 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
5065 /* push lmf_addr */
5066 x86_push_reg (code, X86_EAX);
5067 /* push *lmf_addr (previous_lmf) */
5068 x86_push_membase (code, X86_EAX, 0);
5069 /* *(lmf) = ESP */
5070 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
5072 } else {
5074 if (cfg->used_int_regs & (1 << X86_EBX)) {
5075 x86_push_reg (code, X86_EBX);
5076 pos += 4;
5077 cfa_offset += sizeof (gpointer);
5078 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
5081 if (cfg->used_int_regs & (1 << X86_EDI)) {
5082 x86_push_reg (code, X86_EDI);
5083 pos += 4;
5084 cfa_offset += sizeof (gpointer);
5085 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
5088 if (cfg->used_int_regs & (1 << X86_ESI)) {
5089 x86_push_reg (code, X86_ESI);
5090 pos += 4;
5091 cfa_offset += sizeof (gpointer);
5092 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
5096 alloc_size -= pos;
5098 /* the original alloc_size is already aligned: %ebp and the return IP have been pushed, so realign */
5099 if (mono_do_x86_stack_align && need_stack_frame) {
5100 int tot = alloc_size + pos + 4; /* ret ip */
5101 if (need_stack_frame)
5102 tot += 4; /* ebp */
5103 tot &= MONO_ARCH_FRAME_ALIGNMENT - 1;
5104 if (tot)
5105 alloc_size += MONO_ARCH_FRAME_ALIGNMENT - tot;
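/* Equivalently: pad alloc_size so that locals + saved regs + return IP
 * (+ saved EBP when a frame is used) is a multiple of
 * MONO_ARCH_FRAME_ALIGNMENT; the two steps above amount to:
 *
 *     alloc_size += (-tot) & (MONO_ARCH_FRAME_ALIGNMENT - 1);
 */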
5108 if (alloc_size) {
5109 /* See mono_emit_stack_alloc */
5110 #if defined(TARGET_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
5111 guint32 remaining_size = alloc_size;
5112 /* FIXME: handle unbounded code expansion; we should use a loop in case of more than X iterations */
5113 guint32 required_code_size = ((remaining_size / 0x1000) + 1) * 8; /*8 is the max size of x86_alu_reg_imm + x86_test_membase_reg*/
5114 guint32 offset = code - cfg->native_code;
5115 if (G_UNLIKELY (required_code_size >= (cfg->code_size - offset))) {
5116 while (required_code_size >= (cfg->code_size - offset))
5117 cfg->code_size *= 2;
5118 cfg->native_code = mono_realloc_native_code(cfg);
5119 code = cfg->native_code + offset;
5120 cfg->stat_code_reallocs++;
5122 while (remaining_size >= 0x1000) {
5123 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
5124 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
5125 remaining_size -= 0x1000;
5127 if (remaining_size)
5128 x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
5129 #else
5130 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
5131 #endif
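/* The page-sized stepping above is stack probing: each new 4 KB page is
 * touched in order, so the OS can extend the stack through its guard page
 * instead of faulting.  Hedged C analogue (touch() is a stand-in for the
 * x86_test_membase_reg above):
 *
 *     while (remaining_size >= 0x1000) {
 *         esp -= 0x1000;
 *         touch (esp);
 *         remaining_size -= 0x1000;
 *     }
 *     esp -= remaining_size;
 */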
5133 g_assert (need_stack_frame);
5136 if (cfg->method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED ||
5137 cfg->method->wrapper_type == MONO_WRAPPER_RUNTIME_INVOKE) {
5138 x86_alu_reg_imm (code, X86_AND, X86_ESP, -MONO_ARCH_FRAME_ALIGNMENT);
5141 #if DEBUG_STACK_ALIGNMENT
5142 /* check the stack is aligned */
5143 if (need_stack_frame && method->wrapper_type == MONO_WRAPPER_NONE) {
5144 x86_mov_reg_reg (code, X86_ECX, X86_ESP, 4);
5145 x86_alu_reg_imm (code, X86_AND, X86_ECX, MONO_ARCH_FRAME_ALIGNMENT - 1);
5146 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
5147 x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
5148 x86_breakpoint (code);
5150 #endif
5152 /* compute max_offset in order to use short forward jumps */
5153 max_offset = 0;
5154 if (cfg->opt & MONO_OPT_BRANCH) {
5155 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
5156 MonoInst *ins;
5157 bb->max_offset = max_offset;
5159 if (cfg->prof_options & MONO_PROFILE_COVERAGE)
5160 max_offset += 6;
5161 /* max alignment for loops */
5162 if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
5163 max_offset += LOOP_ALIGNMENT;
5164 #ifdef __native_client_codegen__
5165 /* max alignment for native client */
5166 max_offset += kNaClAlignment;
5167 #endif
5168 MONO_BB_FOR_EACH_INS (bb, ins) {
5169 if (ins->opcode == OP_LABEL)
5170 ins->inst_c1 = max_offset;
5171 #ifdef __native_client_codegen__
5173 int space_in_block = kNaClAlignment -
5174 ((max_offset + cfg->code_len) & kNaClAlignmentMask);
5175 int max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
5176 if (space_in_block < max_len && max_len < kNaClAlignment) {
5177 max_offset += space_in_block;
5180 #endif /* __native_client_codegen__ */
5181 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
5186 /* store runtime generic context */
5187 if (cfg->rgctx_var) {
5188 g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET && cfg->rgctx_var->inst_basereg == X86_EBP);
5190 x86_mov_membase_reg (code, X86_EBP, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 4);
5193 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
5194 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
5196 /* load arguments allocated to register from the stack */
5197 sig = mono_method_signature (method);
5198 pos = 0;
5200 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
5201 inst = cfg->args [pos];
5202 if (inst->opcode == OP_REGVAR) {
5203 g_assert (need_stack_frame);
5204 x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
5205 if (cfg->verbose_level > 2)
5206 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
5208 pos++;
5211 cfg->code_len = code - cfg->native_code;
5213 g_assert (cfg->code_len < cfg->code_size);
5215 return code;
5218 void
5219 mono_arch_emit_epilog (MonoCompile *cfg)
5221 MonoMethod *method = cfg->method;
5222 MonoMethodSignature *sig = mono_method_signature (method);
5223 int quad, pos;
5224 guint32 stack_to_pop;
5225 guint8 *code;
5226 int max_epilog_size = 16;
5227 CallInfo *cinfo;
5228 gboolean need_stack_frame = needs_stack_frame (cfg);
5230 if (cfg->method->save_lmf)
5231 max_epilog_size += 128;
5233 while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
5234 cfg->code_size *= 2;
5235 cfg->native_code = mono_realloc_native_code(cfg);
5236 cfg->stat_code_reallocs++;
5239 code = cfg->native_code + cfg->code_len;
5241 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
5242 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
5244 /* the code restoring the registers must be kept in sync with OP_JMP */
5245 pos = 0;
5247 if (method->save_lmf) {
5248 gint32 prev_lmf_reg;
5249 gint32 lmf_offset = -sizeof (MonoLMF);
5251 /* check if we need to restore protection of the stack after a stack overflow */
5252 if (mono_get_jit_tls_offset () != -1) {
5253 guint8 *patch;
5254 code = mono_x86_emit_tls_get (code, X86_ECX, mono_get_jit_tls_offset ());
5255 /* we load the value in a separate instruction: this mechanism may be
5256 * used later as a safer way to do thread interruption
5258 x86_mov_reg_membase (code, X86_ECX, X86_ECX, G_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 4);
5259 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
5260 patch = code;
5261 x86_branch8 (code, X86_CC_Z, 0, FALSE);
5262 /* note that the call trampoline will preserve eax/edx */
5263 x86_call_reg (code, X86_ECX);
5264 x86_patch (patch, code);
5265 } else {
5266 /* FIXME: maybe save the jit tls in the prolog */
5268 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
5270 * Optimized version which uses the mono_lmf TLS variable instead of indirection
5271 * through the mono_lmf_addr TLS variable.
5273 /* reg = previous_lmf */
5274 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
5276 /* lmf = previous_lmf */
5277 x86_prefix (code, X86_GS_PREFIX);
5278 x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
5279 } else {
5280 /* Find a spare register */
5281 switch (mini_type_get_underlying_type (cfg->generic_sharing_context, sig->ret)->type) {
5282 case MONO_TYPE_I8:
5283 case MONO_TYPE_U8:
5284 prev_lmf_reg = X86_EDI;
5285 cfg->used_int_regs |= (1 << X86_EDI);
5286 break;
5287 default:
5288 prev_lmf_reg = X86_EDX;
5289 break;
5292 /* reg = previous_lmf */
5293 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
5295 /* ecx = lmf */
5296 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
5298 /* *(lmf) = previous_lmf */
5299 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
5302 /* restore caller saved regs */
5303 if (cfg->used_int_regs & (1 << X86_EBX)) {
5304 x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
5307 if (cfg->used_int_regs & (1 << X86_EDI)) {
5308 x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
5310 if (cfg->used_int_regs & (1 << X86_ESI)) {
5311 x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
5314 /* EBP is restored by LEAVE */
5315 } else {
5316 if (cfg->used_int_regs & (1 << X86_EBX)) {
5317 pos -= 4;
5319 if (cfg->used_int_regs & (1 << X86_EDI)) {
5320 pos -= 4;
5322 if (cfg->used_int_regs & (1 << X86_ESI)) {
5323 pos -= 4;
5326 if (pos) {
5327 g_assert (need_stack_frame);
5328 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
5331 if (cfg->used_int_regs & (1 << X86_ESI)) {
5332 x86_pop_reg (code, X86_ESI);
5334 if (cfg->used_int_regs & (1 << X86_EDI)) {
5335 x86_pop_reg (code, X86_EDI);
5337 if (cfg->used_int_regs & (1 << X86_EBX)) {
5338 x86_pop_reg (code, X86_EBX);
5342 /* Load returned vtypes into registers if needed */
5343 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig);
5344 if (cinfo->ret.storage == ArgValuetypeInReg) {
5345 for (quad = 0; quad < 2; quad ++) {
5346 switch (cinfo->ret.pair_storage [quad]) {
5347 case ArgInIReg:
5348 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
5349 break;
5350 case ArgOnFloatFpStack:
5351 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
5352 break;
5353 case ArgOnDoubleFpStack:
5354 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
5355 break;
5356 case ArgNone:
5357 break;
5358 default:
5359 g_assert_not_reached ();
5364 if (need_stack_frame)
5365 x86_leave (code);
5367 if (CALLCONV_IS_STDCALL (sig)) {
5368 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
5370 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
5371 } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
5372 stack_to_pop = 4;
5373 else
5374 stack_to_pop = 0;
5376 if (stack_to_pop) {
5377 g_assert (need_stack_frame);
5378 x86_ret_imm (code, stack_to_pop);
5379 } else {
5380 x86_ret (code);
5383 cfg->code_len = code - cfg->native_code;
5385 g_assert (cfg->code_len < cfg->code_size);
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/*
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = mono_realloc_native_code(cfg);
		cfg->stat_code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
#if defined(__default_codegen__)
				size = 5 + 5;
#elif defined(__native_client_codegen__)
				code = mono_nacl_align (code);
				size = kNaClAlignment;
#endif
				/* This is aligned to 16 bytes by the callee. This way we save a few bytes here. */

				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				x86_call_code (code, 0);
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}

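/*
 * mono_arch_flush_icache:
 *
 *   A no-op on x86: the hardware keeps the instruction cache coherent with
 * data writes, so freshly generated code needs no explicit flush.
 */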
void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* not needed */
}

void
mono_arch_flush_register_windows (void)
{
}

gboolean
mono_arch_is_inst_imm (gint64 imm)
{
	return TRUE;
}

/*
 * Support for fast access to the thread-local lmf structure using the GS
 * segment register on NPTL + kernel 2.6.x.
 */

static gboolean tls_offset_inited = FALSE;

void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
		if (!getenv ("MONO_NO_TLS")) {
#ifdef TARGET_WIN32
			/*
			 * We need to init this multiple times, since when we are first called, the key might not
			 * be initialized yet.
			 */
			appdomain_tls_offset = mono_domain_get_tls_key ();
			lmf_tls_offset = mono_get_jit_tls_key ();

			/* Only 64 tls entries can be accessed using inline code */
			if (appdomain_tls_offset >= 64)
				appdomain_tls_offset = -1;
			if (lmf_tls_offset >= 64)
				lmf_tls_offset = -1;
#else
#if MONO_XEN_OPT
			optimize_for_xen = access ("/proc/xen", F_OK) == 0;
#endif
			tls_offset_inited = TRUE;
			appdomain_tls_offset = mono_domain_get_tls_offset ();
			lmf_tls_offset = mono_get_lmf_tls_offset ();
			lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
#endif
		}
	}
}

void
mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
{
}

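/*
 * IMT thunk support. The constants below are per-entry upper bounds on the
 * emitted code size; mono_arch_build_imt_thunk () uses them to reserve code
 * memory before emitting the compare/branch chains that dispatch interface
 * calls off MONO_ARCH_IMT_REG.
 */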
#ifdef MONO_ARCH_HAVE_IMT

// Linear handler, the bsearch head compare is shorter
//[2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
//[1 + 1] x86_branch8(inst,cond,imm,is_signed)
//        x86_patch(ins,target)
//[1 + 5] x86_jump_mem(inst,mem)

#define CMP_SIZE 6
#if defined(__default_codegen__)
#define BR_SMALL_SIZE 2
#define BR_LARGE_SIZE 5
#elif defined(__native_client_codegen__)
/* I suspect the size calculation below is actually incorrect. */
/* TODO: fix the calculation that uses these sizes. */
#define BR_SMALL_SIZE 16
#define BR_LARGE_SIZE 12
#endif  /*__native_client_codegen__*/
#define JUMP_IMM_SIZE 6
#define ENABLE_WRONG_METHOD_CHECK 0
#define DEBUG_IMT 0

static int
imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
{
	int i, distance = 0;
	for (i = start; i < target; ++i)
		distance += imt_entries [i]->chunk_size;
	return distance;
}

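/*
 * The thunk built below is a chain of compares and branches keyed on
 * MONO_ARCH_IMT_REG; schematically (a sketch, not byte-exact):
 *
 *	cmp  $key_n, %imt_reg      ; bsearch node: go right on >=
 *	jge  right_subtree
 *	cmp  $key_i, %imt_reg      ; leaf: dispatch on match
 *	jne  next_item
 *	jmp  *vtable_slot_i
 */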
/*
 * LOCKING: called with the domain lock held
 */
gpointer
mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
	gpointer fail_tramp)
{
	int i;
	int size = 0;
	guint8 *code, *start;

	for (i = 0; i < count; ++i) {
		MonoIMTCheckItem *item = imt_entries [i];
		if (item->is_equals) {
			if (item->check_target_idx) {
				if (!item->compare_done)
					item->chunk_size += CMP_SIZE;
				item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
			} else {
				if (fail_tramp) {
					item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + JUMP_IMM_SIZE * 2;
				} else {
					item->chunk_size += JUMP_IMM_SIZE;
#if ENABLE_WRONG_METHOD_CHECK
					item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
#endif
				}
			}
		} else {
			item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
			imt_entries [item->check_target_idx]->compare_done = TRUE;
		}
		size += item->chunk_size;
	}
#if defined(__native_client__) && defined(__native_client_codegen__)
	/* In Native Client, we don't re-use thunks, allocate from the */
	/* normal code manager paths. */
	code = mono_domain_code_reserve (domain, size);
#else
	if (fail_tramp)
		code = mono_method_alloc_generic_virtual_thunk (domain, size);
	else
		code = mono_domain_code_reserve (domain, size);
#endif
	start = code;
	for (i = 0; i < count; ++i) {
		MonoIMTCheckItem *item = imt_entries [i];
		item->code_target = code;
		if (item->is_equals) {
			if (item->check_target_idx) {
				if (!item->compare_done)
					x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
				item->jmp_code = code;
				x86_branch8 (code, X86_CC_NE, 0, FALSE);
				if (item->has_target_code)
					x86_jump_code (code, item->value.target_code);
				else
					x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
			} else {
				if (fail_tramp) {
					x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
					item->jmp_code = code;
					x86_branch8 (code, X86_CC_NE, 0, FALSE);
					if (item->has_target_code)
						x86_jump_code (code, item->value.target_code);
					else
						x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
					x86_patch (item->jmp_code, code);
					x86_jump_code (code, fail_tramp);
					item->jmp_code = NULL;
				} else {
					/* enable the commented code to assert on wrong method */
#if ENABLE_WRONG_METHOD_CHECK
					x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
					item->jmp_code = code;
					x86_branch8 (code, X86_CC_NE, 0, FALSE);
#endif
					if (item->has_target_code)
						x86_jump_code (code, item->value.target_code);
					else
						x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
#if ENABLE_WRONG_METHOD_CHECK
					x86_patch (item->jmp_code, code);
					x86_breakpoint (code);
					item->jmp_code = NULL;
#endif
				}
			}
		} else {
			x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
			item->jmp_code = code;
			if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
				x86_branch8 (code, X86_CC_GE, 0, FALSE);
			else
				x86_branch32 (code, X86_CC_GE, 0, FALSE);
		}
	}

	/* patch the branches to get to the target items */
	for (i = 0; i < count; ++i) {
		MonoIMTCheckItem *item = imt_entries [i];
		if (item->jmp_code) {
			if (item->check_target_idx) {
				x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
			}
		}
	}

	if (!fail_tramp)
		mono_stats.imt_thunks_size += code - start;
	g_assert (code - start <= size);

#if DEBUG_IMT
	{
		char *buff = g_strdup_printf ("thunk_for_class_%s_%s_entries_%d", vtable->klass->name_space, vtable->klass->name, count);
		mono_disassemble_code (NULL, (guint8*)start, code - start, buff);
		g_free (buff);
	}
#endif
	if (mono_jit_map_is_enabled ()) {
		char *buff;
		if (vtable)
			buff = g_strdup_printf ("imt_%s_%s_entries_%d", vtable->klass->name_space, vtable->klass->name, count);
		else
			buff = g_strdup_printf ("imt_thunk_entries_%d", count);
		mono_emit_jit_tramp (start, code - start, buff);
		g_free (buff);
	}

	nacl_domain_code_validate (domain, &start, size, &code);

	return start;
}

MonoMethod*
mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
{
	return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
}
#endif

MonoVTable*
mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
{
	return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
}

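/*
 * mono_arch_get_cie_program:
 *
 *   Return the unwind ops in effect at method entry: the CFA is ESP + 4,
 * and the return address (recorded in the X86_NREG column) is saved at
 * CFA - 4, i.e. at the top of the stack.
 */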
GSList*
mono_arch_get_cie_program (void)
{
	GSList *l = NULL;

	mono_add_unwind_op_def_cfa (l, (guint8*)NULL, (guint8*)NULL, X86_ESP, 4);
	mono_add_unwind_op_offset (l, (guint8*)NULL, (guint8*)NULL, X86_NREG, -4);

	return l;
}

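/*
 * mono_arch_emit_inst_for_method:
 *
 *   Map recognized System.Math methods to single opcodes (x87 intrinsics for
 * Sin/Cos/Tan/Atan/Sqrt/Abs/Round, and OP_IMIN/OP_IMAX for Min/Max when CMOV
 * is enabled). Returns NULL when the method is not recognized.
 */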
MonoInst*
mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
	MonoInst *ins = NULL;
	int opcode = 0;

	if (cmethod->klass == mono_defaults.math_class) {
		if (strcmp (cmethod->name, "Sin") == 0) {
			opcode = OP_SIN;
		} else if (strcmp (cmethod->name, "Cos") == 0) {
			opcode = OP_COS;
		} else if (strcmp (cmethod->name, "Tan") == 0) {
			opcode = OP_TAN;
		} else if (strcmp (cmethod->name, "Atan") == 0) {
			opcode = OP_ATAN;
		} else if (strcmp (cmethod->name, "Sqrt") == 0) {
			opcode = OP_SQRT;
		} else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
			opcode = OP_ABS;
		} else if (strcmp (cmethod->name, "Round") == 0 && fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R8) {
			opcode = OP_ROUND;
		}

		if (opcode) {
			MONO_INST_NEW (cfg, ins, opcode);
			ins->type = STACK_R8;
			ins->dreg = mono_alloc_freg (cfg);
			ins->sreg1 = args [0]->dreg;
			MONO_ADD_INS (cfg->cbb, ins);
		}

		if (cfg->opt & MONO_OPT_CMOV) {
			int opcode = 0;

			if (strcmp (cmethod->name, "Min") == 0) {
				if (fsig->params [0]->type == MONO_TYPE_I4)
					opcode = OP_IMIN;
			} else if (strcmp (cmethod->name, "Max") == 0) {
				if (fsig->params [0]->type == MONO_TYPE_I4)
					opcode = OP_IMAX;
			}

			if (opcode) {
				MONO_INST_NEW (cfg, ins, opcode);
				ins->type = STACK_I4;
				ins->dreg = mono_alloc_ireg (cfg);
				ins->sreg1 = args [0]->dreg;
				ins->sreg2 = args [1]->dreg;
				MONO_ADD_INS (cfg->cbb, ins);
			}
		}

#if 0
		/* OP_FREM is not IEEE compatible */
		else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
			MONO_INST_NEW (cfg, ins, OP_FREM);
			ins->inst_i0 = args [0];
			ins->inst_i1 = args [1];
		}
#endif
	}

	return ins;
}

gboolean
mono_arch_print_tree (MonoInst *tree, int arity)
{
	return 0;
}

MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
{
	MonoInst* ins;

	/* The TLS-based fast path below is deliberately disabled. */
	return NULL;

	if (appdomain_tls_offset == -1)
		return NULL;

	MONO_INST_NEW (cfg, ins, OP_TLS_GET);
	ins->inst_offset = appdomain_tls_offset;
	return ins;
}

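/*
 * mono_arch_get_patch_offset:
 *
 *   Return the offset of the patchable immediate or displacement inside the
 * instruction starting at CODE.
 */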
guint32
mono_arch_get_patch_offset (guint8 *code)
{
	if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
		/* mov <REG>, <DISP32>(<REG>) */
		return 2;
	else if ((code [0] == 0xba))
		/* mov %edx, imm32 */
		return 1;
	else if ((code [0] == 0x68))
		/* push IMM */
		return 1;
	else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
		/* push <OFFSET>(<REG>) */
		return 2;
	else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
		/* call *<OFFSET>(<REG>) */
		return 2;
	else if ((code [0] == 0xdd) || (code [0] == 0xd9))
		/* fldl <ADDR> */
		return 2;
	else if ((code [0] == 0x58) && (code [1] == 0x05))
		/* pop %eax; add <OFFSET>, %eax */
		return 2;
	else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
		/* pop <REG>; add <OFFSET>, <REG> */
		return 3;
	else if ((code [0] >= 0xb8) && (code [0] < 0xb8 + 8))
		/* mov <REG>, imm */
		return 1;
	else {
		g_assert_not_reached ();
		return -1;
	}
}

/*
 * mono_breakpoint_clean_code:
 *
 * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
 * breakpoints in the original code, they are removed in the copy.
 *
 * Returns TRUE if no sw breakpoint was present.
 */
gboolean
mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
{
	int i;
	gboolean can_write = TRUE;
	/*
	 * If method_start is non-NULL we need to perform bound checks, since we access
	 * memory at code - offset and could go before the start of the method, ending up
	 * in a different page of memory that is not mapped, or reading incorrect data.
	 * We zero-fill the missing bytes instead.
	 */
	if (!method_start || code - offset >= method_start) {
		memcpy (buf, code - offset, size);
	} else {
		int diff = code - method_start;
		memset (buf, 0, size);
		memcpy (buf + offset - diff, method_start, diff + size - offset);
	}
	code -= offset;
	for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
		int idx = mono_breakpoint_info_index [i];
		guint8 *ptr;
		if (idx < 1)
			continue;
		ptr = mono_breakpoint_info [idx].address;
		if (ptr >= code && ptr < code + size) {
			guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
			can_write = FALSE;
			/*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
			buf [ptr - code] = saved_byte;
		}
	}
	return can_write;
}

/*
 * mono_x86_get_this_arg_offset:
 *
 *   Return the offset of the stack location where this is passed during a virtual
 * call.
 */
guint32
mono_x86_get_this_arg_offset (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig)
{
	return 0;
}

gpointer
mono_arch_get_this_arg_from_call (mgreg_t *regs, guint8 *code)
{
	guint32 esp = regs [X86_ESP];
	CallInfo *cinfo = NULL;
	gpointer res;
	int offset;

	offset = 0;

	/*
	 * The stack looks like:
	 * <other args>
	 * <this=delegate>
	 * <return addr>
	 * <4 pointers pushed by mono_arch_create_trampoline_code ()>
	 */
	res = (((MonoObject**)esp) [5 + (offset / 4)]);
	if (cinfo)
		g_free (cinfo);
	return res;
}

#define MAX_ARCH_DELEGATE_PARAMS 10

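/*
 * get_delegate_invoke_impl:
 *
 *   Generate the trampoline used to implement Delegate.Invoke. With a target,
 * the 'this' argument is replaced by delegate->target before jumping to
 * delegate->method_ptr. Without a target, each argument is moved up one stack
 * slot to drop 'this'; schematically (a sketch, not byte-exact):
 *
 *	mov  0x4(%esp), %ecx         ; load the delegate
 *	mov  (i+2)*4(%esp), %eax     ; for each argument i:
 *	mov  %eax, (i+1)*4(%esp)     ;   shift it up a slot, overwriting 'this'
 *	jmp  *method_ptr(%ecx)
 */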
static gpointer
get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *code_len)
{
	guint8 *code, *start;
	int code_reserve = 64;

	/*
	 * The stack contains:
	 * <delegate>
	 * <return addr>
	 */

	if (has_target) {
		start = code = mono_global_codeman_reserve (code_reserve);

		/* Replace the this argument with the target */
		x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
		x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
		x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
		x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));

		g_assert ((code - start) < code_reserve);
	} else {
		int i = 0;
		/* 8 for mov_reg and jump, plus 8 for each parameter */
#ifdef __native_client_codegen__
		/* TODO: calculate this size correctly */
		code_reserve = 13 + (param_count * 8) + 2 * kNaClAlignment;
#else
		code_reserve = 8 + (param_count * 8);
#endif  /* __native_client_codegen__ */
		/*
		 * The stack contains:
		 * <args in reverse order>
		 * <delegate>
		 * <return addr>
		 *
		 * and we need:
		 * <args in reverse order>
		 * <return addr>
		 *
		 * without unbalancing the stack.
		 * So move each arg up a spot in the stack (overwriting un-needed 'this' arg)
		 * and leaving original spot of first arg as placeholder in stack so
		 * when callee pops stack everything works.
		 */

		start = code = mono_global_codeman_reserve (code_reserve);

		/* store delegate for access to method_ptr */
		x86_mov_reg_membase (code, X86_ECX, X86_ESP, 4, 4);

		/* move args up */
		for (i = 0; i < param_count; ++i) {
			x86_mov_reg_membase (code, X86_EAX, X86_ESP, (i+2)*4, 4);
			x86_mov_membase_reg (code, X86_ESP, (i+1)*4, X86_EAX, 4);
		}

		x86_jump_membase (code, X86_ECX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));

		g_assert ((code - start) < code_reserve);
	}

	nacl_global_codeman_validate(&start, code_reserve, &code);
	mono_debug_add_delegate_trampoline (start, code - start);

	if (code_len)
		*code_len = code - start;

	if (mono_jit_map_is_enabled ()) {
		char *buff;
		if (has_target)
			buff = (char*)"delegate_invoke_has_target";
		else
			buff = g_strdup_printf ("delegate_invoke_no_target_%d", param_count);
		mono_emit_jit_tramp (start, code - start, buff);
		if (!has_target)
			g_free (buff);
	}

	return start;
}

GSList*
mono_arch_get_delegate_invoke_impls (void)
{
	GSList *res = NULL;
	guint8 *code;
	guint32 code_len;
	int i;

	code = get_delegate_invoke_impl (TRUE, 0, &code_len);
	res = g_slist_prepend (res, mono_tramp_info_create (g_strdup ("delegate_invoke_impl_has_target"), code, code_len, NULL, NULL));

	for (i = 0; i < MAX_ARCH_DELEGATE_PARAMS; ++i) {
		code = get_delegate_invoke_impl (FALSE, i, &code_len);
		res = g_slist_prepend (res, mono_tramp_info_create (g_strdup_printf ("delegate_invoke_impl_target_%d", i), code, code_len, NULL, NULL));
	}

	return res;
}

gpointer
mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
{
	guint8 *code, *start;

	if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
		return NULL;

	/* FIXME: Support more cases */
	if (MONO_TYPE_ISSTRUCT (sig->ret))
		return NULL;

	/*
	 * The stack contains:
	 * <delegate>
	 * <return addr>
	 */

	if (has_target) {
		static guint8* cached = NULL;
		if (cached)
			return cached;

		if (mono_aot_only)
			start = mono_aot_get_trampoline ("delegate_invoke_impl_has_target");
		else
			start = get_delegate_invoke_impl (TRUE, 0, NULL);

		mono_memory_barrier ();

		cached = start;
	} else {
		static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
		int i = 0;

		for (i = 0; i < sig->param_count; ++i)
			if (!mono_is_regsize_var (sig->params [i]))
				return NULL;

		code = cache [sig->param_count];
		if (code)
			return code;

		if (mono_aot_only) {
			char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", sig->param_count);
			start = mono_aot_get_trampoline (name);
			g_free (name);
		} else {
			start = get_delegate_invoke_impl (FALSE, sig->param_count, NULL);
		}

		mono_memory_barrier ();

		cache [sig->param_count] = start;
	}

	return start;
}

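/*
 * Note: the caches above are published without a lock. The
 * mono_memory_barrier () orders the trampoline's code writes before the
 * pointer store, so a reader that observes a non-NULL entry sees finished
 * code; a racing reader that sees NULL merely generates the trampoline again.
 */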
mgreg_t
mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
{
	switch (reg) {
	case X86_EAX: return ctx->eax;
	case X86_EBX: return ctx->ebx;
	case X86_ECX: return ctx->ecx;
	case X86_EDX: return ctx->edx;
	case X86_ESP: return ctx->esp;
	case X86_EBP: return ctx->ebp;
	case X86_ESI: return ctx->esi;
	case X86_EDI: return ctx->edi;
	default: g_assert_not_reached ();
	}
}

void
mono_arch_context_set_int_reg (MonoContext *ctx, int reg, mgreg_t val)
{
	switch (reg) {
	case X86_EAX:
		ctx->eax = val;
		break;
	case X86_EBX:
		ctx->ebx = val;
		break;
	case X86_ECX:
		ctx->ecx = val;
		break;
	case X86_EDX:
		ctx->edx = val;
		break;
	case X86_ESP:
		ctx->esp = val;
		break;
	case X86_EBP:
		ctx->ebp = val;
		break;
	case X86_ESI:
		ctx->esi = val;
		break;
	case X86_EDI:
		ctx->edi = val;
		break;
	default:
		g_assert_not_reached ();
	}
}

#ifdef MONO_ARCH_SIMD_INTRINSICS

static MonoInst*
get_float_to_x_spill_area (MonoCompile *cfg)
{
	if (!cfg->fconv_to_r8_x_var) {
		cfg->fconv_to_r8_x_var = mono_compile_create_var (cfg, &mono_defaults.double_class->byval_arg, OP_LOCAL);
		cfg->fconv_to_r8_x_var->flags |= MONO_INST_VOLATILE; /*FIXME, use the don't regalloc flag*/
	}
	return cfg->fconv_to_r8_x_var;
}

/*
 * Convert all fconv opts that MONO_OPT_SSE2 would get wrong.
 */
void
mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
{
	MonoInst *fconv;
	int dreg, src_opcode;

	if (!(cfg->opt & MONO_OPT_SSE2) || !(cfg->opt & MONO_OPT_SIMD) || COMPILE_LLVM (cfg))
		return;

	switch (src_opcode = ins->opcode) {
	case OP_FCONV_TO_I1:
	case OP_FCONV_TO_U1:
	case OP_FCONV_TO_I2:
	case OP_FCONV_TO_U2:
	case OP_FCONV_TO_I4:
	case OP_FCONV_TO_I:
		break;
	default:
		return;
	}

	/* dreg is the IREG and sreg1 is the FREG */
	MONO_INST_NEW (cfg, fconv, OP_FCONV_TO_R8_X);
	fconv->klass = NULL; /*FIXME, what can I use here as the Mono.Simd lib might not be loaded yet*/
	fconv->sreg1 = ins->sreg1;
	fconv->dreg = mono_alloc_ireg (cfg);
	fconv->type = STACK_VTYPE;
	fconv->backend.spill_var = get_float_to_x_spill_area (cfg);

	mono_bblock_insert_before_ins (cfg->cbb, ins, fconv);

	dreg = ins->dreg;
	NULLIFY_INS (ins);
	ins->opcode = OP_XCONV_R8_TO_I4;

	ins->klass = mono_defaults.int32_class;
	ins->sreg1 = fconv->dreg;
	ins->dreg = dreg;
	ins->type = STACK_I4;
	ins->backend.source_opcode = src_opcode;
}

#endif /* #ifdef MONO_ARCH_SIMD_INTRINSICS */

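/*
 * mono_arch_decompose_long_opts:
 *
 *   Decompose 64-bit opcodes into operations on the 32-bit vreg pair. For
 * OP_LNEG this emits neg low; adc high, $0; neg high: negating the low word
 * sets the carry exactly when the low word is non-zero, and that borrow has
 * to be folded into the high word before it is negated.
 */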
void
mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins)
{
	MonoInst *ins;
	int vreg;

	if (long_ins->opcode == OP_LNEG) {
		ins = long_ins;
		MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 1, ins->sreg1 + 1);
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ADC_IMM, ins->dreg + 2, ins->sreg1 + 2, 0);
		MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 2, ins->dreg + 2);
		NULLIFY_INS (ins);
		return;
	}

#ifdef MONO_ARCH_SIMD_INTRINSICS

	if (!(cfg->opt & MONO_OPT_SIMD))
		return;

	/* TODO: move this to simd-intrinsic.c once we support SSE 4.1 dword extractors, since we need the runtime caps info */
	switch (long_ins->opcode) {
	case OP_EXTRACT_I8:
		vreg = long_ins->sreg1;

		if (long_ins->inst_c0) {
			MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
			ins->klass = long_ins->klass;
			ins->sreg1 = long_ins->sreg1;
			ins->inst_c0 = 2;
			ins->type = STACK_VTYPE;
			ins->dreg = vreg = alloc_ireg (cfg);
			MONO_ADD_INS (cfg->cbb, ins);
		}

		MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
		ins->klass = mono_defaults.int32_class;
		ins->sreg1 = vreg;
		ins->type = STACK_I4;
		ins->dreg = long_ins->dreg + 1;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
		ins->klass = long_ins->klass;
		ins->sreg1 = long_ins->sreg1;
		ins->inst_c0 = long_ins->inst_c0 ? 3 : 1;
		ins->type = STACK_VTYPE;
		ins->dreg = vreg = alloc_ireg (cfg);
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
		ins->klass = mono_defaults.int32_class;
		ins->sreg1 = vreg;
		ins->type = STACK_I4;
		ins->dreg = long_ins->dreg + 2;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
	case OP_INSERTX_I8_SLOW:
		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg2 + 1;
		ins->inst_c0 = long_ins->inst_c0 * 2;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg2 + 2;
		ins->inst_c0 = long_ins->inst_c0 * 2 + 1;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
	case OP_EXPAND_I8:
		MONO_INST_NEW (cfg, ins, OP_ICONV_TO_X);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->sreg1 + 1;
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->sreg2 = long_ins->sreg1 + 2;
		ins->inst_c0 = 1;
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
		ins->dreg = long_ins->dreg;
		ins->sreg1 = long_ins->dreg;
		ins->inst_c0 = 0x44; /*Magic number for swizzling (X,Y,X,Y)*/
		ins->klass = long_ins->klass;
		ins->type = STACK_VTYPE;
		MONO_ADD_INS (cfg->cbb, ins);

		long_ins->opcode = OP_NOP;
		break;
	}
#endif /* MONO_ARCH_SIMD_INTRINSICS */
}

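/*
 * mono_arch_install_handler_block_guard:
 *
 *   Decode the first instruction of the handler to find where the prolog
 * saved the spvar, then swap the continuation address stored through it for
 * NEW_VALUE, returning the previous value. Addresses outside the method body
 * are returned unchanged.
 */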
/*MONO_ARCH_HAVE_HANDLER_BLOCK_GUARD*/
gpointer
mono_arch_install_handler_block_guard (MonoJitInfo *ji, MonoJitExceptionInfo *clause, MonoContext *ctx, gpointer new_value)
{
	int offset;
	gpointer *sp, old_value;
	char *bp;
	const unsigned char *handler;

	/* Decode the first instruction to figure out where we stored the spvar */
	/*
	 * Our jit MUST generate the following:
	 *	mov %esp, -?(%ebp)
	 * which is encoded as 0x89 mod_rm.
	 * mod_rm (esp, ebp, imm), where imm will never be zero, can be:
	 *	mod (reg + imm8):  01 reg(esp): 100 rm(ebp): 101 -> 01100101 (0x65)
	 *	mod (reg + imm32): 10 reg(esp): 100 rm(ebp): 101 -> 10100101 (0xA5)
	 */
	handler = clause->handler_start;

	if (*handler != 0x89)
		return NULL;

	++handler;

	if (*handler == 0x65)
		offset = *(signed char*)(handler + 1);
	else if (*handler == 0xA5)
		offset = *(int*)(handler + 1);
	else
		return NULL;

	/* Load the spvar */
	bp = MONO_CONTEXT_GET_BP (ctx);
	sp = *(gpointer*)(bp + offset);

	old_value = *sp;
	if (old_value < ji->code_start || (char*)old_value > ((char*)ji->code_start + ji->code_size))
		return old_value;

	*sp = new_value;

	return old_value;
}

/*
 * mono_arch_emit_load_got_addr:
 *
 *   Emit code to load the got address.
 * On x86, the result is placed into EBX.
 */
guint8*
mono_arch_emit_load_got_addr (guint8 *start, guint8 *code, MonoCompile *cfg, MonoJumpInfo **ji)
{
	x86_call_imm (code, 0);
	/*
	 * The patch needs to point to the pop, since the GOT offset needs
	 * to be added to that address.
	 */
	if (cfg)
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
	else
		*ji = mono_patch_info_list_prepend (*ji, code - start, MONO_PATCH_INFO_GOT_OFFSET, NULL);
	x86_pop_reg (code, MONO_ARCH_GOT_REG);
	x86_alu_reg_imm (code, X86_ADD, MONO_ARCH_GOT_REG, 0xf0f0f0f0);

	return code;
}

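/*
 * The sequence emitted above is, schematically:
 *
 *	call next           ; pushes the address of the following pop
 * next:
 *	pop  %ebx           ; MONO_ARCH_GOT_REG now holds that address
 *	add  $<got-offset>, %ebx   ; 0xf0f0f0f0 is a placeholder patched later
 */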
/*
 * mono_arch_emit_load_aotconst:
 *
 *   Emit code to load the contents of the GOT slot identified by TRAMP_TYPE and
 * TARGET from the mscorlib GOT in full-aot code.
 * On x86, the GOT address is assumed to be in EBX, and the result is placed into
 * EAX.
 */
guint8*
mono_arch_emit_load_aotconst (guint8 *start, guint8 *code, MonoJumpInfo **ji, int tramp_type, gconstpointer target)
{
	/* Load the mscorlib got address */
	x86_mov_reg_membase (code, X86_EAX, MONO_ARCH_GOT_REG, sizeof (gpointer), 4);
	*ji = mono_patch_info_list_prepend (*ji, code - start, tramp_type, target);
	/* arch_emit_got_access () patches this */
	x86_mov_reg_membase (code, X86_EAX, X86_EAX, 0xf0f0f0f0, 4);

	return code;
}

/* Can't put this into mini-x86.h */
gpointer
mono_x86_get_signal_exception_trampoline (MonoTrampInfo **info, gboolean aot);

GSList *
mono_arch_get_trampolines (gboolean aot)
{
	MonoTrampInfo *info;
	GSList *tramps = NULL;

	mono_x86_get_signal_exception_trampoline (&info, aot);

	tramps = g_slist_append (tramps, info);

	return tramps;
}

#if __APPLE__
#define DBG_SIGNAL SIGBUS
#else
#define DBG_SIGNAL SIGSEGV
#endif

/* Soft Debug support */
#ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED

/*
 * mono_arch_set_breakpoint:
 *
 *   Set a breakpoint at the native code corresponding to JI at NATIVE_OFFSET.
 * The location should contain code emitted by OP_SEQ_POINT.
 */
void
mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
{
	guint8 *code = ip;

	/*
	 * In production, we will use int3 (has to fix the size in the md
	 * file). But that could confuse gdb, so during development, we emit a SIGSEGV
	 * instead.
	 */
	g_assert (code [0] == 0x90);
	x86_alu_reg_mem (code, X86_CMP, X86_EAX, (guint32)bp_trigger_page);
}

/*
 * mono_arch_clear_breakpoint:
 *
 *   Clear the breakpoint at IP.
 */
void
mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
{
	guint8 *code = ip;
	int i;

	for (i = 0; i < 6; ++i)
		x86_nop (code);
}

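/*
 * The breakpoint site is the nop-padded slot emitted for OP_SEQ_POINT:
 * mono_arch_set_breakpoint () overwrites it with a 6-byte cmp against the
 * trigger page, and mono_arch_clear_breakpoint () restores the six nops
 * (BREAKPOINT_SIZE below matches that instruction size).
 */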
/*
 * mono_arch_start_single_stepping:
 *
 *   Start single stepping.
 */
void
mono_arch_start_single_stepping (void)
{
	mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
}

/*
 * mono_arch_stop_single_stepping:
 *
 *   Stop single stepping.
 */
void
mono_arch_stop_single_stepping (void)
{
	mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
}

/*
 * mono_arch_is_single_step_event:
 *
 *   Return whether the machine state in SIGCTX corresponds to a single
 * step event.
 */
gboolean
mono_arch_is_single_step_event (void *info, void *sigctx)
{
#ifdef TARGET_WIN32
	EXCEPTION_RECORD* einfo = ((EXCEPTION_POINTERS*)info)->ExceptionRecord;

	/* Sometimes the address is off by 4 */
	if ((einfo->ExceptionInformation[1] >= ss_trigger_page && (guint8*)einfo->ExceptionInformation[1] <= (guint8*)ss_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#else
	siginfo_t* sinfo = (siginfo_t*) info;

	/* Sometimes the address is off by 4 */
	if (sinfo->si_signo == DBG_SIGNAL && (sinfo->si_addr >= ss_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)ss_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#endif
}

gboolean
mono_arch_is_breakpoint_event (void *info, void *sigctx)
{
#ifdef TARGET_WIN32
	EXCEPTION_RECORD* einfo = ((EXCEPTION_POINTERS*)info)->ExceptionRecord;

	/* Sometimes the address is off by 4 */
	if ((einfo->ExceptionInformation[1] >= bp_trigger_page && (guint8*)einfo->ExceptionInformation[1] <= (guint8*)bp_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#else
	siginfo_t* sinfo = (siginfo_t*)info;

	/* Sometimes the address is off by 4 */
	if (sinfo->si_signo == DBG_SIGNAL && (sinfo->si_addr >= bp_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)bp_trigger_page + 128))
		return TRUE;
	else
		return FALSE;
#endif
}

/*
 * mono_arch_get_ip_for_breakpoint:
 *
 *   See mini-amd64.c for docs.
 */
guint8*
mono_arch_get_ip_for_breakpoint (MonoJitInfo *ji, MonoContext *ctx)
{
	guint8 *ip = MONO_CONTEXT_GET_IP (ctx);

	return ip;
}

#define BREAKPOINT_SIZE 6

/*
 * mono_arch_get_ip_for_single_step:
 *
 *   See mini-amd64.c for docs.
 */
guint8*
mono_arch_get_ip_for_single_step (MonoJitInfo *ji, MonoContext *ctx)
{
	guint8 *ip = MONO_CONTEXT_GET_IP (ctx);

	/* Size of x86_alu_reg_imm */
	ip += 6;

	return ip;
}

/*
 * mono_arch_skip_breakpoint:
 *
 *   See mini-amd64.c for docs.
 */
void
mono_arch_skip_breakpoint (MonoContext *ctx)
{
	MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + BREAKPOINT_SIZE);
}

/*
 * mono_arch_skip_single_step:
 *
 *   See mini-amd64.c for docs.
 */
void
mono_arch_skip_single_step (MonoContext *ctx)
{
	MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 6);
}

/*
 * mono_arch_get_seq_point_info:
 *
 *   See mini-amd64.c for docs.
 */
gpointer
mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
{
	NOT_IMPLEMENTED;
	return NULL;
}

#endif