/*
 * mini-x86.c: x86 backend for the Mono code generator
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/utils/mono-math.h>
#include <mono/utils/mono-counters.h>

#include "trace.h"
#include "mini-x86.h"
#include "cpu-x86.h"
#include "ir-emit.h"

/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#ifdef PLATFORM_WIN32
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif

/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
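
/*
 * Illustrative examples (added for clarity, not in the original source):
 * ALIGN_TO (13, 8) == 16 and ALIGN_TO (16, 8) == 16 -- the macro rounds
 * val up to the next multiple of a power-of-two align.
 */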

#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

MonoBreakpointInfo
mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];

const char*
mono_arch_regname (int reg)
{
	switch (reg) {
	case X86_EAX: return "%eax";
	case X86_EBX: return "%ebx";
	case X86_ECX: return "%ecx";
	case X86_EDX: return "%edx";
	case X86_ESP: return "%esp";
	case X86_EBP: return "%ebp";
	case X86_EDI: return "%edi";
	case X86_ESI: return "%esi";
	}
	return "unknown";
}

const char*
mono_arch_fregname (int reg)
{
	switch (reg) {
	case 0:
		return "%fr0";
	case 1:
		return "%fr1";
	case 2:
		return "%fr2";
	case 3:
		return "%fr3";
	case 4:
		return "%fr4";
	case 5:
		return "%fr5";
	case 6:
		return "%fr6";
	case 7:
		return "%fr7";
	default:
		return "unknown";
	}
}

const char *
mono_arch_xregname (int reg)
{
	switch (reg) {
	case 0:
		return "%xmm0";
	case 1:
		return "%xmm1";
	case 2:
		return "%xmm2";
	case 3:
		return "%xmm3";
	case 4:
		return "%xmm4";
	case 5:
		return "%xmm5";
	case 6:
		return "%xmm6";
	case 7:
		return "%xmm7";
	default:
		return "unknown";
	}
}

typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgOnFloatFpStack,
	ArgOnDoubleFpStack,
	ArgNone
} ArgStorage;

typedef struct {
	gint16 offset;
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	guint32 stack_align_amount;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;
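
/*
 * Note (added for clarity): args is a flexible-array-style trailing member;
 * callers over-allocate, e.g.
 *
 *   cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
 *
 * so args [] can hold one ArgInfo per actual argument (see get_call_info ()
 * below).
 */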

#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

#if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif

static void inline
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}

static void inline
add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	g_assert (PARAM_REGS == 0);

	ainfo->storage = ArgOnStack;
	(*stack_size) += sizeof (gpointer) * 2;
}

static void inline
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += is_double ? 8 : 4;
	}
	else {
		/* A double register */
		if (is_double)
			ainfo->storage = ArgInDoubleSSEReg;
		else
			ainfo->storage = ArgInFloatSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}

static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	size = mini_type_stack_size_full (gsctx, &klass->byval_arg, NULL, sig->pinvoke);

#ifdef SMALL_STRUCTS_IN_REGS
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * The exact rules are not very well documented; the code below seems to work with the
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}

/*
 * get_call_info:
 *
 * Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 */
static CallInfo*
get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	if (mp)
		cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
	else
		cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mini_type_get_underlying_type (gsctx, sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = X86_EAX;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);

			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mini_type_get_underlying_type (gsctx, sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	if (mono_do_x86_stack_align && (stack_size % MONO_ARCH_FRAME_ALIGNMENT) != 0) {
		cinfo->need_stack_align = TRUE;
		cinfo->stack_align_amount = MONO_ARCH_FRAME_ALIGNMENT - (stack_size % MONO_ARCH_FRAME_ALIGNMENT);
		stack_size += cinfo->stack_align_amount;
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
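
/*
 * Worked example (added for illustration): for a cdecl instance method
 * void Foo (long l, double d), get_call_info () puts everything on the
 * stack since PARAM_REGS is 0: this at offset 0, l at offset 4 (two
 * gpointer slots via add_general_pair), d at offset 12 (via add_float),
 * giving stack_usage == 20 before any frame-alignment padding.
 */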

/*
 * mono_arch_get_argument_info:
 * @csig: a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, args_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;
	CallInfo *cinfo;

	cinfo = get_call_info (NULL, NULL, csig, FALSE);

	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].size = args_size;

	for (k = 0; k < param_count; k++) {
		size = mini_type_stack_size_full (NULL, csig->params [k], &align, csig->pinvoke);

		/* ignore alignment for now */
		align = 1;

		args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
		arg_info [k].pad = pad;
		args_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	if (mono_do_x86_stack_align && !CALLCONV_IS_STDCALL (csig))
		align = MONO_ARCH_FRAME_ALIGNMENT;
	else
		align = 4;
	args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return args_size;
}

static const guchar cpuid_impl [] = {
	0x55,             /* push %ebp */
	0x89, 0xe5,       /* mov %esp,%ebp */
	0x53,             /* push %ebx */
	0x8b, 0x45, 0x08, /* mov 0x8(%ebp),%eax */
	0x0f, 0xa2,       /* cpuid */
	0x50,             /* push %eax */
	0x8b, 0x45, 0x10, /* mov 0x10(%ebp),%eax */
	0x89, 0x18,       /* mov %ebx,(%eax) */
	0x8b, 0x45, 0x14, /* mov 0x14(%ebp),%eax */
	0x89, 0x08,       /* mov %ecx,(%eax) */
	0x8b, 0x45, 0x18, /* mov 0x18(%ebp),%eax */
	0x89, 0x10,       /* mov %edx,(%eax) */
	0x58,             /* pop %eax */
	0x8b, 0x55, 0x0c, /* mov 0xc(%ebp),%edx */
	0x89, 0x02,       /* mov %eax,(%edx) */
	0x5b,             /* pop %ebx */
	0xc9,             /* leave */
	0xc3,             /* ret */
};

typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);

static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	__asm__ __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		static CpuidFunc func = NULL;
		void *ptr;
		if (!func) {
			ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
			memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
			func = (CpuidFunc)ptr;
		}
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		 */
		return 1;
	}
	return 0;
}
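
/*
 * Usage sketch (added for illustration): the EFLAGS dance above checks that
 * bit 21 (the ID flag) can be toggled, which is how pre-CPUID CPUs are
 * detected. A typical caller looks like:
 *
 *   int eax, ebx, ecx, edx;
 *   if (cpuid (1, &eax, &ebx, &ecx, &edx))
 *           sse2_supported = (edx & (1 << 26)) != 0;
 *
 * See mono_arch_cpu_optimizazions () below for the real call sites.
 */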

/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__ __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	_control87 (_PC_53, MCW_PC);
#endif
}

/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
	InitializeCriticalSection (&mini_arch_mutex);
}

/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
	DeleteCriticalSection (&mini_arch_mutex);
}

/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
	int eax, ebx, ecx, edx;
	guint32 opts = 0;

	*exclude_mask = 0;
	/* Feature Flags function, flags returned in EDX. */
	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 15)) {
			opts |= MONO_OPT_CMOV;
			if (edx & 1)
				opts |= MONO_OPT_FCMOV;
			else
				*exclude_mask |= MONO_OPT_FCMOV;
		} else
			*exclude_mask |= MONO_OPT_CMOV;
		if (edx & (1 << 26))
			opts |= MONO_OPT_SSE2;
		else
			*exclude_mask |= MONO_OPT_SSE2;

#ifdef MONO_ARCH_SIMD_INTRINSICS
		/* SIMD intrinsics require at least SSE2. */
		if (!(opts & MONO_OPT_SSE2))
			*exclude_mask |= MONO_OPT_SIMD;
#endif
	}
	return opts;
}

/*
 * This function tests which SSE versions are supported.
 *
 * Returns a bitmask corresponding to all supported versions.
 *
 * TODO: detect other versions like SSE4a.
 */
guint32
mono_arch_cpu_enumerate_simd_versions (void)
{
	int eax, ebx, ecx, edx;
	guint32 sse_opts = 0;

	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 25))
			sse_opts |= 1 << SIMD_VERSION_SSE1;
		if (edx & (1 << 26))
			sse_opts |= 1 << SIMD_VERSION_SSE2;
		if (ecx & (1 << 0))
			sse_opts |= 1 << SIMD_VERSION_SSE3;
		if (ecx & (1 << 9))
			sse_opts |= 1 << SIMD_VERSION_SSSE3;
		if (ecx & (1 << 19))
			sse_opts |= 1 << SIMD_VERSION_SSE41;
		if (ecx & (1 << 20))
			sse_opts |= 1 << SIMD_VERSION_SSE42;
	}
	return sse_opts;
}
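
/*
 * Usage sketch (added for illustration): callers test individual versions
 * against the returned mask, e.g.
 *
 *   guint32 simd = mono_arch_cpu_enumerate_simd_versions ();
 *   if (simd & (1 << SIMD_VERSION_SSE41))
 *           ... SSE4.1 instructions may be emitted ...
 */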

/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		if (reg == -1)
			return TRUE;
	}

	return FALSE;
}

GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we don't allocate I1 to registers because there is no simple way to sign extend
		 * 8bit quantities in caller saved registers on x86 */
		if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}

GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	/* we can use 3 registers for global allocation */
	regs = g_list_prepend (regs, (gpointer)X86_EBX);
	regs = g_list_prepend (regs, (gpointer)X86_ESI);
	regs = g_list_prepend (regs, (gpointer)X86_EDI);

	return regs;
}

/*
 * mono_arch_regalloc_cost:
 *
 * Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		return (ins->opcode == OP_ARG) ? 1 : 0;
	else
		/* push+pop+possible load if it is an argument */
		return (ins->opcode == OP_ARG) ? 3 : 2;
}

static void
set_needs_stack_frame (MonoCompile *cfg, gboolean flag)
{
	static int inited = FALSE;
	static int count = 0;

	if (cfg->arch.need_stack_frame_inited) {
		g_assert (cfg->arch.need_stack_frame == flag);
		return;
	}

	cfg->arch.need_stack_frame = flag;
	cfg->arch.need_stack_frame_inited = TRUE;

	if (flag)
		return;

	if (!inited) {
		mono_counters_register ("Could eliminate stack frame", MONO_COUNTER_INT|MONO_COUNTER_JIT, &count);
		inited = TRUE;
	}
	++count;

	//g_print ("will eliminate %s.%s.%s\n", cfg->method->klass->name_space, cfg->method->klass->name, cfg->method->name);
}

static gboolean
needs_stack_frame (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	gboolean result = FALSE;

	if (cfg->arch.need_stack_frame_inited)
		return cfg->arch.need_stack_frame;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	if (cfg->disable_omit_fp)
		result = TRUE;
	else if (cfg->flags & MONO_CFG_HAS_ALLOCA)
		result = TRUE;
	else if (cfg->method->save_lmf)
		result = TRUE;
	else if (cfg->stack_offset)
		result = TRUE;
	else if (cfg->param_area)
		result = TRUE;
	else if (cfg->flags & (MONO_CFG_HAS_CALLS | MONO_CFG_HAS_ALLOCA | MONO_CFG_HAS_TAIL))
		result = TRUE;
	else if (header->num_clauses)
		result = TRUE;
	else if (sig->param_count + sig->hasthis)
		result = TRUE;
	else if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		result = TRUE;
	else if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
		(cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
		result = TRUE;

	set_needs_stack_frame (cfg, result);

	return cfg->arch.need_stack_frame;
}
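
/*
 * Note (added for illustration): the frame can only be omitted for very
 * simple methods. A parameterless static method with no locals, no calls,
 * no alloca and no exception clauses falls through every test above and
 * gets result == FALSE, so its prolog needs no push %ebp / mov %esp,%ebp.
 */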

/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	cfg->frame_reg = X86_EBP;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	/*
	 * EBP is at alignment 8 % MONO_ARCH_FRAME_ALIGNMENT, so if we
	 * have locals larger than 8 bytes we need to make sure that
	 * they have the appropriate offset.
	 */
	if (MONO_ARCH_FRAME_ALIGNMENT > 8 && locals_stack_align > 8)
		offset += MONO_ARCH_FRAME_ALIGNMENT - sizeof (gpointer) * 2;
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		if (MONO_TYPE_ISSTRUCT (sig->ret)) {
			/*
			 * In the new IR, the cfg->vret_addr variable represents the
			 * vtype return value.
			 */
			cfg->vret_addr->opcode = OP_REGOFFSET;
			cfg->vret_addr->inst_basereg = cfg->frame_reg;
			cfg->vret_addr->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
			if (G_UNLIKELY (cfg->verbose_level > 1)) {
				printf ("vret_addr =");
				mono_print_ins (cfg->vret_addr);
			}
		} else {
			cfg->ret->opcode = OP_REGOFFSET;
			cfg->ret->inst_basereg = X86_EBP;
			cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		}
		break;
	case ArgValuetypeInReg:
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		cfg->ret->dreg = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->args [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	cfg->stack_offset = offset;
}

void
mono_arch_create_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	CallInfo *cinfo;

	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	if (cinfo->ret.storage == ArgValuetypeInReg)
		cfg->ret_var_is_local = TRUE;
	if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig->ret)) {
		cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
	}
}

/*
 * It is expensive to adjust esp for each individual fp argument pushed on the stack
 * so we try to do it just once when we have multiple fp arguments in a row.
 * We don't use this mechanism generally because for int arguments the generated code
 * is slightly bigger and new generation cpus optimize away the dependency chains
 * created by push instructions on the esp value.
 * fp_arg_setup is the first argument in the execution sequence where the esp register
 * is modified.
 */
static G_GNUC_UNUSED int
collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
{
	int fp_space = 0;
	MonoType *t;

	for (; start_arg < sig->param_count; ++start_arg) {
		t = mini_type_get_underlying_type (NULL, sig->params [start_arg]);
		if (!t->byref && t->type == MONO_TYPE_R8) {
			fp_space += sizeof (double);
			*fp_arg_setup = start_arg;
		} else {
			break;
		}
	}
	return fp_space;
}
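
/*
 * Worked example (added for illustration): for a signature
 * (double, double, int) and start_arg == 0, the loop above sees two R8
 * arguments in a row, returns fp_space == 16 and leaves *fp_arg_setup == 1,
 * the index of the last double in the run (which, since arguments are
 * pushed in reverse order, is the first one emitted).
 */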

static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
	MonoMethodSignature *tmp_sig;

	/* FIXME: Add support for signature tokens to AOT */
	cfg->disable_aot = TRUE;

	/*
	 * mono_ArgIterator_Setup assumes the signature cookie is
	 * passed first and all the arguments which were before it are
	 * passed on the stack after the signature. So compensate by
	 * passing a different signature.
	 */
	tmp_sig = mono_metadata_signature_dup (call->signature);
	tmp_sig->param_count -= call->signature->sentinelpos;
	tmp_sig->sentinelpos = 0;
	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

	MONO_EMIT_NEW_BIALU_IMM (cfg, OP_X86_PUSH_IMM, -1, -1, tmp_sig);
}

#ifdef ENABLE_LLVM
LLVMCallInfo*
mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
{
	int i, n;
	CallInfo *cinfo;
	ArgInfo *ainfo;
	int j;
	LLVMCallInfo *linfo;

	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);

	linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));

	/*
	 * LLVM always uses the native ABI while we use our own ABI, the
	 * only difference is the handling of vtypes:
	 * - we only pass/receive them in registers in some cases, and only
	 *   in 1 or 2 integer registers.
	 */
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		if (sig->pinvoke) {
			cfg->exception_message = g_strdup ("pinvoke + vtypes");
			cfg->disable_llvm = TRUE;
			return linfo;
		}

		cfg->exception_message = g_strdup ("vtype ret in call");
		cfg->disable_llvm = TRUE;
		/*
		linfo->ret.storage = LLVMArgVtypeInReg;
		for (j = 0; j < 2; ++j)
			linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, cinfo->ret.pair_storage [j]);
		*/
	}

	if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage == ArgInIReg) {
		/* Vtype returned using a hidden argument */
		linfo->ret.storage = LLVMArgVtypeRetAddr;
	}

	if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage != ArgInIReg) {
		// FIXME:
		cfg->exception_message = g_strdup ("vtype ret in call");
		cfg->disable_llvm = TRUE;
	}

	for (i = 0; i < n; ++i) {
		ainfo = cinfo->args + i;

		linfo->args [i].storage = LLVMArgNone;

		switch (ainfo->storage) {
		case ArgInIReg:
			linfo->args [i].storage = LLVMArgInIReg;
			break;
		case ArgInDoubleSSEReg:
		case ArgInFloatSSEReg:
			linfo->args [i].storage = LLVMArgInFPReg;
			break;
		case ArgOnStack:
			if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) {
				linfo->args [i].storage = LLVMArgVtypeByVal;
			} else {
				linfo->args [i].storage = LLVMArgInIReg;
				if (!sig->params [i - sig->hasthis]->byref) {
					if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4) {
						linfo->args [i].storage = LLVMArgInFPReg;
					} else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8) {
						linfo->args [i].storage = LLVMArgInFPReg;
					}
				}
			}
			break;
		case ArgValuetypeInReg:
			if (sig->pinvoke) {
				cfg->exception_message = g_strdup ("pinvoke + vtypes");
				cfg->disable_llvm = TRUE;
				return linfo;
			}

			cfg->exception_message = g_strdup ("vtype arg");
			cfg->disable_llvm = TRUE;
			/*
			linfo->args [i].storage = LLVMArgVtypeInReg;
			for (j = 0; j < 2; ++j)
				linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
			*/
			break;
		default:
			cfg->exception_message = g_strdup ("ainfo->storage");
			cfg->disable_llvm = TRUE;
			break;
		}
	}

	return linfo;
}
#endif

void
mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n;
	CallInfo *cinfo;
	int sentinelpos = 0;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		sentinelpos = sig->sentinelpos + (sig->hasthis ? 1 : 0);

	if (cinfo->need_stack_align) {
		MONO_INST_NEW (cfg, arg, OP_SUB_IMM);
		arg->dreg = X86_ESP;
		arg->sreg1 = X86_ESP;
		arg->inst_imm = cinfo->stack_align_amount;
		MONO_ADD_INS (cfg->cbb, arg);
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/*
			 * Tell the JIT to use a more efficient calling convention: call using
			 * OP_CALL, compute the result location after the call, and save the
			 * result there.
			 */
			call->vret_in_reg = TRUE;
			if (call->vret_var)
				NULLIFY_INS (call->vret_var);
		}
	}

	/* Handle the case where there are no implicit arguments */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
		emit_sig_cookie (cfg, call, cinfo);
	}

	/* Arguments are pushed in the reverse order */
	for (i = n - 1; i >= 0; i --) {
		ArgInfo *ainfo = cinfo->args + i;
		MonoType *t;

		if (i >= sig->hasthis)
			t = sig->params [i - sig->hasthis];
		else
			t = &mono_defaults.int_class->byval_arg;
		t = mini_type_get_underlying_type (cfg->generic_sharing_context, t);

		MONO_INST_NEW (cfg, arg, OP_X86_PUSH);

		in = call->args [i];
		arg->cil_code = in->cil_code;
		arg->sreg1 = in->dreg;
		arg->type = in->type;

		g_assert (in->dreg != -1);

		if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
			guint32 align;
			guint32 size;

			g_assert (in->klass);

			if (t->type == MONO_TYPE_TYPEDBYREF) {
				size = sizeof (MonoTypedRef);
				align = sizeof (gpointer);
			}
			else {
				size = mini_type_stack_size_full (cfg->generic_sharing_context, &in->klass->byval_arg, &align, sig->pinvoke);
			}

			if (size > 0) {
				arg->opcode = OP_OUTARG_VT;
				arg->sreg1 = in->dreg;
				arg->klass = in->klass;
				arg->backend.size = size;

				MONO_ADD_INS (cfg->cbb, arg);
			}
		}
		else {
			switch (ainfo->storage) {
			case ArgOnStack:
				arg->opcode = OP_X86_PUSH;
				if (!t->byref) {
					if (t->type == MONO_TYPE_R4) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 4);
						arg->opcode = OP_STORER4_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_R8) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
						arg->opcode = OP_STORER8_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_I8 || t->type == MONO_TYPE_U8) {
						arg->sreg1 ++;
						MONO_EMIT_NEW_UNALU (cfg, OP_X86_PUSH, -1, in->dreg + 2);
					}
				}
				break;
			default:
				g_assert_not_reached ();
			}

			MONO_ADD_INS (cfg->cbb, arg);
		}

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
			/* Emit the signature cookie just before the implicit arguments */
			emit_sig_cookie (cfg, call, cinfo);
		}
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		MonoInst *vtarg;

		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/* Already done */
		}
		else if (cinfo->ret.storage == ArgInIReg) {
			NOT_IMPLEMENTED;
			/* The return address is passed in a register */
			MONO_INST_NEW (cfg, vtarg, OP_MOVE);
			vtarg->sreg1 = call->inst.dreg;
			vtarg->dreg = mono_alloc_ireg (cfg);
			MONO_ADD_INS (cfg->cbb, vtarg);

			mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
		} else {
			MonoInst *vtarg;
			MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
			vtarg->type = STACK_MP;
			vtarg->sreg1 = call->vret_var->dreg;
			MONO_ADD_INS (cfg->cbb, vtarg);
		}

		/* if the function returns a struct, the called method already does a ret $0x4 */
		cinfo->stack_usage -= 4;
	}

	call->stack_usage = cinfo->stack_usage;
}

void
mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
{
	MonoInst *arg;
	int size = ins->backend.size;

	if (size <= 4) {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
		arg->sreg1 = src->dreg;

		MONO_ADD_INS (cfg->cbb, arg);
	} else if (size <= 20) {
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 4));
		mini_emit_memcpy (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
	} else {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
		arg->inst_basereg = src->dreg;
		arg->inst_offset = 0;
		arg->inst_imm = size;

		MONO_ADD_INS (cfg->cbb, arg);
	}
}

void
mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
{
	MonoType *ret = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);

	if (!ret->byref) {
		if (ret->type == MONO_TYPE_R4) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_R8) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
			if (COMPILE_LLVM (cfg))
				MONO_EMIT_NEW_UNALU (cfg, OP_LMOVE, cfg->ret->dreg, val->dreg);
			else {
				MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EAX, val->dreg + 1);
				MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EDX, val->dreg + 2);
			}
			return;
		}
	}

	MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
}

/*
 * Allow tracing to work with this interface (with an optional argument)
 */
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;

	g_assert (MONO_ARCH_FRAME_ALIGNMENT >= 8);
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 8);

	/* if some args are passed in registers, we need to save them here */
	x86_push_reg (code, X86_EBP);

	if (cfg->compile_aot) {
		x86_push_imm (code, cfg->method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
		x86_push_imm (code, cfg->method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT);

	return code;
}

enum {
	SAVE_NONE,
	SAVE_STRUCT,
	SAVE_EAX,
	SAVE_EAX_EDX,
	SAVE_FP
};

void*
mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
{
	guchar *code = p;
	int arg_size = 0, stack_usage = 0, save_mode = SAVE_NONE;
	MonoMethod *method = cfg->method;

	switch (mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret)->type) {
	case MONO_TYPE_VOID:
		/* special case string .ctor icall */
		if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class) {
			save_mode = SAVE_EAX;
			stack_usage = enable_arguments ? 8 : 4;
		} else
			save_mode = SAVE_NONE;
		break;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		save_mode = SAVE_EAX_EDX;
		stack_usage = enable_arguments ? 16 : 8;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		save_mode = SAVE_FP;
		stack_usage = enable_arguments ? 16 : 8;
		break;
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
			save_mode = SAVE_EAX;
			stack_usage = enable_arguments ? 8 : 4;
			break;
		}
		/* Fall through */
	case MONO_TYPE_VALUETYPE:
		// FIXME: Handle SMALL_STRUCT_IN_REG here for proper alignment on darwin-x86
		save_mode = SAVE_STRUCT;
		stack_usage = enable_arguments ? 4 : 0;
		break;
	default:
		save_mode = SAVE_EAX;
		stack_usage = enable_arguments ? 8 : 4;
		break;
	}

	x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage - 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_push_reg (code, X86_EDX);
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EDX);
			x86_push_reg (code, X86_EAX);
			arg_size = 8;
		}
		break;
	case SAVE_EAX:
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EAX);
			arg_size = 4;
		}
		break;
	case SAVE_FP:
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		if (enable_arguments) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
			arg_size = 8;
		}
		break;
	case SAVE_STRUCT:
		if (enable_arguments) {
			x86_push_membase (code, X86_EBP, 8);
			arg_size = 4;
		}
		break;
	case SAVE_NONE:
	default:
		break;
	}

	if (cfg->compile_aot) {
		x86_push_imm (code, method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
		x86_push_imm (code, method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}

	x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_pop_reg (code, X86_EAX);
		x86_pop_reg (code, X86_EDX);
		break;
	case SAVE_EAX:
		x86_pop_reg (code, X86_EAX);
		break;
	case SAVE_FP:
		x86_fld_membase (code, X86_ESP, 0, TRUE);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		break;
	case SAVE_NONE:
	default:
		break;
	}

	x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage);

	return code;
}

#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->inst_true_bb->native_offset) { \
	x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
} else { \
	mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
	if ((cfg->opt & MONO_OPT_BRANCH) && \
	    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
		x86_branch8 (code, cond, 0, sign); \
	else \
		x86_branch32 (code, cond, 0, sign); \
}

/*
 * Emit an exception if the condition fails and, if possible, branch
 * directly to the target.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
	do { \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, signed); \
		} else { \
			EMIT_COND_BRANCH (tins, cond, signed); \
		} \
	} while (0);

#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0);

static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}

#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))

/*
 * mono_peephole_pass_1:
 *
 *   Perform peephole opts which should/can be performed before local regalloc
 */
void
mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		MonoInst *last_ins = ins->prev;

		switch (ins->opcode) {
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				/*
				 * X86_LEA is like ADD, but doesn't have the
				 * sreg1==dreg restriction.
				 */
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_SUB_IMM:
		case OP_ISUB_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
				ins->inst_imm = -ins->inst_imm;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		case OP_COMPARE_IMM:
		case OP_ICOMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0)
			 * -->
			 * OP_X86_TEST_NULL (reg)
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/*
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_COMPARE_IMM;
				ins->sreg1 = last_ins->sreg1;

				/* check if we can remove cmp reg,0 with test null */
				if (!ins->inst_imm)
					ins->opcode = OP_X86_TEST_NULL;
			}

			break;
		case OP_X86_PUSH_MEMBASE:
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_X86_PUSH;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}

void
mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we can't do it always */
			if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
				MonoInst *ins2;

				ins->opcode = OP_IXOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;

				/*
				 * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG
				 * since it takes 3 bytes instead of 7.
				 */
				for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STORE_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STOREI4_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
						/* Continue iteration */
					}
					else
						break;
				}
			}
			break;
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_ISUB_IMM:
		case OP_SUB_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}

/*
 * mono_arch_lowering_pass:
 *
 *  Converts complex opcodes into simpler ones so that each IR instruction
 * corresponds to one machine instruction.
 */
void
mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *next;

	/*
	 * FIXME: Need to add more instructions, but the current machine
	 * description can't model some parts of the composite instructions like
	 * cdq.
	 */
	MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) {
		switch (ins->opcode) {
		case OP_IREM_IMM:
		case OP_IDIV_IMM:
		case OP_IDIV_UN_IMM:
		case OP_IREM_UN_IMM:
			/*
			 * Keep the cases where we could generate optimized code, otherwise convert
			 * to the non-imm variant.
			 */
			if ((ins->opcode == OP_IREM_IMM) && mono_is_power_of_two (ins->inst_imm) >= 0)
				break;
			mono_decompose_op_imm (cfg, bb, ins);
			break;
		default:
			break;
		}
	}

	bb->max_vreg = cfg->next_vreg;
}
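
/*
 * Example (added for illustration): an OP_IDIV_IMM dividing by 10 cannot be
 * encoded directly on x86, so mono_decompose_op_imm () rewrites it as a
 * constant load into a fresh vreg followed by the plain register-register
 * OP_IDIV. OP_IREM_IMM by a power of two is kept, since the backend can
 * emit it without a div instruction.
 */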

static const int
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Maps CMP_... constants to X86_CC_... constants */
static const int
cc_table [] = {
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};

static const int
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};

static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
#define XMM_TEMP_REG 0
	/* This SSE2 optimization must not be done with OPT_SIMD in place as it clobbers xmm0. */
	/* The xmm pass decomposes OP_FCONV_ ops anyway. */
	if (cfg->opt & MONO_OPT_SSE2 && size < 8 && !(cfg->opt & MONO_OPT_SIMD)) {
		/* optimize by assigning a local var for this use so we avoid
		 * the stack manipulations */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
		x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		if (size == 1)
			x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
		else if (size == 2)
			x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
		return code;
	}
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	/* set the FPU rounding control field to truncate (RC bits 10-11 = 11b) */
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}

static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently committed.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/*
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}

		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);

		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);

		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
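
/*
 * Worked example (added for illustration): probing a 0x2800-byte localloc
 * runs the loop above twice (esp -= 0x1000, touch, sreg -= 0x1000 each
 * time), leaving sreg == 0x800, and the tail "sub %esp, sreg" allocates
 * the remainder -- 0x2800 bytes total, with every page touched in order
 * so the guard page is always hit before the pages below it.
 */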

static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
	/* Move return value to the target register */
	switch (ins->opcode) {
	case OP_CALL:
	case OP_CALL_REG:
	case OP_CALL_MEMBASE:
		if (ins->dreg != X86_EAX)
			x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
		break;
	default:
		break;
	}

	return code;
}

/*
 * mono_x86_emit_tls_get:
 * @code: buffer to store code to
 * @dreg: hard register where to place the result
 * @tls_offset: offset info
 *
 * mono_x86_emit_tls_get emits in @code the native code that puts in
 * the dreg register the item in the thread local storage identified
 * by tls_offset.
 *
 * Returns: a pointer to the end of the stored code
 */
guint8*
mono_x86_emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/*
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	g_assert (tls_offset < 64);
	x86_prefix (code, X86_FS_PREFIX);
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* Dunno what this does but TlsGetValue () contains it */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	if (optimize_for_xen) {
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, 0, 4);
		x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
	} else {
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, tls_offset, 4);
	}
#endif
	return code;
}
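
/*
 * Usage sketch (added for illustration): a caller passes the current code
 * pointer and one of the tls offsets declared at the top of this file, e.g.
 *
 *   code = mono_x86_emit_tls_get (code, X86_EAX, appdomain_tls_offset);
 *
 * after which EAX holds the current thread's appdomain pointer.
 */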

/*
 * emit_load_volatile_arguments:
 *
 * Load volatile arguments from the stack to the original input registers.
 * Required before a tail call.
 */
static guint8*
emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig;
	MonoInst *inst;
	CallInfo *cinfo;
	guint32 i;

	/* FIXME: Generate intermediate code instead */

	sig = mono_method_signature (method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	/* This is the opposite of the code in emit_prolog */

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = cinfo->args + i;
		MonoType *arg_type;
		inst = cfg->args [i];

		if (sig->hasthis && (i == 0))
			arg_type = &mono_defaults.object_class->byval_arg;
		else
			arg_type = sig->params [i - sig->hasthis];

		/*
		 * On x86, the arguments are either in their original stack locations, or in
		 * global regs.
		 */
		if (inst->opcode == OP_REGVAR) {
			g_assert (ainfo->storage == ArgOnStack);

			x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
		}
	}

	return code;
}

#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);

/* benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)

#ifndef DISABLE_JIT
2088 void
2089 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2091 MonoInst *ins;
2092 MonoCallInst *call;
2093 guint offset;
2094 guint8 *code = cfg->native_code + cfg->code_len;
2095 int max_len, cpos;
2097 if (cfg->opt & MONO_OPT_LOOP) {
2098 int pad, align = LOOP_ALIGNMENT;
2099 /* set alignment depending on cpu */
2100 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2101 pad = align - pad;
2102 /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2103 x86_padding (code, pad);
2104 cfg->code_len += pad;
2105 bb->native_offset = cfg->code_len;
2109 if (cfg->verbose_level > 2)
2110 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2112 cpos = bb->max_offset;
2114 if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2115 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2116 g_assert (!cfg->compile_aot);
2117 cpos += 6;
2119 cov->data [bb->dfn].cil_code = bb->cil_code;
2120 /* this is not thread safe, but good enough */
2121 x86_inc_mem (code, &cov->data [bb->dfn].count);
2124 offset = code - cfg->native_code;
2126 mono_debug_open_block (cfg, bb, offset);
2128 MONO_BB_FOR_EACH_INS (bb, ins) {
2129 offset = code - cfg->native_code;
2131 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2133 if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
2134 cfg->code_size *= 2;
2135 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2136 code = cfg->native_code + offset;
2137 mono_jit_stats.code_reallocs++;
2140 if (cfg->debug_info)
2141 mono_debug_record_line_number (cfg, ins, offset);
2143 switch (ins->opcode) {
2144 case OP_BIGMUL:
2145 x86_mul_reg (code, ins->sreg2, TRUE);
2146 break;
2147 case OP_BIGMUL_UN:
2148 x86_mul_reg (code, ins->sreg2, FALSE);
2149 break;
2150 case OP_X86_SETEQ_MEMBASE:
2151 case OP_X86_SETNE_MEMBASE:
2152 x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2153 ins->inst_basereg, ins->inst_offset, TRUE);
2154 break;
2155 case OP_STOREI1_MEMBASE_IMM:
2156 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2157 break;
2158 case OP_STOREI2_MEMBASE_IMM:
2159 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2160 break;
2161 case OP_STORE_MEMBASE_IMM:
2162 case OP_STOREI4_MEMBASE_IMM:
2163 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2164 break;
2165 case OP_STOREI1_MEMBASE_REG:
2166 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2167 break;
2168 case OP_STOREI2_MEMBASE_REG:
2169 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2170 break;
2171 case OP_STORE_MEMBASE_REG:
2172 case OP_STOREI4_MEMBASE_REG:
2173 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2174 break;
2175 case OP_STORE_MEM_IMM:
2176 x86_mov_mem_imm (code, ins->inst_p0, ins->inst_c0, 4);
2177 break;
2178 case OP_LOADU4_MEM:
2179 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2180 break;
2181 case OP_LOAD_MEM:
2182 case OP_LOADI4_MEM:
2183 /* These are created by the cprop pass so they use inst_imm as the source */
2184 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2185 break;
2186 case OP_LOADU1_MEM:
2187 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, FALSE);
2188 break;
2189 case OP_LOADU2_MEM:
2190 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, TRUE);
2191 break;
2192 case OP_LOAD_MEMBASE:
2193 case OP_LOADI4_MEMBASE:
2194 case OP_LOADU4_MEMBASE:
2195 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2196 break;
2197 case OP_LOADU1_MEMBASE:
2198 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2199 break;
2200 case OP_LOADI1_MEMBASE:
2201 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2202 break;
2203 case OP_LOADU2_MEMBASE:
2204 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2205 break;
2206 case OP_LOADI2_MEMBASE:
2207 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2208 break;
2209 case OP_ICONV_TO_I1:
2210 case OP_SEXT_I1:
2211 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2212 break;
2213 case OP_ICONV_TO_I2:
2214 case OP_SEXT_I2:
2215 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2216 break;
2217 case OP_ICONV_TO_U1:
2218 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2219 break;
2220 case OP_ICONV_TO_U2:
2221 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2222 break;
2223 case OP_COMPARE:
2224 case OP_ICOMPARE:
2225 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2226 break;
2227 case OP_COMPARE_IMM:
2228 case OP_ICOMPARE_IMM:
2229 x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2230 break;
2231 case OP_X86_COMPARE_MEMBASE_REG:
2232 x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2233 break;
2234 case OP_X86_COMPARE_MEMBASE_IMM:
2235 x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2236 break;
2237 case OP_X86_COMPARE_MEMBASE8_IMM:
2238 x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2239 break;
2240 case OP_X86_COMPARE_REG_MEMBASE:
2241 x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2242 break;
2243 case OP_X86_COMPARE_MEM_IMM:
2244 x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2245 break;
2246 case OP_X86_TEST_NULL:
2247 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2248 break;
2249 case OP_X86_ADD_MEMBASE_IMM:
2250 x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2251 break;
2252 case OP_X86_ADD_REG_MEMBASE:
2253 x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2254 break;
2255 case OP_X86_SUB_MEMBASE_IMM:
2256 x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2257 break;
2258 case OP_X86_SUB_REG_MEMBASE:
2259 x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2260 break;
2261 case OP_X86_AND_MEMBASE_IMM:
2262 x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2263 break;
2264 case OP_X86_OR_MEMBASE_IMM:
2265 x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2266 break;
2267 case OP_X86_XOR_MEMBASE_IMM:
2268 x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2269 break;
2270 case OP_X86_ADD_MEMBASE_REG:
2271 x86_alu_membase_reg (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2272 break;
2273 case OP_X86_SUB_MEMBASE_REG:
2274 x86_alu_membase_reg (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2275 break;
2276 case OP_X86_AND_MEMBASE_REG:
2277 x86_alu_membase_reg (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2278 break;
2279 case OP_X86_OR_MEMBASE_REG:
2280 x86_alu_membase_reg (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2281 break;
2282 case OP_X86_XOR_MEMBASE_REG:
2283 x86_alu_membase_reg (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2284 break;
2285 case OP_X86_INC_MEMBASE:
2286 x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2287 break;
2288 case OP_X86_INC_REG:
2289 x86_inc_reg (code, ins->dreg);
2290 break;
2291 case OP_X86_DEC_MEMBASE:
2292 x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2293 break;
2294 case OP_X86_DEC_REG:
2295 x86_dec_reg (code, ins->dreg);
2296 break;
2297 case OP_X86_MUL_REG_MEMBASE:
2298 x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2299 break;
2300 case OP_X86_AND_REG_MEMBASE:
2301 x86_alu_reg_membase (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset);
2302 break;
2303 case OP_X86_OR_REG_MEMBASE:
2304 x86_alu_reg_membase (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset);
2305 break;
2306 case OP_X86_XOR_REG_MEMBASE:
2307 x86_alu_reg_membase (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset);
2308 break;
2309 case OP_BREAK:
2310 x86_breakpoint (code);
2311 break;
2312 case OP_RELAXED_NOP:
2313 x86_prefix (code, X86_REP_PREFIX);
2314 x86_nop (code);
2315 break;
2316 case OP_HARD_NOP:
2317 x86_nop (code);
2318 break;
2319 case OP_NOP:
2320 case OP_DUMMY_USE:
2321 case OP_DUMMY_STORE:
2322 case OP_NOT_REACHED:
2323 case OP_NOT_NULL:
2324 break;
2325 case OP_ADDCC:
2326 case OP_IADDCC:
2327 case OP_IADD:
2328 x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2329 break;
2330 case OP_ADC:
2331 case OP_IADC:
2332 x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2333 break;
2334 case OP_ADDCC_IMM:
2335 case OP_ADD_IMM:
2336 case OP_IADD_IMM:
2337 x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2338 break;
2339 case OP_ADC_IMM:
2340 case OP_IADC_IMM:
2341 x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2342 break;
2343 case OP_SUBCC:
2344 case OP_ISUBCC:
2345 case OP_ISUB:
2346 x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2347 break;
2348 case OP_SBB:
2349 case OP_ISBB:
2350 x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2351 break;
2352 case OP_SUBCC_IMM:
2353 case OP_SUB_IMM:
2354 case OP_ISUB_IMM:
2355 x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2356 break;
2357 case OP_SBB_IMM:
2358 case OP_ISBB_IMM:
2359 x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2360 break;
2361 case OP_IAND:
2362 x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2363 break;
2364 case OP_AND_IMM:
2365 case OP_IAND_IMM:
2366 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2367 break;
2368 case OP_IDIV:
2369 case OP_IREM:
2371 * The code is the same for div/rem; the allocator will allocate dreg
2372 * to EAX/EDX as appropriate.
2374 if (ins->sreg2 == X86_EDX) {
2375 /* cdq clobbers this */
2376 x86_push_reg (code, ins->sreg2);
2377 x86_cdq (code);
2378 x86_div_membase (code, X86_ESP, 0, TRUE);
2379 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2380 } else {
2381 x86_cdq (code);
2382 x86_div_reg (code, ins->sreg2, TRUE);
2384 break;
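/*
 * For illustration: "cdq" sign-extends EAX into EDX:EAX and "idiv" leaves
 * the quotient in EAX and the remainder in EDX, so the same code serves
 * both opcodes; the allocator picks the right result register. E.g. with
 * EAX = -7 and sreg2 = 2:
 *
 *   cdq         ; EDX:EAX = -7
 *   idiv sreg2  ; EAX = -3 (quotient), EDX = -1 (remainder)
 */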
2385 case OP_IDIV_UN:
2386 case OP_IREM_UN:
2387 if (ins->sreg2 == X86_EDX) {
2388 x86_push_reg (code, ins->sreg2);
2389 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2390 x86_div_membase (code, X86_ESP, 0, FALSE);
2391 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2392 } else {
2393 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2394 x86_div_reg (code, ins->sreg2, FALSE);
2396 break;
2397 case OP_DIV_IMM:
2398 x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2399 x86_cdq (code);
2400 x86_div_reg (code, ins->sreg2, TRUE);
2401 break;
2402 case OP_IREM_IMM: {
2403 int power = mono_is_power_of_two (ins->inst_imm);
2405 g_assert (ins->sreg1 == X86_EAX);
2406 g_assert (ins->dreg == X86_EAX);
2407 g_assert (power >= 0);
2409 if (power == 1) {
2410 /* Based on http://compilers.iecc.com/comparch/article/93-04-079 */
2411 x86_cdq (code);
2412 x86_alu_reg_imm (code, X86_AND, X86_EAX, 1);
2414 * If the dividend is >= 0, this does nothing. If it is negative, it
2415 * transforms %eax=0 into %eax=0, and %eax=1 into %eax=-1.
2417 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EDX);
2418 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
2419 } else {
2420 /* Based on gcc code */
2422 /* Add compensation for negative dividends */
2423 x86_cdq (code);
2424 x86_shift_reg_imm (code, X86_SHR, X86_EDX, 32 - power);
2425 x86_alu_reg_reg (code, X86_ADD, X86_EAX, X86_EDX);
2426 /* Compute remainder */
2427 x86_alu_reg_imm (code, X86_AND, X86_EAX, (1 << power) - 1);
2428 /* Remove compensation */
2429 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
2431 break;
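/*
 * Worked example for the power == 1 path above, with EAX = -3:
 *
 *   cdq             ; EDX = 0xffffffff (sign mask)
 *   and $1, %eax    ; EAX = 1
 *   xor %edx, %eax  ; EAX = 0xfffffffe
 *   sub %edx, %eax  ; EAX = -1, matching C semantics (-3 % 2 == -1)
 *
 * For a non-negative dividend EDX is 0, so the xor/sub pair is a no-op.
 */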
2433 case OP_IOR:
2434 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2435 break;
2436 case OP_OR_IMM:
2437 case OP_IOR_IMM:
2438 x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2439 break;
2440 case OP_IXOR:
2441 x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2442 break;
2443 case OP_XOR_IMM:
2444 case OP_IXOR_IMM:
2445 x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2446 break;
2447 case OP_ISHL:
2448 g_assert (ins->sreg2 == X86_ECX);
2449 x86_shift_reg (code, X86_SHL, ins->dreg);
2450 break;
2451 case OP_ISHR:
2452 g_assert (ins->sreg2 == X86_ECX);
2453 x86_shift_reg (code, X86_SAR, ins->dreg);
2454 break;
2455 case OP_SHR_IMM:
2456 case OP_ISHR_IMM:
2457 x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2458 break;
2459 case OP_SHR_UN_IMM:
2460 case OP_ISHR_UN_IMM:
2461 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2462 break;
2463 case OP_ISHR_UN:
2464 g_assert (ins->sreg2 == X86_ECX);
2465 x86_shift_reg (code, X86_SHR, ins->dreg);
2466 break;
2467 case OP_SHL_IMM:
2468 case OP_ISHL_IMM:
2469 x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2470 break;
2471 case OP_LSHL: {
2472 guint8 *jump_to_end;
2474 /* handle shifts below 32 bits */
2475 x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2476 x86_shift_reg (code, X86_SHL, ins->sreg1);
2478 x86_test_reg_imm (code, X86_ECX, 32);
2479 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2481 /* handle shift over 32 bit */
2482 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2483 x86_clear_reg (code, ins->sreg1);
2485 x86_patch (jump_to_end, code);
2487 break;
2488 case OP_LSHR: {
2489 guint8 *jump_to_end;
2491 /* handle shifts below 32 bits */
2492 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2493 x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2495 x86_test_reg_imm (code, X86_ECX, 32);
2496 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2498 /* handle shifts over 31 bits */
2499 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2500 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2502 x86_patch (jump_to_end, code);
2504 break;
2505 case OP_LSHR_UN: {
2506 guint8 *jump_to_end;
2508 /* handle shifts below 32 bits */
2509 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2510 x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2512 x86_test_reg_imm (code, X86_ECX, 32);
2513 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2515 /* handle shifts over 31 bits */
2516 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2517 x86_clear_reg (code, ins->backend.reg3);
2519 x86_patch (jump_to_end, code);
2521 break;
2522 case OP_LSHL_IMM:
2523 if (ins->inst_imm >= 32) {
2524 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2525 x86_clear_reg (code, ins->sreg1);
2526 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2527 } else {
2528 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2529 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2531 break;
2532 case OP_LSHR_IMM:
2533 if (ins->inst_imm >= 32) {
2534 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2535 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2536 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2537 } else {
2538 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2539 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2541 break;
2542 case OP_LSHR_UN_IMM:
2543 if (ins->inst_imm >= 32) {
2544 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2545 x86_clear_reg (code, ins->backend.reg3);
2546 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2547 } else {
2548 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2549 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2551 break;
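/*
 * For illustration: the *_IMM long-shift cases treat backend.reg3:sreg1 as
 * the hi:lo halves of a 64-bit value. Shifts below 32 use shld/shrd to
 * carry bits across the halves; shifts of 32 or more just move one half
 * into the other and fill the rest with zeros (or sign bits for SAR), e.g.
 *
 *   (hi:lo) << 40   =>   hi = lo << 8, lo = 0
 */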
2552 case OP_INOT:
2553 x86_not_reg (code, ins->sreg1);
2554 break;
2555 case OP_INEG:
2556 x86_neg_reg (code, ins->sreg1);
2557 break;
2559 case OP_IMUL:
2560 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2561 break;
2562 case OP_MUL_IMM:
2563 case OP_IMUL_IMM:
2564 switch (ins->inst_imm) {
2565 case 2:
2566 /* MOV r1, r2 */
2567 /* ADD r1, r1 */
2568 if (ins->dreg != ins->sreg1)
2569 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2570 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2571 break;
2572 case 3:
2573 /* LEA r1, [r2 + r2*2] */
2574 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2575 break;
2576 case 5:
2577 /* LEA r1, [r2 + r2*4] */
2578 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2579 break;
2580 case 6:
2581 /* LEA r1, [r2 + r2*2] */
2582 /* ADD r1, r1 */
2583 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2584 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2585 break;
2586 case 9:
2587 /* LEA r1, [r2 + r2*8] */
2588 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2589 break;
2590 case 10:
2591 /* LEA r1, [r2 + r2*4] */
2592 /* ADD r1, r1 */
2593 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2594 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2595 break;
2596 case 12:
2597 /* LEA r1, [r2 + r2*2] */
2598 /* SHL r1, 2 */
2599 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2600 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2601 break;
2602 case 25:
2603 /* LEA r1, [r2 + r2*4] */
2604 /* LEA r1, [r1 + r1*4] */
2605 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2606 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2607 break;
2608 case 100:
2609 /* LEA r1, [r2 + r2*4] */
2610 /* SHL r1, 2 */
2611 /* LEA r1, [r1 + r1*4] */
2612 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2613 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2614 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2615 break;
2616 default:
2617 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2618 break;
2620 break;
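/*
 * For illustration: the special cases above strength-reduce the multiply
 * using lea, which computes base + index * {1,2,4,8} in one instruction.
 * E.g. for inst_imm == 10:
 *
 *   lea (r2,r2,4), r1   ; r1 = r2 * 5
 *   add r1, r1          ; r1 = r2 * 10
 *
 * Any other constant falls back to a plain imul with an immediate.
 */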
2621 case OP_IMUL_OVF:
2622 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2623 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2624 break;
2625 case OP_IMUL_OVF_UN: {
2626 /* the mul operation and the exception check should most likely be split */
2627 int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2628 /*g_assert (ins->sreg2 == X86_EAX);
2629 g_assert (ins->dreg == X86_EAX);*/
2630 if (ins->sreg2 == X86_EAX) {
2631 non_eax_reg = ins->sreg1;
2632 } else if (ins->sreg1 == X86_EAX) {
2633 non_eax_reg = ins->sreg2;
2634 } else {
2635 /* no need to save since we're going to store to it anyway */
2636 if (ins->dreg != X86_EAX) {
2637 saved_eax = TRUE;
2638 x86_push_reg (code, X86_EAX);
2640 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2641 non_eax_reg = ins->sreg2;
2643 if (ins->dreg == X86_EDX) {
2644 if (!saved_eax) {
2645 saved_eax = TRUE;
2646 x86_push_reg (code, X86_EAX);
2648 } else if (ins->dreg != X86_EAX) {
2649 saved_edx = TRUE;
2650 x86_push_reg (code, X86_EDX);
2652 x86_mul_reg (code, non_eax_reg, FALSE);
2653 /* save before the check since pop and mov don't change the flags */
2654 if (ins->dreg != X86_EAX)
2655 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2656 if (saved_edx)
2657 x86_pop_reg (code, X86_EDX);
2658 if (saved_eax)
2659 x86_pop_reg (code, X86_EAX);
2660 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2661 break;
2663 case OP_ICONST:
2664 x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2665 break;
2666 case OP_AOTCONST:
2667 g_assert_not_reached ();
2668 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2669 x86_mov_reg_imm (code, ins->dreg, 0);
2670 break;
2671 case OP_JUMP_TABLE:
2672 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2673 x86_mov_reg_imm (code, ins->dreg, 0);
2674 break;
2675 case OP_LOAD_GOTADDR:
2676 x86_call_imm (code, 0);
2678 * The patch needs to point to the pop, since the GOT offset needs
2679 * to be added to that address.
2681 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2682 x86_pop_reg (code, ins->dreg);
2683 x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2684 break;
2685 case OP_GOT_ENTRY:
2686 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2687 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2688 break;
2689 case OP_X86_PUSH_GOT_ENTRY:
2690 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2691 x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2692 break;
2693 case OP_MOVE:
2694 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2695 break;
2696 case OP_JMP: {
2698 * Note: this 'frame destruction' logic is useful for tail calls, too.
2699 * Keep in sync with the code in emit_epilog.
2701 int pos = 0;
2703 /* FIXME: no tracing support... */
2704 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2705 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2706 /* reset offset to make max_len work */
2707 offset = code - cfg->native_code;
2709 g_assert (!cfg->method->save_lmf);
2711 code = emit_load_volatile_arguments (cfg, code);
2713 if (cfg->used_int_regs & (1 << X86_EBX))
2714 pos -= 4;
2715 if (cfg->used_int_regs & (1 << X86_EDI))
2716 pos -= 4;
2717 if (cfg->used_int_regs & (1 << X86_ESI))
2718 pos -= 4;
2719 if (pos)
2720 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2722 if (cfg->used_int_regs & (1 << X86_ESI))
2723 x86_pop_reg (code, X86_ESI);
2724 if (cfg->used_int_regs & (1 << X86_EDI))
2725 x86_pop_reg (code, X86_EDI);
2726 if (cfg->used_int_regs & (1 << X86_EBX))
2727 x86_pop_reg (code, X86_EBX);
2729 /* restore ESP/EBP */
2730 x86_leave (code);
2731 offset = code - cfg->native_code;
2732 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2733 x86_jump32 (code, 0);
2735 cfg->disable_aot = TRUE;
2736 break;
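/*
 * In short: OP_JMP reuses the caller's frame for a tail call. The code
 * above restores the callee-saved registers and runs "leave" exactly as
 * emit_epilog would, but then jumps to the target method instead of
 * returning, so the target returns directly to our caller.
 */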
2738 case OP_CHECK_THIS:
2739 /* ensure ins->sreg1 is not NULL
2740 * note that cmp DWORD PTR [eax], eax is one byte shorter than
2741 * cmp DWORD PTR [eax], 0
2743 x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2744 break;
2745 case OP_ARGLIST: {
2746 int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2747 x86_push_reg (code, hreg);
2748 x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2749 x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2750 x86_pop_reg (code, hreg);
2751 break;
2753 case OP_FCALL:
2754 case OP_LCALL:
2755 case OP_VCALL:
2756 case OP_VCALL2:
2757 case OP_VOIDCALL:
2758 case OP_CALL:
2759 call = (MonoCallInst*)ins;
2760 if (ins->flags & MONO_INST_HAS_METHOD)
2761 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2762 else
2763 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2764 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2765 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2766 * bytes to pop, we want to use pops. GCC does this (note it won't happen
2767 * for P4 or i686 because gcc will avoid using pop/push at all), but we aren't
2768 * smart enough to do that optimization yet.
2770 * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2771 * the mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2772 * but noticeable speedup (most likely from locality benefits). People with other
2773 * processors should check on theirs to see what happens.
2775 if (call->stack_usage == 4) {
2776 /* we want to use registers that won't get used soon, so use
2777 * ecx, as eax will get allocated first. edx is used by long calls,
2778 * so we can't use that.
2781 x86_pop_reg (code, X86_ECX);
2782 } else {
2783 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2786 code = emit_move_return_value (cfg, ins, code);
2787 break;
2788 case OP_FCALL_REG:
2789 case OP_LCALL_REG:
2790 case OP_VCALL_REG:
2791 case OP_VCALL2_REG:
2792 case OP_VOIDCALL_REG:
2793 case OP_CALL_REG:
2794 call = (MonoCallInst*)ins;
2795 x86_call_reg (code, ins->sreg1);
2796 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2797 if (call->stack_usage == 4)
2798 x86_pop_reg (code, X86_ECX);
2799 else
2800 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2802 code = emit_move_return_value (cfg, ins, code);
2803 break;
2804 case OP_FCALL_MEMBASE:
2805 case OP_LCALL_MEMBASE:
2806 case OP_VCALL_MEMBASE:
2807 case OP_VCALL2_MEMBASE:
2808 case OP_VOIDCALL_MEMBASE:
2809 case OP_CALL_MEMBASE:
2810 call = (MonoCallInst*)ins;
2813 * Emit a few nops to simplify get_vcall_slot ().
2815 x86_nop (code);
2816 x86_nop (code);
2817 x86_nop (code);
2819 x86_call_membase (code, ins->sreg1, ins->inst_offset);
2820 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2821 if (call->stack_usage == 4)
2822 x86_pop_reg (code, X86_ECX);
2823 else
2824 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2826 code = emit_move_return_value (cfg, ins, code);
2827 break;
2828 case OP_X86_PUSH:
2829 x86_push_reg (code, ins->sreg1);
2830 break;
2831 case OP_X86_PUSH_IMM:
2832 x86_push_imm (code, ins->inst_imm);
2833 break;
2834 case OP_X86_PUSH_MEMBASE:
2835 x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2836 break;
2837 case OP_X86_PUSH_OBJ:
2838 x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2839 x86_push_reg (code, X86_EDI);
2840 x86_push_reg (code, X86_ESI);
2841 x86_push_reg (code, X86_ECX);
2842 if (ins->inst_offset)
2843 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2844 else
2845 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2846 x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2847 x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2848 x86_cld (code);
2849 x86_prefix (code, X86_REP_PREFIX);
2850 x86_movsd (code);
2851 x86_pop_reg (code, X86_ECX);
2852 x86_pop_reg (code, X86_ESI);
2853 x86_pop_reg (code, X86_EDI);
2854 break;
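/*
 * For illustration: OP_X86_PUSH_OBJ pushes a valuetype by value. It makes
 * room with "sub $inst_imm, %esp", saves ECX/ESI/EDI (hence the +12 when
 * computing EDI), and then "rep movsd" copies inst_imm / 4 dwords from the
 * object to the new stack slot - in effect a memcpy onto the stack.
 */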
2855 case OP_X86_LEA:
2856 x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2857 break;
2858 case OP_X86_LEA_MEMBASE:
2859 x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2860 break;
2861 case OP_X86_XCHG:
2862 x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2863 break;
2864 case OP_LOCALLOC:
2865 /* keep alignment */
2866 x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2867 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2868 code = mono_emit_stack_alloc (code, ins);
2869 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2870 break;
2871 case OP_LOCALLOC_IMM: {
2872 guint32 size = ins->inst_imm;
2873 size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
2875 if (ins->flags & MONO_INST_INIT) {
2876 /* FIXME: Optimize this */
2877 x86_mov_reg_imm (code, ins->dreg, size);
2878 ins->sreg1 = ins->dreg;
2880 code = mono_emit_stack_alloc (code, ins);
2881 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2882 } else {
2883 x86_alu_reg_imm (code, X86_SUB, X86_ESP, size);
2884 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2886 break;
2888 case OP_THROW: {
2889 x86_push_reg (code, ins->sreg1);
2890 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
2891 (gpointer)"mono_arch_throw_exception");
2892 break;
2894 case OP_RETHROW: {
2895 x86_push_reg (code, ins->sreg1);
2896 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
2897 (gpointer)"mono_arch_rethrow_exception");
2898 break;
2900 case OP_CALL_HANDLER:
2901 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
2902 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2903 x86_call_imm (code, 0);
2904 x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
2905 break;
2906 case OP_START_HANDLER: {
2907 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
2908 x86_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, X86_ESP, 4);
2909 break;
2911 case OP_ENDFINALLY: {
2912 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
2913 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
2914 x86_ret (code);
2915 break;
2917 case OP_ENDFILTER: {
2918 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
2919 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
2920 /* The local allocator will put the result into EAX */
2921 x86_ret (code);
2922 break;
2925 case OP_LABEL:
2926 ins->inst_c0 = code - cfg->native_code;
2927 break;
2928 case OP_BR:
2929 if (ins->inst_target_bb->native_offset) {
2930 x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
2931 } else {
2932 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2933 if ((cfg->opt & MONO_OPT_BRANCH) &&
2934 x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2935 x86_jump8 (code, 0);
2936 else
2937 x86_jump32 (code, 0);
2939 break;
2940 case OP_BR_REG:
2941 x86_jump_reg (code, ins->sreg1);
2942 break;
2943 case OP_CEQ:
2944 case OP_CLT:
2945 case OP_CLT_UN:
2946 case OP_CGT:
2947 case OP_CGT_UN:
2948 case OP_CNE:
2949 case OP_ICEQ:
2950 case OP_ICLT:
2951 case OP_ICLT_UN:
2952 case OP_ICGT:
2953 case OP_ICGT_UN:
2954 x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2955 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2956 break;
2957 case OP_COND_EXC_EQ:
2958 case OP_COND_EXC_NE_UN:
2959 case OP_COND_EXC_LT:
2960 case OP_COND_EXC_LT_UN:
2961 case OP_COND_EXC_GT:
2962 case OP_COND_EXC_GT_UN:
2963 case OP_COND_EXC_GE:
2964 case OP_COND_EXC_GE_UN:
2965 case OP_COND_EXC_LE:
2966 case OP_COND_EXC_LE_UN:
2967 case OP_COND_EXC_IEQ:
2968 case OP_COND_EXC_INE_UN:
2969 case OP_COND_EXC_ILT:
2970 case OP_COND_EXC_ILT_UN:
2971 case OP_COND_EXC_IGT:
2972 case OP_COND_EXC_IGT_UN:
2973 case OP_COND_EXC_IGE:
2974 case OP_COND_EXC_IGE_UN:
2975 case OP_COND_EXC_ILE:
2976 case OP_COND_EXC_ILE_UN:
2977 EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
2978 break;
2979 case OP_COND_EXC_OV:
2980 case OP_COND_EXC_NO:
2981 case OP_COND_EXC_C:
2982 case OP_COND_EXC_NC:
2983 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2984 break;
2985 case OP_COND_EXC_IOV:
2986 case OP_COND_EXC_INO:
2987 case OP_COND_EXC_IC:
2988 case OP_COND_EXC_INC:
2989 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
2990 break;
2991 case OP_IBEQ:
2992 case OP_IBNE_UN:
2993 case OP_IBLT:
2994 case OP_IBLT_UN:
2995 case OP_IBGT:
2996 case OP_IBGT_UN:
2997 case OP_IBGE:
2998 case OP_IBGE_UN:
2999 case OP_IBLE:
3000 case OP_IBLE_UN:
3001 EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
3002 break;
3004 case OP_CMOV_IEQ:
3005 case OP_CMOV_IGE:
3006 case OP_CMOV_IGT:
3007 case OP_CMOV_ILE:
3008 case OP_CMOV_ILT:
3009 case OP_CMOV_INE_UN:
3010 case OP_CMOV_IGE_UN:
3011 case OP_CMOV_IGT_UN:
3012 case OP_CMOV_ILE_UN:
3013 case OP_CMOV_ILT_UN:
3014 g_assert (ins->dreg == ins->sreg1);
3015 x86_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
3016 break;
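/*
 * Note: cmov is a two-operand instruction (dest = cond ? src : dest),
 * which is why the local allocator must give us dreg == sreg1 here.
 */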
3018 /* floating point opcodes */
3019 case OP_R8CONST: {
3020 double d = *(double *)ins->inst_p0;
3022 if ((d == 0.0) && (mono_signbit (d) == 0)) {
3023 x86_fldz (code);
3024 } else if (d == 1.0) {
3025 x86_fld1 (code);
3026 } else {
3027 if (cfg->compile_aot) {
3028 guint32 *val = (guint32*)&d;
3029 x86_push_imm (code, val [1]);
3030 x86_push_imm (code, val [0]);
3031 x86_fld_membase (code, X86_ESP, 0, TRUE);
3032 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3034 else {
3035 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
3036 x86_fld (code, NULL, TRUE);
3039 break;
3041 case OP_R4CONST: {
3042 float f = *(float *)ins->inst_p0;
3044 if ((f == 0.0) && (mono_signbit (f) == 0)) {
3045 x86_fldz (code);
3046 } else if (f == 1.0) {
3047 x86_fld1 (code);
3048 } else {
3049 if (cfg->compile_aot) {
3050 guint32 val = *(guint32*)&f;
3051 x86_push_imm (code, val);
3052 x86_fld_membase (code, X86_ESP, 0, FALSE);
3053 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3055 else {
3056 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
3057 x86_fld (code, NULL, FALSE);
3060 break;
3062 case OP_STORER8_MEMBASE_REG:
3063 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3064 break;
3065 case OP_LOADR8_SPILL_MEMBASE:
3066 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3067 x86_fxch (code, 1);
3068 break;
3069 case OP_LOADR8_MEMBASE:
3070 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3071 break;
3072 case OP_STORER4_MEMBASE_REG:
3073 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3074 break;
3075 case OP_LOADR4_MEMBASE:
3076 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3077 break;
3078 case OP_ICONV_TO_R4:
3079 x86_push_reg (code, ins->sreg1);
3080 x86_fild_membase (code, X86_ESP, 0, FALSE);
3081 /* Change precision */
3082 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3083 x86_fld_membase (code, X86_ESP, 0, FALSE);
3084 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3085 break;
3086 case OP_ICONV_TO_R8:
3087 x86_push_reg (code, ins->sreg1);
3088 x86_fild_membase (code, X86_ESP, 0, FALSE);
3089 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3090 break;
3091 case OP_ICONV_TO_R_UN:
3092 x86_push_imm (code, 0);
3093 x86_push_reg (code, ins->sreg1);
3094 x86_fild_membase (code, X86_ESP, 0, TRUE);
3095 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3096 break;
3097 case OP_X86_FP_LOAD_I8:
3098 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3099 break;
3100 case OP_X86_FP_LOAD_I4:
3101 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3102 break;
3103 case OP_FCONV_TO_R4:
3104 /* Change precision */
3105 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3106 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3107 x86_fld_membase (code, X86_ESP, 0, FALSE);
3108 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3109 break;
3110 case OP_FCONV_TO_I1:
3111 code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3112 break;
3113 case OP_FCONV_TO_U1:
3114 code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3115 break;
3116 case OP_FCONV_TO_I2:
3117 code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3118 break;
3119 case OP_FCONV_TO_U2:
3120 code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3121 break;
3122 case OP_FCONV_TO_I4:
3123 case OP_FCONV_TO_I:
3124 code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3125 break;
3126 case OP_FCONV_TO_I8:
3127 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3128 x86_fnstcw_membase(code, X86_ESP, 0);
3129 x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
3130 x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
3131 x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
3132 x86_fldcw_membase (code, X86_ESP, 2);
3133 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3134 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
3135 x86_pop_reg (code, ins->dreg);
3136 x86_pop_reg (code, ins->backend.reg3);
3137 x86_fldcw_membase (code, X86_ESP, 0);
3138 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3139 break;
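/*
 * For illustration: fistp rounds according to the FPU control word, whose
 * default mode is round-to-nearest, while conv.i8 must truncate. The code
 * above saves the control word, ORs in 0xc00 (rounding control = truncate
 * toward zero), converts, and then restores the original control word.
 */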
3140 case OP_LCONV_TO_R8_2:
3141 x86_push_reg (code, ins->sreg2);
3142 x86_push_reg (code, ins->sreg1);
3143 x86_fild_membase (code, X86_ESP, 0, TRUE);
3144 /* Change precision */
3145 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
3146 x86_fld_membase (code, X86_ESP, 0, TRUE);
3147 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3148 break;
3149 case OP_LCONV_TO_R4_2:
3150 x86_push_reg (code, ins->sreg2);
3151 x86_push_reg (code, ins->sreg1);
3152 x86_fild_membase (code, X86_ESP, 0, TRUE);
3153 /* Change precision */
3154 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3155 x86_fld_membase (code, X86_ESP, 0, FALSE);
3156 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3157 break;
3158 case OP_LCONV_TO_R_UN:
3159 case OP_LCONV_TO_R_UN_2: {
3160 static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
3161 guint8 *br;
3163 /* load 64bit integer to FP stack */
3164 x86_push_reg (code, ins->sreg2);
3165 x86_push_reg (code, ins->sreg1);
3166 x86_fild_membase (code, X86_ESP, 0, TRUE);
3168 /* test if lreg is negative */
3169 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3170 br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3172 /* add correction constant mn */
3173 x86_fld80_mem (code, mn);
3174 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3176 x86_patch (br, code);
3178 /* Change precision */
3179 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
3180 x86_fld_membase (code, X86_ESP, 0, TRUE);
3182 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3184 break;
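/*
 * A note on the constant above (our reading, not documented in the source):
 * the ten bytes of mn[] appear to encode 2^64 as an 80-bit x87 extended
 * double (mantissa 0x8000000000000000, biased exponent 0x403f). fild reads
 * the 64-bit value as signed, so when it is negative, adding 2^64 recovers
 * the intended unsigned interpretation.
 */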
3186 case OP_LCONV_TO_OVF_I:
3187 case OP_LCONV_TO_OVF_I4_2: {
3188 guint8 *br [3], *label [1];
3189 MonoInst *tins;
3192 * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
3194 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3196 /* If the low word top bit is set, see if we are negative */
3197 br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3198 /* We are not negative (no top bit set); check that our top word is zero */
3199 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3200 br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3201 label [0] = code;
3203 /* throw exception */
3204 tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
3205 if (tins) {
3206 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
3207 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
3208 x86_jump8 (code, 0);
3209 else
3210 x86_jump32 (code, 0);
3211 } else {
3212 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3213 x86_jump32 (code, 0);
3217 x86_patch (br [0], code);
3218 /* our top bit is set, check that the top word is 0xffffffff */
3219 x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3221 x86_patch (br [1], code);
3222 /* nope, emit exception */
3223 br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3224 x86_patch (br [2], label [0]);
3226 if (ins->dreg != ins->sreg1)
3227 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3228 break;
3230 case OP_FMOVE:
3231 /* Not needed on the fp stack */
3232 break;
3233 case OP_FADD:
3234 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3235 break;
3236 case OP_FSUB:
3237 x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3238 break;
3239 case OP_FMUL:
3240 x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3241 break;
3242 case OP_FDIV:
3243 x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3244 break;
3245 case OP_FNEG:
3246 x86_fchs (code);
3247 break;
3248 case OP_SIN:
3249 x86_fsin (code);
3250 x86_fldz (code);
3251 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3252 break;
3253 case OP_COS:
3254 x86_fcos (code);
3255 x86_fldz (code);
3256 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3257 break;
3258 case OP_ABS:
3259 x86_fabs (code);
3260 break;
3261 case OP_TAN: {
3263 * it really doesn't make sense to inline all this code,
3264 * it's here just to show that things may not be as simple
3265 * as they appear.
3267 guchar *check_pos, *end_tan, *pop_jump;
3268 x86_push_reg (code, X86_EAX);
3269 x86_fptan (code);
3270 x86_fnstsw (code);
3271 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3272 check_pos = code;
3273 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3274 x86_fstp (code, 0); /* pop the 1.0 */
3275 end_tan = code;
3276 x86_jump8 (code, 0);
3277 x86_fldpi (code);
3278 x86_fp_op (code, X86_FADD, 0);
3279 x86_fxch (code, 1);
3280 x86_fprem1 (code);
3281 x86_fstsw (code);
3282 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3283 pop_jump = code;
3284 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3285 x86_fstp (code, 1);
3286 x86_fptan (code);
3287 x86_patch (pop_jump, code);
3288 x86_fstp (code, 0); /* pop the 1.0 */
3289 x86_patch (check_pos, code);
3290 x86_patch (end_tan, code);
3291 x86_fldz (code);
3292 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3293 x86_pop_reg (code, X86_EAX);
3294 break;
3296 case OP_ATAN:
3297 x86_fld1 (code);
3298 x86_fpatan (code);
3299 x86_fldz (code);
3300 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3301 break;
3302 case OP_SQRT:
3303 x86_fsqrt (code);
3304 break;
3305 case OP_ROUND:
3306 x86_frndint (code);
3307 break;
3308 case OP_IMIN:
3309 g_assert (cfg->opt & MONO_OPT_CMOV);
3310 g_assert (ins->dreg == ins->sreg1);
3311 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3312 x86_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
3313 break;
3314 case OP_IMIN_UN:
3315 g_assert (cfg->opt & MONO_OPT_CMOV);
3316 g_assert (ins->dreg == ins->sreg1);
3317 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3318 x86_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
3319 break;
3320 case OP_IMAX:
3321 g_assert (cfg->opt & MONO_OPT_CMOV);
3322 g_assert (ins->dreg == ins->sreg1);
3323 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3324 x86_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
3325 break;
3326 case OP_IMAX_UN:
3327 g_assert (cfg->opt & MONO_OPT_CMOV);
3328 g_assert (ins->dreg == ins->sreg1);
3329 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3330 x86_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
3331 break;
3332 case OP_X86_FPOP:
3333 x86_fstp (code, 0);
3334 break;
3335 case OP_X86_FXCH:
3336 x86_fxch (code, ins->inst_imm);
3337 break;
3338 case OP_FREM: {
3339 guint8 *l1, *l2;
3341 x86_push_reg (code, X86_EAX);
3342 /* we need to exchange ST(0) with ST(1) */
3343 x86_fxch (code, 1);
3345 /* this requires a loop, because fprem sometimes
3346 * returns a partial remainder */
3347 l1 = code;
3348 /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3349 /* x86_fprem1 (code); */
3350 x86_fprem (code);
3351 x86_fnstsw (code);
3352 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3353 l2 = code + 2;
3354 x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3356 /* pop result */
3357 x86_fstp (code, 1);
3359 x86_pop_reg (code, X86_EAX);
3360 break;
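/*
 * Background: fprem produces only a partial remainder when the exponents
 * of ST(0) and ST(1) differ by more than 63, and flags that by setting C2
 * in the status word; the fprem/fnstsw/test loop above simply retries
 * until C2 is clear.
 */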
3362 case OP_FCOMPARE:
3363 if (cfg->opt & MONO_OPT_FCMOV) {
3364 x86_fcomip (code, 1);
3365 x86_fstp (code, 0);
3366 break;
3368 /* this overwrites EAX */
3369 EMIT_FPCOMPARE(code);
3370 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3371 break;
3372 case OP_FCEQ:
3373 if (cfg->opt & MONO_OPT_FCMOV) {
3374 /* zeroing the register at the start results in
3375 * shorter and faster code (we can also remove the widening op)
3377 guchar *unordered_check;
3378 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3379 x86_fcomip (code, 1);
3380 x86_fstp (code, 0);
3381 unordered_check = code;
3382 x86_branch8 (code, X86_CC_P, 0, FALSE);
3383 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3384 x86_patch (unordered_check, code);
3385 break;
3387 if (ins->dreg != X86_EAX)
3388 x86_push_reg (code, X86_EAX);
3390 EMIT_FPCOMPARE(code);
3391 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3392 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3393 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3394 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3396 if (ins->dreg != X86_EAX)
3397 x86_pop_reg (code, X86_EAX);
3398 break;
3399 case OP_FCLT:
3400 case OP_FCLT_UN:
3401 if (cfg->opt & MONO_OPT_FCMOV) {
3402 /* zeroing the register at the start results in
3403 * shorter and faster code (we can also remove the widening op)
3405 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3406 x86_fcomip (code, 1);
3407 x86_fstp (code, 0);
3408 if (ins->opcode == OP_FCLT_UN) {
3409 guchar *unordered_check = code;
3410 guchar *jump_to_end;
3411 x86_branch8 (code, X86_CC_P, 0, FALSE);
3412 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3413 jump_to_end = code;
3414 x86_jump8 (code, 0);
3415 x86_patch (unordered_check, code);
3416 x86_inc_reg (code, ins->dreg);
3417 x86_patch (jump_to_end, code);
3418 } else {
3419 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3421 break;
3423 if (ins->dreg != X86_EAX)
3424 x86_push_reg (code, X86_EAX);
3426 EMIT_FPCOMPARE(code);
3427 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3428 if (ins->opcode == OP_FCLT_UN) {
3429 guchar *is_not_zero_check, *end_jump;
3430 is_not_zero_check = code;
3431 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3432 end_jump = code;
3433 x86_jump8 (code, 0);
3434 x86_patch (is_not_zero_check, code);
3435 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3437 x86_patch (end_jump, code);
3439 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3440 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3442 if (ins->dreg != X86_EAX)
3443 x86_pop_reg (code, X86_EAX);
3444 break;
3445 case OP_FCGT:
3446 case OP_FCGT_UN:
3447 if (cfg->opt & MONO_OPT_FCMOV) {
3448 /* zeroing the register at the start results in
3449 * shorter and faster code (we can also remove the widening op)
3451 guchar *unordered_check;
3452 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3453 x86_fcomip (code, 1);
3454 x86_fstp (code, 0);
3455 if (ins->opcode == OP_FCGT) {
3456 unordered_check = code;
3457 x86_branch8 (code, X86_CC_P, 0, FALSE);
3458 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3459 x86_patch (unordered_check, code);
3460 } else {
3461 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3463 break;
3465 if (ins->dreg != X86_EAX)
3466 x86_push_reg (code, X86_EAX);
3468 EMIT_FPCOMPARE(code);
3469 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3470 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3471 if (ins->opcode == OP_FCGT_UN) {
3472 guchar *is_not_zero_check, *end_jump;
3473 is_not_zero_check = code;
3474 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3475 end_jump = code;
3476 x86_jump8 (code, 0);
3477 x86_patch (is_not_zero_check, code);
3478 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3480 x86_patch (end_jump, code);
3482 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3483 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3485 if (ins->dreg != X86_EAX)
3486 x86_pop_reg (code, X86_EAX);
3487 break;
3488 case OP_FBEQ:
3489 if (cfg->opt & MONO_OPT_FCMOV) {
3490 guchar *jump = code;
3491 x86_branch8 (code, X86_CC_P, 0, TRUE);
3492 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3493 x86_patch (jump, code);
3494 break;
3496 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3497 EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3498 break;
3499 case OP_FBNE_UN:
3500 /* Branch if C013 != 100 */
3501 if (cfg->opt & MONO_OPT_FCMOV) {
3502 /* branch if !ZF or (PF|CF) */
3503 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3504 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3505 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3506 break;
3508 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3509 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3510 break;
3511 case OP_FBLT:
3512 if (cfg->opt & MONO_OPT_FCMOV) {
3513 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3514 break;
3516 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3517 break;
3518 case OP_FBLT_UN:
3519 if (cfg->opt & MONO_OPT_FCMOV) {
3520 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3521 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3522 break;
3524 if (ins->opcode == OP_FBLT_UN) {
3525 guchar *is_not_zero_check, *end_jump;
3526 is_not_zero_check = code;
3527 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3528 end_jump = code;
3529 x86_jump8 (code, 0);
3530 x86_patch (is_not_zero_check, code);
3531 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3533 x86_patch (end_jump, code);
3535 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3536 break;
3537 case OP_FBGT:
3538 case OP_FBGT_UN:
3539 if (cfg->opt & MONO_OPT_FCMOV) {
3540 if (ins->opcode == OP_FBGT) {
3541 guchar *br1;
3543 /* skip branch if C1=1 */
3544 br1 = code;
3545 x86_branch8 (code, X86_CC_P, 0, FALSE);
3546 /* branch if (C0 | C3) = 1 */
3547 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3548 x86_patch (br1, code);
3549 } else {
3550 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3552 break;
3554 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3555 if (ins->opcode == OP_FBGT_UN) {
3556 guchar *is_not_zero_check, *end_jump;
3557 is_not_zero_check = code;
3558 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3559 end_jump = code;
3560 x86_jump8 (code, 0);
3561 x86_patch (is_not_zero_check, code);
3562 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3564 x86_patch (end_jump, code);
3566 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3567 break;
3568 case OP_FBGE:
3569 /* Branch if C013 == 100 or 001 */
3570 if (cfg->opt & MONO_OPT_FCMOV) {
3571 guchar *br1;
3573 /* skip branch if C1=1 */
3574 br1 = code;
3575 x86_branch8 (code, X86_CC_P, 0, FALSE);
3576 /* branch if (C0 | C3) = 1 */
3577 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3578 x86_patch (br1, code);
3579 break;
3581 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3582 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3583 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3584 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3585 break;
3586 case OP_FBGE_UN:
3587 /* Branch if C013 == 000 */
3588 if (cfg->opt & MONO_OPT_FCMOV) {
3589 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3590 break;
3592 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3593 break;
3594 case OP_FBLE:
3595 /* Branch if C013 == 000 or 100 */
3596 if (cfg->opt & MONO_OPT_FCMOV) {
3597 guchar *br1;
3599 /* skip branch if C1=1 */
3600 br1 = code;
3601 x86_branch8 (code, X86_CC_P, 0, FALSE);
3602 /* branch if C0=0 */
3603 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3604 x86_patch (br1, code);
3605 break;
3607 x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3608 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3609 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3610 break;
3611 case OP_FBLE_UN:
3612 /* Branch if C013 != 001 */
3613 if (cfg->opt & MONO_OPT_FCMOV) {
3614 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3615 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3616 break;
3618 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3619 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3620 break;
3621 case OP_CKFINITE: {
3622 guchar *br1;
3623 x86_push_reg (code, X86_EAX);
3624 x86_fxam (code);
3625 x86_fnstsw (code);
3626 x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3627 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3628 x86_pop_reg (code, X86_EAX);
3630 /* Have to clean up the fp stack before throwing the exception */
3631 br1 = code;
3632 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3634 x86_fstp (code, 0);
3635 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3637 x86_patch (br1, code);
3638 break;
3640 case OP_TLS_GET: {
3641 code = mono_x86_emit_tls_get (code, ins->dreg, ins->inst_offset);
3642 break;
3644 case OP_MEMORY_BARRIER: {
3645 /* Not needed on x86 */
3646 break;
3648 case OP_ATOMIC_ADD_I4: {
3649 int dreg = ins->dreg;
3651 if (dreg == ins->inst_basereg) {
3652 x86_push_reg (code, ins->sreg2);
3653 dreg = ins->sreg2;
3656 if (dreg != ins->sreg2)
3657 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3659 x86_prefix (code, X86_LOCK_PREFIX);
3660 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3662 if (dreg != ins->dreg) {
3663 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3664 x86_pop_reg (code, dreg);
3667 break;
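/*
 * For illustration: "lock xadd [mem], reg" atomically performs
 *
 *   tmp = [mem]; [mem] += reg; reg = tmp;
 *
 * so OP_ATOMIC_ADD_I4 yields the *old* value, while OP_ATOMIC_ADD_NEW_I4
 * below adds sreg2 back into the old value to yield the *new* one.
 */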
3669 case OP_ATOMIC_ADD_NEW_I4: {
3670 int dreg = ins->dreg;
3672 /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3673 if (ins->sreg2 == dreg) {
3674 if (dreg == X86_EBX) {
3675 dreg = X86_EDI;
3676 if (ins->inst_basereg == X86_EDI)
3677 dreg = X86_ESI;
3678 } else {
3679 dreg = X86_EBX;
3680 if (ins->inst_basereg == X86_EBX)
3681 dreg = X86_EDI;
3683 } else if (ins->inst_basereg == dreg) {
3684 if (dreg == X86_EBX) {
3685 dreg = X86_EDI;
3686 if (ins->sreg2 == X86_EDI)
3687 dreg = X86_ESI;
3688 } else {
3689 dreg = X86_EBX;
3690 if (ins->sreg2 == X86_EBX)
3691 dreg = X86_EDI;
3695 if (dreg != ins->dreg) {
3696 x86_push_reg (code, dreg);
3699 x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3700 x86_prefix (code, X86_LOCK_PREFIX);
3701 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3702 /* dreg contains the old value, add with sreg2 value */
3703 x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3705 if (ins->dreg != dreg) {
3706 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3707 x86_pop_reg (code, dreg);
3710 break;
3712 case OP_ATOMIC_EXCHANGE_I4: {
3713 guchar *br[2];
3714 int sreg2 = ins->sreg2;
3715 int breg = ins->inst_basereg;
3717 /* cmpxchg uses eax as the comparand, so we need to make sure we can use it;
3718 * this is a hack to overcome limits in the x86 reg allocator
3719 * (req: dreg == eax and sreg2 != eax and breg != eax)
3721 g_assert (ins->dreg == X86_EAX);
3723 /* We need the EAX reg for the cmpxchg */
3724 if (ins->sreg2 == X86_EAX) {
3725 sreg2 = (breg == X86_EDX) ? X86_EBX : X86_EDX;
3726 x86_push_reg (code, sreg2);
3727 x86_mov_reg_reg (code, sreg2, X86_EAX, 4);
3730 if (breg == X86_EAX) {
3731 breg = (sreg2 == X86_ESI) ? X86_EDI : X86_ESI;
3732 x86_push_reg (code, breg);
3733 x86_mov_reg_reg (code, breg, X86_EAX, 4);
3736 x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3738 br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3739 x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3740 br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3741 x86_patch (br [1], br [0]);
3743 if (breg != ins->inst_basereg)
3744 x86_pop_reg (code, breg);
3746 if (ins->sreg2 != sreg2)
3747 x86_pop_reg (code, sreg2);
3749 break;
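/*
 * For illustration: the loop above builds exchange out of lock cmpxchg.
 * EAX holds the value we believe is in memory; on failure cmpxchg reloads
 * EAX with the actual contents, so branching back to the lock prefix
 * retries until the swap succeeds, leaving the old value in EAX (= dreg).
 */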
3751 case OP_ATOMIC_CAS_I4: {
3752 g_assert (ins->sreg3 == X86_EAX);
3753 g_assert (ins->sreg1 != X86_EAX);
3754 g_assert (ins->sreg1 != ins->sreg2);
3756 x86_prefix (code, X86_LOCK_PREFIX);
3757 x86_cmpxchg_membase_reg (code, ins->sreg1, ins->inst_offset, ins->sreg2);
3759 if (ins->dreg != X86_EAX)
3760 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3761 break;
3763 #ifdef MONO_ARCH_SIMD_INTRINSICS
3764 case OP_ADDPS:
3765 x86_sse_alu_ps_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
3766 break;
3767 case OP_DIVPS:
3768 x86_sse_alu_ps_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
3769 break;
3770 case OP_MULPS:
3771 x86_sse_alu_ps_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
3772 break;
3773 case OP_SUBPS:
3774 x86_sse_alu_ps_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
3775 break;
3776 case OP_MAXPS:
3777 x86_sse_alu_ps_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
3778 break;
3779 case OP_MINPS:
3780 x86_sse_alu_ps_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
3781 break;
3782 case OP_COMPPS:
3783 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
3784 x86_sse_alu_ps_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
3785 break;
3786 case OP_ANDPS:
3787 x86_sse_alu_ps_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
3788 break;
3789 case OP_ANDNPS:
3790 x86_sse_alu_ps_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
3791 break;
3792 case OP_ORPS:
3793 x86_sse_alu_ps_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
3794 break;
3795 case OP_XORPS:
3796 x86_sse_alu_ps_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
3797 break;
3798 case OP_SQRTPS:
3799 x86_sse_alu_ps_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
3800 break;
3801 case OP_RSQRTPS:
3802 x86_sse_alu_ps_reg_reg (code, X86_SSE_RSQRT, ins->dreg, ins->sreg1);
3803 break;
3804 case OP_RCPPS:
3805 x86_sse_alu_ps_reg_reg (code, X86_SSE_RCP, ins->dreg, ins->sreg1);
3806 break;
3807 case OP_ADDSUBPS:
3808 x86_sse_alu_sd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
3809 break;
3810 case OP_HADDPS:
3811 x86_sse_alu_sd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
3812 break;
3813 case OP_HSUBPS:
3814 x86_sse_alu_sd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
3815 break;
3816 case OP_DUPPS_HIGH:
3817 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSHDUP, ins->dreg, ins->sreg1);
3818 break;
3819 case OP_DUPPS_LOW:
3820 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSLDUP, ins->dreg, ins->sreg1);
3821 break;
3823 case OP_PSHUFLEW_HIGH:
3824 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3825 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 1);
3826 break;
3827 case OP_PSHUFLEW_LOW:
3828 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3829 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 0);
3830 break;
3831 case OP_PSHUFLED:
3832 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3833 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->sreg1, ins->inst_c0);
3834 break;
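/*
 * The shuffle immediate asserted to fit in 0xFF above is a packed lane
 * selector: two bits per destination lane. A sketch of the PSHUFD
 * semantics (hypothetical helper, not used by the JIT):
 */
#if 0
static void
pshufd_sketch (guint32 dst [4], const guint32 src [4], guint8 imm)
{
	int i;

	for (i = 0; i < 4; ++i)
		dst [i] = src [(imm >> (2 * i)) & 0x3];
}
#endif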
3836 case OP_ADDPD:
3837 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
3838 break;
3839 case OP_DIVPD:
3840 x86_sse_alu_pd_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
3841 break;
3842 case OP_MULPD:
3843 x86_sse_alu_pd_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
3844 break;
3845 case OP_SUBPD:
3846 x86_sse_alu_pd_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
3847 break;
3848 case OP_MAXPD:
3849 x86_sse_alu_pd_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
3850 break;
3851 case OP_MINPD:
3852 x86_sse_alu_pd_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
3853 break;
3854 case OP_COMPPD:
3855 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
3856 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
3857 break;
3858 case OP_ANDPD:
3859 x86_sse_alu_pd_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
3860 break;
3861 case OP_ANDNPD:
3862 x86_sse_alu_pd_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
3863 break;
3864 case OP_ORPD:
3865 x86_sse_alu_pd_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
3866 break;
3867 case OP_XORPD:
3868 x86_sse_alu_pd_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
3869 break;
3870 case OP_SQRTPD:
3871 x86_sse_alu_pd_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
3872 break;
3873 case OP_ADDSUBPD:
3874 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
3875 break;
3876 case OP_HADDPD:
3877 x86_sse_alu_pd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
3878 break;
3879 case OP_HSUBPD:
3880 x86_sse_alu_pd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
3881 break;
3882 case OP_DUPPD:
3883 x86_sse_alu_sd_reg_reg (code, X86_SSE_MOVDDUP, ins->dreg, ins->sreg1);
3884 break;
3886 case OP_EXTRACT_MASK:
3887 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMOVMSKB, ins->dreg, ins->sreg1);
3888 break;
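/*
 * PMOVMSKB gathers the sign bit of each of the 16 bytes into the low bits
 * of an integer register, which is how a packed comparison result becomes
 * a scalar mask. Sketch (hypothetical helper):
 */
#if 0
static guint32
pmovmskb_sketch (const guint8 xmm [16])
{
	guint32 mask = 0;
	int i;

	for (i = 0; i < 16; ++i)
		mask |= (guint32)(xmm [i] >> 7) << i;
	return mask;
}
#endif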
3890 case OP_PAND:
3891 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAND, ins->sreg1, ins->sreg2);
3892 break;
3893 case OP_POR:
3894 x86_sse_alu_pd_reg_reg (code, X86_SSE_POR, ins->sreg1, ins->sreg2);
3895 break;
3896 case OP_PXOR:
3897 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->sreg1, ins->sreg2);
3898 break;
3900 case OP_PADDB:
3901 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDB, ins->sreg1, ins->sreg2);
3902 break;
3903 case OP_PADDW:
3904 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDW, ins->sreg1, ins->sreg2);
3905 break;
3906 case OP_PADDD:
3907 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDD, ins->sreg1, ins->sreg2);
3908 break;
3909 case OP_PADDQ:
3910 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDQ, ins->sreg1, ins->sreg2);
3911 break;
3913 case OP_PSUBB:
3914 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBB, ins->sreg1, ins->sreg2);
3915 break;
3916 case OP_PSUBW:
3917 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBW, ins->sreg1, ins->sreg2);
3918 break;
3919 case OP_PSUBD:
3920 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBD, ins->sreg1, ins->sreg2);
3921 break;
3922 case OP_PSUBQ:
3923 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBQ, ins->sreg1, ins->sreg2);
3924 break;
3926 case OP_PMAXB_UN:
3927 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXUB, ins->sreg1, ins->sreg2);
3928 break;
3929 case OP_PMAXW_UN:
3930 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUW, ins->sreg1, ins->sreg2);
3931 break;
3932 case OP_PMAXD_UN:
3933 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUD, ins->sreg1, ins->sreg2);
3934 break;
3936 case OP_PMAXB:
3937 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSB, ins->sreg1, ins->sreg2);
3938 break;
3939 case OP_PMAXW:
3940 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXSW, ins->sreg1, ins->sreg2);
3941 break;
3942 case OP_PMAXD:
3943 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSD, ins->sreg1, ins->sreg2);
3944 break;
3946 case OP_PAVGB_UN:
3947 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGB, ins->sreg1, ins->sreg2);
3948 break;
3949 case OP_PAVGW_UN:
3950 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGW, ins->sreg1, ins->sreg2);
3951 break;
3953 case OP_PMINB_UN:
3954 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINUB, ins->sreg1, ins->sreg2);
3955 break;
3956 case OP_PMINW_UN:
3957 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUW, ins->sreg1, ins->sreg2);
3958 break;
3959 case OP_PMIND_UN:
3960 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUD, ins->sreg1, ins->sreg2);
3961 break;
3963 case OP_PMINB:
3964 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSB, ins->sreg1, ins->sreg2);
3965 break;
3966 case OP_PMINW:
3967 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINSW, ins->sreg1, ins->sreg2);
3968 break;
3969 case OP_PMIND:
3970 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSD, ins->sreg1, ins->sreg2);
3971 break;
3973 case OP_PCMPEQB:
3974 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQB, ins->sreg1, ins->sreg2);
3975 break;
3976 case OP_PCMPEQW:
3977 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQW, ins->sreg1, ins->sreg2);
3978 break;
3979 case OP_PCMPEQD:
3980 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2);
3981 break;
3982 case OP_PCMPEQQ:
3983 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPEQQ, ins->sreg1, ins->sreg2);
3984 break;
3986 case OP_PCMPGTB:
3987 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2);
3988 break;
3989 case OP_PCMPGTW:
3990 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTW, ins->sreg1, ins->sreg2);
3991 break;
3992 case OP_PCMPGTD:
3993 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2);
3994 break;
3995 case OP_PCMPGTQ:
3996 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPGTQ, ins->sreg1, ins->sreg2);
3997 break;
3999 case OP_PSUM_ABS_DIFF:
4000 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2);
4001 break;
4003 case OP_UNPACK_LOWB:
4004 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLBW, ins->sreg1, ins->sreg2);
4005 break;
4006 case OP_UNPACK_LOWW:
4007 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLWD, ins->sreg1, ins->sreg2);
4008 break;
4009 case OP_UNPACK_LOWD:
4010 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLDQ, ins->sreg1, ins->sreg2);
4011 break;
4012 case OP_UNPACK_LOWQ:
4013 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLQDQ, ins->sreg1, ins->sreg2);
4014 break;
4015 case OP_UNPACK_LOWPS:
4016 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4017 break;
4018 case OP_UNPACK_LOWPD:
4019 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4020 break;
4022 case OP_UNPACK_HIGHB:
4023 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHBW, ins->sreg1, ins->sreg2);
4024 break;
4025 case OP_UNPACK_HIGHW:
4026 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHWD, ins->sreg1, ins->sreg2);
4027 break;
4028 case OP_UNPACK_HIGHD:
4029 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHDQ, ins->sreg1, ins->sreg2);
4030 break;
4031 case OP_UNPACK_HIGHQ:
4032 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHQDQ, ins->sreg1, ins->sreg2);
4033 break;
4034 case OP_UNPACK_HIGHPS:
4035 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4036 break;
4037 case OP_UNPACK_HIGHPD:
4038 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4039 break;
4041 case OP_PACKW:
4042 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSWB, ins->sreg1, ins->sreg2);
4043 break;
4044 case OP_PACKD:
4045 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSDW, ins->sreg1, ins->sreg2);
4046 break;
4047 case OP_PACKW_UN:
4048 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKUSWB, ins->sreg1, ins->sreg2);
4049 break;
4050 case OP_PACKD_UN:
4051 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PACKUSDW, ins->sreg1, ins->sreg2);
4052 break;
4054 case OP_PADDB_SAT_UN:
4055 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSB, ins->sreg1, ins->sreg2);
4056 break;
4057 case OP_PSUBB_SAT_UN:
4058 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSB, ins->sreg1, ins->sreg2);
4059 break;
4060 case OP_PADDW_SAT_UN:
4061 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSW, ins->sreg1, ins->sreg2);
4062 break;
4063 case OP_PSUBW_SAT_UN:
4064 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSW, ins->sreg1, ins->sreg2);
4065 break;
4067 case OP_PADDB_SAT:
4068 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSB, ins->sreg1, ins->sreg2);
4069 break;
4070 case OP_PSUBB_SAT:
4071 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSB, ins->sreg1, ins->sreg2);
4072 break;
4073 case OP_PADDW_SAT:
4074 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSW, ins->sreg1, ins->sreg2);
4075 break;
4076 case OP_PSUBW_SAT:
4077 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSW, ins->sreg1, ins->sreg2);
4078 break;
4080 case OP_PMULW:
4081 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULLW, ins->sreg1, ins->sreg2);
4082 break;
4083 case OP_PMULD:
4084 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMULLD, ins->sreg1, ins->sreg2);
4085 break;
4086 case OP_PMULQ:
4087 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULUDQ, ins->sreg1, ins->sreg2);
4088 break;
4089 case OP_PMULW_HIGH_UN:
4090 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHUW, ins->sreg1, ins->sreg2);
4091 break;
4092 case OP_PMULW_HIGH:
4093 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHW, ins->sreg1, ins->sreg2);
4094 break;
4096 case OP_PSHRW:
4097 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4098 break;
4099 case OP_PSHRW_REG:
4100 x86_sse_shift_reg_reg (code, X86_SSE_PSRLW_REG, ins->dreg, ins->sreg2);
4101 break;
4103 case OP_PSARW:
4104 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4105 break;
4106 case OP_PSARW_REG:
4107 x86_sse_shift_reg_reg (code, X86_SSE_PSRAW_REG, ins->dreg, ins->sreg2);
4108 break;
4110 case OP_PSHLW:
4111 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4112 break;
4113 case OP_PSHLW_REG:
4114 x86_sse_shift_reg_reg (code, X86_SSE_PSLLW_REG, ins->dreg, ins->sreg2);
4115 break;
4117 case OP_PSHRD:
4118 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4119 break;
4120 case OP_PSHRD_REG:
4121 x86_sse_shift_reg_reg (code, X86_SSE_PSRLD_REG, ins->dreg, ins->sreg2);
4122 break;
4124 case OP_PSARD:
4125 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4126 break;
4127 case OP_PSARD_REG:
4128 x86_sse_shift_reg_reg (code, X86_SSE_PSRAD_REG, ins->dreg, ins->sreg2);
4129 break;
4131 case OP_PSHLD:
4132 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4133 break;
4134 case OP_PSHLD_REG:
4135 x86_sse_shift_reg_reg (code, X86_SSE_PSLLD_REG, ins->dreg, ins->sreg2);
4136 break;
4138 case OP_PSHRQ:
4139 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4140 break;
4141 case OP_PSHRQ_REG:
4142 x86_sse_shift_reg_reg (code, X86_SSE_PSRLQ_REG, ins->dreg, ins->sreg2);
4143 break;
4145 case OP_PSHLQ:
4146 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4147 break;
4148 case OP_PSHLQ_REG:
4149 x86_sse_shift_reg_reg (code, X86_SSE_PSLLQ_REG, ins->dreg, ins->sreg2);
4150 break;
4152 case OP_ICONV_TO_X:
4153 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4154 break;
4155 case OP_EXTRACT_I4:
4156 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4157 break;
4158 case OP_EXTRACT_I1:
4159 case OP_EXTRACT_U1:
4160 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4161 if (ins->inst_c0)
4162 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
4163 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
4164 break;
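/*
 * What the movd + shr + widen sequence above computes, assuming inst_c0
 * selects a byte lane inside the low dword of the vector (sketch with a
 * hypothetical helper):
 */
#if 0
static gint32
extract_byte_sketch (guint32 low_dword, int lane, gboolean is_signed)
{
	guint32 v = low_dword >> (lane * 8);

	return is_signed ? (gint32)(gint8)v : (gint32)(guint8)v;
}
#endif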
4165 case OP_EXTRACT_I2:
4166 case OP_EXTRACT_U2:
4167 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4168 if (ins->inst_c0)
4169 x86_shift_reg_imm (code, X86_SHR, ins->dreg, 16);
4170 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE);
4171 break;
4172 case OP_EXTRACT_R8:
4173 if (ins->inst_c0)
4174 x86_sse_alu_pd_membase_reg (code, X86_SSE_MOVHPD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4175 else
4176 x86_sse_alu_sd_membase_reg (code, X86_SSE_MOVSD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4177 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE);
4178 break;
4180 case OP_INSERT_I2:
4181 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->sreg1, ins->sreg2, ins->inst_c0);
4182 break;
4183 case OP_EXTRACTX_U2:
4184 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PEXTRW, ins->dreg, ins->sreg1, ins->inst_c0);
4185 break;
4186 case OP_INSERTX_U1_SLOW:
4187 /* sreg1 is the extracted ireg (scratch)
4188  * sreg2 is the ireg to be inserted (scratch)
4189  * dreg is the xreg to receive the value */
4191 /*clear the bits from the extracted word*/
4192 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
4193 /*shift the value to insert if needed*/
4194 if (ins->inst_c0 & 1)
4195 x86_shift_reg_imm (code, X86_SHL, ins->sreg2, 8);
4196 /*join them together*/
4197 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
4198 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
4199 break;
4200 case OP_INSERTX_I4_SLOW:
4201 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
4202 x86_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
4203 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
4204 break;
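/*
 * Without SSE4.1's PINSRD, a 32-bit insert is done as two 16-bit word
 * inserts: word lanes 2n and 2n+1 make up dword lane n. Store-side
 * equivalent (hypothetical helper):
 */
#if 0
static void
insert_dword_sketch (guint16 words [8], int dword_lane, guint32 val)
{
	words [dword_lane * 2] = (guint16)(val & 0xffff);
	words [dword_lane * 2 + 1] = (guint16)(val >> 16);
}
#endif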
4206 case OP_INSERTX_R4_SLOW:
4207 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4208 /*TODO if inst_c0 == 0 use movss*/
4209 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 0, ins->inst_c0 * 2);
4210 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 2, ins->inst_c0 * 2 + 1);
4211 break;
4212 case OP_INSERTX_R8_SLOW:
4213 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4214 if (ins->inst_c0)
4215 x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVHPD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4216 else
4217 x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVSD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4218 break;
4220 case OP_STOREX_MEMBASE_REG:
4221 case OP_STOREX_MEMBASE:
4222 x86_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4223 break;
4224 case OP_LOADX_MEMBASE:
4225 x86_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4226 break;
4227 case OP_LOADX_ALIGNED_MEMBASE:
4228 x86_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4229 break;
4230 case OP_STOREX_ALIGNED_MEMBASE_REG:
4231 x86_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4232 break;
4233 case OP_STOREX_NTA_MEMBASE_REG:
4234 x86_sse_alu_reg_membase (code, X86_SSE_MOVNTPS, ins->dreg, ins->sreg1, ins->inst_offset);
4235 break;
4236 case OP_PREFETCH_MEMBASE:
4237 x86_sse_alu_reg_membase (code, X86_SSE_PREFETCH, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
4239 break;
4240 case OP_XMOVE:
4241 /*FIXME the peephole pass should have killed this*/
4242 if (ins->dreg != ins->sreg1)
4243 x86_movaps_reg_reg (code, ins->dreg, ins->sreg1);
4244 break;
4245 case OP_XZERO:
4246 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->dreg, ins->dreg);
4247 break;
4248 case OP_ICONV_TO_R8_RAW:
4249 x86_mov_membase_reg (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1, 4);
4250 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE);
4251 break;
4253 case OP_FCONV_TO_R8_X:
4254 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4255 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4256 break;
4258 case OP_XCONV_R8_TO_I4:
4259 x86_cvttsd2si (code, ins->dreg, ins->sreg1);
4260 switch (ins->backend.source_opcode) {
4261 case OP_FCONV_TO_I1:
4262 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
4263 break;
4264 case OP_FCONV_TO_U1:
4265 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4266 break;
4267 case OP_FCONV_TO_I2:
4268 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
4269 break;
4270 case OP_FCONV_TO_U2:
4271 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
4272 break;
4274 break;
4276 case OP_EXPAND_I1:
4277 /*FIXME this causes a partial register stall, maybe it would not be that bad to use shift + mask + or*/
4278 /*The +4 is to get a mov ?h, ?l over the same reg.*/
4279 x86_mov_reg_reg (code, ins->dreg + 4, ins->dreg, 1);
4280 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4281 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4282 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4283 break;
4284 case OP_EXPAND_I2:
4285 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4286 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4287 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4288 break;
4289 case OP_EXPAND_I4:
4290 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4291 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4292 break;
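/*
 * Expanding a scalar is movd into lane 0 followed by PSHUFD with selector
 * 0x00, which makes every destination lane read lane 0. Sketch
 * (hypothetical helper):
 */
#if 0
static void
expand_i4_sketch (guint32 dst [4], guint32 v)
{
	int i;

	for (i = 0; i < 4; ++i)
		dst [i] = v; /* pshufd imm 0x00 replicates lane 0 */
}
#endif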
4293 case OP_EXPAND_R4:
4294 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4295 x86_movd_xreg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4296 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4297 break;
4298 case OP_EXPAND_R8:
4299 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4300 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4301 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0x44);
4302 break;
4303 #endif
4304 case OP_LIVERANGE_START: {
4305 if (cfg->verbose_level > 1)
4306 printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4307 MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
4308 break;
4310 case OP_LIVERANGE_END: {
4311 if (cfg->verbose_level > 1)
4312 printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4313 MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
4314 break;
4316 default:
4317 g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
4318 g_assert_not_reached ();
4321 if (G_UNLIKELY ((code - cfg->native_code - offset) > max_len)) {
4322 g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4323 mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4324 g_assert_not_reached ();
4327 cpos += max_len;
4330 cfg->code_len = code - cfg->native_code;
4333 #endif /* DISABLE_JIT */
4335 void
4336 mono_arch_register_lowlevel_calls (void)
4340 void
4341 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4343 MonoJumpInfo *patch_info;
4344 gboolean compile_aot = !run_cctors;
4346 for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4347 unsigned char *ip = patch_info->ip.i + code;
4348 const unsigned char *target;
4350 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4352 if (compile_aot) {
4353 switch (patch_info->type) {
4354 case MONO_PATCH_INFO_BB:
4355 case MONO_PATCH_INFO_LABEL:
4356 break;
4357 default:
4358 /* No need to patch these */
4359 continue;
4363 switch (patch_info->type) {
4364 case MONO_PATCH_INFO_IP:
4365 *((gconstpointer *)(ip)) = target;
4366 break;
4367 case MONO_PATCH_INFO_CLASS_INIT: {
4368 guint8 *code = ip;
4369 /* Might already have been changed to a nop */
4370 x86_call_code (code, 0);
4371 x86_patch (ip, target);
4372 break;
4374 case MONO_PATCH_INFO_ABS:
4375 case MONO_PATCH_INFO_METHOD:
4376 case MONO_PATCH_INFO_METHOD_JUMP:
4377 case MONO_PATCH_INFO_INTERNAL_METHOD:
4378 case MONO_PATCH_INFO_BB:
4379 case MONO_PATCH_INFO_LABEL:
4380 case MONO_PATCH_INFO_RGCTX_FETCH:
4381 case MONO_PATCH_INFO_GENERIC_CLASS_INIT:
4382 case MONO_PATCH_INFO_MONITOR_ENTER:
4383 case MONO_PATCH_INFO_MONITOR_EXIT:
4384 x86_patch (ip, target);
4385 break;
4386 case MONO_PATCH_INFO_NONE:
4387 break;
4388 default: {
4389 guint32 offset = mono_arch_get_patch_offset (ip);
4390 *((gconstpointer *)(ip + offset)) = target;
4391 break;
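/*
 * For the branch/call cases above, patching means rewriting the rel32
 * displacement so that target = next_ip + disp. Sketch for the 5-byte
 * 0xe8/0xe9 forms (hypothetical helper; the full logic, which also handles
 * short branches, lives in x86_patch ()):
 */
#if 0
static void
patch_rel32_sketch (guint8 *ip, guint8 *target)
{
	/* ip points at the opcode byte; the displacement is relative to ip + 5 */
	*(gint32 *)(ip + 1) = (gint32)(target - (ip + 5));
}
#endif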
4397 guint8 *
4398 mono_arch_emit_prolog (MonoCompile *cfg)
4400 MonoMethod *method = cfg->method;
4401 MonoBasicBlock *bb;
4402 MonoMethodSignature *sig;
4403 MonoInst *inst;
4404 int alloc_size, pos, max_offset, i, cfa_offset;
4405 guint8 *code;
4406 gboolean need_stack_frame;
4408 cfg->code_size = MAX (mono_method_get_header (method)->code_size * 4, 10240);
4410 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4411 cfg->code_size += 512;
4413 code = cfg->native_code = g_malloc (cfg->code_size);
4415 /* Offset between RSP and the CFA */
4416 cfa_offset = 0;
4418 // CFA = sp + 4
4419 cfa_offset = sizeof (gpointer);
4420 mono_emit_unwind_op_def_cfa (cfg, code, X86_ESP, sizeof (gpointer));
4421 // IP saved at CFA - 4
4422 /* There is no IP reg on x86 */
4423 mono_emit_unwind_op_offset (cfg, code, X86_NREG, -cfa_offset);
4425 need_stack_frame = needs_stack_frame (cfg);
4427 if (need_stack_frame) {
4428 x86_push_reg (code, X86_EBP);
4429 cfa_offset += sizeof (gpointer);
4430 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
4431 mono_emit_unwind_op_offset (cfg, code, X86_EBP, - cfa_offset);
4432 x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
4433 mono_emit_unwind_op_def_cfa_reg (cfg, code, X86_EBP);
4436 alloc_size = cfg->stack_offset;
4437 pos = 0;
4439 if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
4440 /* Might need to attach the thread to the JIT or change the domain for the callback */
4441 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
4442 guint8 *buf, *no_domain_branch;
4444 code = mono_x86_emit_tls_get (code, X86_EAX, appdomain_tls_offset);
4445 x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
4446 no_domain_branch = code;
4447 x86_branch8 (code, X86_CC_NE, 0, 0);
4448 code = mono_x86_emit_tls_get (code, X86_EAX, lmf_tls_offset);
4449 x86_test_reg_reg (code, X86_EAX, X86_EAX);
4450 buf = code;
4451 x86_branch8 (code, X86_CC_NE, 0, 0);
4452 x86_patch (no_domain_branch, code);
4453 x86_push_imm (code, cfg->domain);
4454 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4455 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4456 x86_patch (buf, code);
4457 #ifdef PLATFORM_WIN32
4458 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4459 /* FIXME: Add a separate key for LMF to avoid this */
4460 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4461 #endif
4463 else {
4464 g_assert (!cfg->compile_aot);
4465 x86_push_imm (code, cfg->domain);
4466 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4467 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4471 if (method->save_lmf) {
4472 pos += sizeof (MonoLMF);
4474 /* save the current IP */
4475 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
4476 x86_push_imm_template (code);
4477 cfa_offset += sizeof (gpointer);
4479 /* save all caller saved regs */
4480 x86_push_reg (code, X86_EBP);
4481 cfa_offset += sizeof (gpointer);
4482 x86_push_reg (code, X86_ESI);
4483 cfa_offset += sizeof (gpointer);
4484 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
4485 x86_push_reg (code, X86_EDI);
4486 cfa_offset += sizeof (gpointer);
4487 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
4488 x86_push_reg (code, X86_EBX);
4489 cfa_offset += sizeof (gpointer);
4490 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
4492 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
4493 /*
4494  * Optimized version which uses the mono_lmf TLS variable instead of indirection
4495  * through the mono_lmf_addr TLS variable.
4496  */
4497 /* %eax = previous_lmf */
4498 x86_prefix (code, X86_GS_PREFIX);
4499 x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
4500 /* skip esp + method_info + lmf */
4501 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
4502 /* push previous_lmf */
4503 x86_push_reg (code, X86_EAX);
4504 /* new lmf = ESP */
4505 x86_prefix (code, X86_GS_PREFIX);
4506 x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
4507 } else {
4508 /* get the address of lmf for the current thread */
4509 /*
4510  * This is performance critical so we try to use some tricks to make
4511  * it fast.
4512  */
4514 if (lmf_addr_tls_offset != -1) {
4515 /* Load the lmf quickly using the GS register */
4516 code = mono_x86_emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
4517 #ifdef PLATFORM_WIN32
4518 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4519 /* FIXME: Add a separate key for LMF to avoid this */
4520 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4521 #endif
4522 } else {
4523 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
4526 /* Skip esp + method info */
4527 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
4529 /* push lmf */
4530 x86_push_reg (code, X86_EAX);
4531 /* push *lmf (previous_lmf) */
4532 x86_push_membase (code, X86_EAX, 0);
4533 /* *(lmf) = ESP */
4534 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
4536 } else {
4538 if (cfg->used_int_regs & (1 << X86_EBX)) {
4539 x86_push_reg (code, X86_EBX);
4540 pos += 4;
4541 cfa_offset += sizeof (gpointer);
4542 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
4545 if (cfg->used_int_regs & (1 << X86_EDI)) {
4546 x86_push_reg (code, X86_EDI);
4547 pos += 4;
4548 cfa_offset += sizeof (gpointer);
4549 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
4552 if (cfg->used_int_regs & (1 << X86_ESI)) {
4553 x86_push_reg (code, X86_ESI);
4554 pos += 4;
4555 cfa_offset += sizeof (gpointer);
4556 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
4560 alloc_size -= pos;
4562 /* the original alloc_size is already aligned: %ebp and the return ip have been pushed, so realign */
4563 if (mono_do_x86_stack_align && need_stack_frame) {
4564 int tot = alloc_size + pos + 4; /* ret ip */
4565 if (need_stack_frame)
4566 tot += 4; /* ebp */
4567 tot &= MONO_ARCH_FRAME_ALIGNMENT - 1;
4568 if (tot)
4569 alloc_size += MONO_ARCH_FRAME_ALIGNMENT - tot;
4572 if (alloc_size) {
4573 /* See mono_emit_stack_alloc */
4574 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
4575 guint32 remaining_size = alloc_size;
4576 while (remaining_size >= 0x1000) {
4577 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
4578 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
4579 remaining_size -= 0x1000;
4581 if (remaining_size)
4582 x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
4583 #else
4584 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
4585 #endif
4587 g_assert (need_stack_frame);
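/*
 * The 0x1000-byte stepping above exists because the OS grows the stack one
 * guard page at a time: each new page must be touched in order, or a large
 * frame could skip past the guard page entirely. Sketch of the probing
 * pattern (hypothetical helper):
 */
#if 0
static void
probe_stack_sketch (volatile guint8 *sp, guint32 size)
{
	while (size >= 0x1000) {
		sp -= 0x1000;
		(void)*sp; /* touch the page so the guard page can fault and grow */
		size -= 0x1000;
	}
}
#endif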
4590 if (cfg->method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED ||
4591 cfg->method->wrapper_type == MONO_WRAPPER_RUNTIME_INVOKE) {
4592 x86_alu_reg_imm (code, X86_AND, X86_ESP, -MONO_ARCH_FRAME_ALIGNMENT);
4595 #if DEBUG_STACK_ALIGNMENT
4596 /* check the stack is aligned */
4597 if (need_stack_frame && method->wrapper_type == MONO_WRAPPER_NONE) {
4598 x86_mov_reg_reg (code, X86_ECX, X86_ESP, 4);
4599 x86_alu_reg_imm (code, X86_AND, X86_ECX, MONO_ARCH_FRAME_ALIGNMENT - 1);
4600 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
4601 x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
4602 x86_breakpoint (code);
4604 #endif
4606 /* compute max_offset in order to use short forward jumps */
4607 max_offset = 0;
4608 if (cfg->opt & MONO_OPT_BRANCH) {
4609 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4610 MonoInst *ins;
4611 bb->max_offset = max_offset;
4613 if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4614 max_offset += 6;
4615 /* max alignment for loops */
4616 if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4617 max_offset += LOOP_ALIGNMENT;
4619 MONO_BB_FOR_EACH_INS (bb, ins) {
4620 if (ins->opcode == OP_LABEL)
4621 ins->inst_c1 = max_offset;
4623 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
4628 /* store runtime generic context */
4629 if (cfg->rgctx_var) {
4630 g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET && cfg->rgctx_var->inst_basereg == X86_EBP);
4632 x86_mov_membase_reg (code, X86_EBP, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 4);
4635 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4636 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4638 /* load arguments allocated to register from the stack */
4639 sig = mono_method_signature (method);
4640 pos = 0;
4642 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4643 inst = cfg->args [pos];
4644 if (inst->opcode == OP_REGVAR) {
4645 g_assert (need_stack_frame);
4646 x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
4647 if (cfg->verbose_level > 2)
4648 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
4650 pos++;
4653 cfg->code_len = code - cfg->native_code;
4655 g_assert (cfg->code_len < cfg->code_size);
4657 return code;
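/*
 * The prolog above establishes the standard x86 frame when
 * need_stack_frame is set: saved ebp at [ebp], return address at [ebp + 4],
 * incoming stack arguments from [ebp + 8] (ARGS_OFFSET). A sketch of the
 * fixed part as a struct (hypothetical, for illustration only):
 */
#if 0
typedef struct {
	gpointer saved_ebp;  /* [ebp + 0], pushed by this prolog          */
	gpointer return_ip;  /* [ebp + 4], pushed by the caller's call    */
	gpointer args [1];   /* [ebp + 8] onwards, i.e. ARGS_OFFSET       */
} X86FrameSketch;
#endif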
4660 void
4661 mono_arch_emit_epilog (MonoCompile *cfg)
4663 MonoMethod *method = cfg->method;
4664 MonoMethodSignature *sig = mono_method_signature (method);
4665 int quad, pos;
4666 guint32 stack_to_pop;
4667 guint8 *code;
4668 int max_epilog_size = 16;
4669 CallInfo *cinfo;
4670 gboolean need_stack_frame = needs_stack_frame (cfg);
4672 if (cfg->method->save_lmf)
4673 max_epilog_size += 128;
4675 while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
4676 cfg->code_size *= 2;
4677 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4678 mono_jit_stats.code_reallocs++;
4681 code = cfg->native_code + cfg->code_len;
4683 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4684 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4686 /* the code restoring the registers must be kept in sync with OP_JMP */
4687 pos = 0;
4689 if (method->save_lmf) {
4690 gint32 prev_lmf_reg;
4691 gint32 lmf_offset = -sizeof (MonoLMF);
4693 /* check if we need to restore protection of the stack after a stack overflow */
4694 if (mono_get_jit_tls_offset () != -1) {
4695 guint8 *patch;
4696 code = mono_x86_emit_tls_get (code, X86_ECX, mono_get_jit_tls_offset ());
4697 /* we load the value in a separate instruction: this mechanism may be
4698  * used later as a safer way to do thread interruption
4699  */
4700 x86_mov_reg_membase (code, X86_ECX, X86_ECX, G_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 4);
4701 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
4702 patch = code;
4703 x86_branch8 (code, X86_CC_Z, 0, FALSE);
4704 /* note that the call trampoline will preserve eax/edx */
4705 x86_call_reg (code, X86_ECX);
4706 x86_patch (patch, code);
4707 } else {
4708 /* FIXME: maybe save the jit tls in the prolog */
4710 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
4711 /*
4712  * Optimized version which uses the mono_lmf TLS variable instead of indirection
4713  * through the mono_lmf_addr TLS variable.
4714  */
4715 /* reg = previous_lmf */
4716 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
4718 /* lmf = previous_lmf */
4719 x86_prefix (code, X86_GS_PREFIX);
4720 x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
4721 } else {
4722 /* Find a spare register */
4723 switch (mini_type_get_underlying_type (cfg->generic_sharing_context, sig->ret)->type) {
4724 case MONO_TYPE_I8:
4725 case MONO_TYPE_U8:
4726 prev_lmf_reg = X86_EDI;
4727 cfg->used_int_regs |= (1 << X86_EDI);
4728 break;
4729 default:
4730 prev_lmf_reg = X86_EDX;
4731 break;
4734 /* reg = previous_lmf */
4735 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
4737 /* ecx = lmf */
4738 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
4740 /* *(lmf) = previous_lmf */
4741 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
4744 /* restore caller saved regs */
4745 if (cfg->used_int_regs & (1 << X86_EBX)) {
4746 x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
4749 if (cfg->used_int_regs & (1 << X86_EDI)) {
4750 x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
4752 if (cfg->used_int_regs & (1 << X86_ESI)) {
4753 x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
4756 /* EBP is restored by LEAVE */
4757 } else {
4758 if (cfg->used_int_regs & (1 << X86_EBX)) {
4759 pos -= 4;
4761 if (cfg->used_int_regs & (1 << X86_EDI)) {
4762 pos -= 4;
4764 if (cfg->used_int_regs & (1 << X86_ESI)) {
4765 pos -= 4;
4768 if (pos) {
4769 g_assert (need_stack_frame);
4770 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
4773 if (cfg->used_int_regs & (1 << X86_ESI)) {
4774 x86_pop_reg (code, X86_ESI);
4776 if (cfg->used_int_regs & (1 << X86_EDI)) {
4777 x86_pop_reg (code, X86_EDI);
4779 if (cfg->used_int_regs & (1 << X86_EBX)) {
4780 x86_pop_reg (code, X86_EBX);
4784 /* Load returned vtypes into registers if needed */
4785 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
4786 if (cinfo->ret.storage == ArgValuetypeInReg) {
4787 for (quad = 0; quad < 2; quad ++) {
4788 switch (cinfo->ret.pair_storage [quad]) {
4789 case ArgInIReg:
4790 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
4791 break;
4792 case ArgOnFloatFpStack:
4793 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
4794 break;
4795 case ArgOnDoubleFpStack:
4796 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
4797 break;
4798 case ArgNone:
4799 break;
4800 default:
4801 g_assert_not_reached ();
4806 if (need_stack_frame)
4807 x86_leave (code);
4809 if (CALLCONV_IS_STDCALL (sig)) {
4810 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
4812 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
4813 } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
4814 stack_to_pop = 4;
4815 else
4816 stack_to_pop = 0;
4818 if (stack_to_pop) {
4819 g_assert (need_stack_frame);
4820 x86_ret_imm (code, stack_to_pop);
4821 } else {
4822 x86_ret (code);
4825 cfg->code_len = code - cfg->native_code;
4827 g_assert (cfg->code_len < cfg->code_size);
4830 void
4831 mono_arch_emit_exceptions (MonoCompile *cfg)
4833 MonoJumpInfo *patch_info;
4834 int nthrows, i;
4835 guint8 *code;
4836 MonoClass *exc_classes [16];
4837 guint8 *exc_throw_start [16], *exc_throw_end [16];
4838 guint32 code_size;
4839 int exc_count = 0;
4841 /* Compute needed space */
4842 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4843 if (patch_info->type == MONO_PATCH_INFO_EXC)
4844 exc_count++;
4847 /*
4848  * make sure we have enough space for exceptions
4849  * 16 is the size of two push_imm instructions and a call
4850  */
4851 if (cfg->compile_aot)
4852 code_size = exc_count * 32;
4853 else
4854 code_size = exc_count * 16;
4856 while (cfg->code_len + code_size > (cfg->code_size - 16)) {
4857 cfg->code_size *= 2;
4858 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4859 mono_jit_stats.code_reallocs++;
4862 code = cfg->native_code + cfg->code_len;
4864 nthrows = 0;
4865 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4866 switch (patch_info->type) {
4867 case MONO_PATCH_INFO_EXC: {
4868 MonoClass *exc_class;
4869 guint8 *buf, *buf2;
4870 guint32 throw_ip;
4872 x86_patch (patch_info->ip.i + cfg->native_code, code);
4874 exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4875 g_assert (exc_class);
4876 throw_ip = patch_info->ip.i;
4878 /* Find a throw sequence for the same exception class */
4879 for (i = 0; i < nthrows; ++i)
4880 if (exc_classes [i] == exc_class)
4881 break;
4882 if (i < nthrows) {
4883 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4884 x86_jump_code (code, exc_throw_start [i]);
4885 patch_info->type = MONO_PATCH_INFO_NONE;
4887 else {
4888 guint32 size;
4890 /* Compute size of code following the push <OFFSET> */
4891 size = 5 + 5;
4893 if ((code - cfg->native_code) - throw_ip < 126 - size) {
4894 /* Use the shorter form */
4895 buf = buf2 = code;
4896 x86_push_imm (code, 0);
4898 else {
4899 buf = code;
4900 x86_push_imm (code, 0xf0f0f0f0);
4901 buf2 = code;
4904 if (nthrows < 16) {
4905 exc_classes [nthrows] = exc_class;
4906 exc_throw_start [nthrows] = code;
4909 x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
4910 patch_info->data.name = "mono_arch_throw_corlib_exception";
4911 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4912 patch_info->ip.i = code - cfg->native_code;
4913 x86_call_code (code, 0);
4914 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
4915 while (buf < buf2)
4916 x86_nop (buf);
4918 if (nthrows < 16) {
4919 exc_throw_end [nthrows] = code;
4920 nthrows ++;
4923 break;
4925 default:
4926 /* do nothing */
4927 break;
4931 cfg->code_len = code - cfg->native_code;
4933 g_assert (cfg->code_len < cfg->code_size);
4936 void
4937 mono_arch_flush_icache (guint8 *code, gint size)
4939 /* not needed */
4942 void
4943 mono_arch_flush_register_windows (void)
4947 gboolean
4948 mono_arch_is_inst_imm (gint64 imm)
4950 return TRUE;
4953 /*
4954  * Support for fast access to the thread-local lmf structure using the GS
4955  * segment register on NPTL + kernel 2.6.x.
4956  */
4958 static gboolean tls_offset_inited = FALSE;
4960 void
4961 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4963 if (!tls_offset_inited) {
4964 if (!getenv ("MONO_NO_TLS")) {
4965 #ifdef PLATFORM_WIN32
4966 /*
4967  * We need to init this multiple times, since when we are first called, the key might not
4968  * be initialized yet.
4969  */
4970 appdomain_tls_offset = mono_domain_get_tls_key ();
4971 lmf_tls_offset = mono_get_jit_tls_key ();
4972 thread_tls_offset = mono_thread_get_tls_key ();
4974 /* Only 64 tls entries can be accessed using inline code */
4975 if (appdomain_tls_offset >= 64)
4976 appdomain_tls_offset = -1;
4977 if (lmf_tls_offset >= 64)
4978 lmf_tls_offset = -1;
4979 if (thread_tls_offset >= 64)
4980 thread_tls_offset = -1;
4981 #else
4982 #if MONO_XEN_OPT
4983 optimize_for_xen = access ("/proc/xen", F_OK) == 0;
4984 #endif
4985 tls_offset_inited = TRUE;
4986 appdomain_tls_offset = mono_domain_get_tls_offset ();
4987 lmf_tls_offset = mono_get_lmf_tls_offset ();
4988 lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
4989 thread_tls_offset = mono_thread_get_tls_offset ();
4990 #endif
4995 void
4996 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
5000 #ifdef MONO_ARCH_HAVE_IMT
5002 // Linear handler, the bsearch head compare is shorter
5003 //[2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
5004 //[1 + 1] x86_branch8(inst,cond,imm,is_signed)
5005 // x86_patch(ins,target)
5006 //[1 + 5] x86_jump_mem(inst,mem)
5008 #define CMP_SIZE 6
5009 #define BR_SMALL_SIZE 2
5010 #define BR_LARGE_SIZE 5
5011 #define JUMP_IMM_SIZE 6
5012 #define ENABLE_WRONG_METHOD_CHECK 0
5014 static int
5015 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
5017 int i, distance = 0;
5018 for (i = start; i < target; ++i)
5019 distance += imt_entries [i]->chunk_size;
5020 return distance;
5023 /*
5024  * LOCKING: called with the domain lock held
5025  */
5026 gpointer
5027 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
5028 gpointer fail_tramp)
5030 int i;
5031 int size = 0;
5032 guint8 *code, *start;
5034 for (i = 0; i < count; ++i) {
5035 MonoIMTCheckItem *item = imt_entries [i];
5036 if (item->is_equals) {
5037 if (item->check_target_idx) {
5038 if (!item->compare_done)
5039 item->chunk_size += CMP_SIZE;
5040 item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
5041 } else {
5042 if (fail_tramp) {
5043 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + JUMP_IMM_SIZE * 2;
5044 } else {
5045 item->chunk_size += JUMP_IMM_SIZE;
5046 #if ENABLE_WRONG_METHOD_CHECK
5047 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
5048 #endif
5051 } else {
5052 item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
5053 imt_entries [item->check_target_idx]->compare_done = TRUE;
5055 size += item->chunk_size;
5057 if (fail_tramp)
5058 code = mono_method_alloc_generic_virtual_thunk (domain, size);
5059 else
5060 code = mono_domain_code_reserve (domain, size);
5061 start = code;
5062 for (i = 0; i < count; ++i) {
5063 MonoIMTCheckItem *item = imt_entries [i];
5064 item->code_target = code;
5065 if (item->is_equals) {
5066 if (item->check_target_idx) {
5067 if (!item->compare_done)
5068 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5069 item->jmp_code = code;
5070 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5071 if (item->has_target_code)
5072 x86_jump_code (code, item->value.target_code);
5073 else
5074 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5075 } else {
5076 if (fail_tramp) {
5077 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5078 item->jmp_code = code;
5079 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5080 if (item->has_target_code)
5081 x86_jump_code (code, item->value.target_code);
5082 else
5083 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5084 x86_patch (item->jmp_code, code);
5085 x86_jump_code (code, fail_tramp);
5086 item->jmp_code = NULL;
5087 } else {
5088 /* enable the commented code to assert on wrong method */
5089 #if ENABLE_WRONG_METHOD_CHECK
5090 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5091 item->jmp_code = code;
5092 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5093 #endif
5094 if (item->has_target_code)
5095 x86_jump_code (code, item->value.target_code);
5096 else
5097 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5098 #if ENABLE_WRONG_METHOD_CHECK
5099 x86_patch (item->jmp_code, code);
5100 x86_breakpoint (code);
5101 item->jmp_code = NULL;
5102 #endif
5105 } else {
5106 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5107 item->jmp_code = code;
5108 if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
5109 x86_branch8 (code, X86_CC_GE, 0, FALSE);
5110 else
5111 x86_branch32 (code, X86_CC_GE, 0, FALSE);
5114 /* patch the branches to get to the target items */
5115 for (i = 0; i < count; ++i) {
5116 MonoIMTCheckItem *item = imt_entries [i];
5117 if (item->jmp_code) {
5118 if (item->check_target_idx) {
5119 x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
5124 if (!fail_tramp)
5125 mono_stats.imt_thunks_size += code - start;
5126 g_assert (code - start <= size);
5127 return start;
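/*
 * The generated thunk is the unrolled (and, via check_target_idx, partly
 * bsearch-shaped) form of this loop: compare the IMT register against each
 * key and jump through the matching vtable slot. Sketch (hypothetical
 * helper):
 */
#if 0
static gpointer
imt_thunk_sketch (gsize *keys, gpointer *targets, int count, gsize imt_reg)
{
	int i;

	for (i = 0; i < count; ++i)
		if (keys [i] == imt_reg)
			return targets [i];
	return NULL; /* the fail_tramp case in the generated code */
}
#endif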
5130 MonoMethod*
5131 mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
5133 return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
5136 MonoObject*
5137 mono_arch_find_this_argument (mgreg_t *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
5139 MonoMethodSignature *sig = mono_method_signature (method);
5140 CallInfo *cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5141 int this_argument_offset;
5142 MonoObject *this_argument;
5144 /*
5145  * this is the offset of the this arg from esp as saved at the start of
5146  * mono_arch_create_trampoline_code () in tramp-x86.c.
5147  */
5148 this_argument_offset = 5;
5149 if (MONO_TYPE_ISSTRUCT (sig->ret) && (cinfo->ret.storage == ArgOnStack))
5150 this_argument_offset++;
5152 this_argument = * (MonoObject**) (((guint8*) regs [X86_ESP]) + this_argument_offset * sizeof (gpointer));
5154 g_free (cinfo);
5155 return this_argument;
5157 #endif
5159 MonoVTable*
5160 mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
5162 return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
5165 MonoInst*
5166 mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
5168 MonoInst *ins = NULL;
5169 int opcode = 0;
5171 if (cmethod->klass == mono_defaults.math_class) {
5172 if (strcmp (cmethod->name, "Sin") == 0) {
5173 opcode = OP_SIN;
5174 } else if (strcmp (cmethod->name, "Cos") == 0) {
5175 opcode = OP_COS;
5176 } else if (strcmp (cmethod->name, "Tan") == 0) {
5177 opcode = OP_TAN;
5178 } else if (strcmp (cmethod->name, "Atan") == 0) {
5179 opcode = OP_ATAN;
5180 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
5181 opcode = OP_SQRT;
5182 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
5183 opcode = OP_ABS;
5184 } else if (strcmp (cmethod->name, "Round") == 0 && fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R8) {
5185 opcode = OP_ROUND;
5188 if (opcode) {
5189 MONO_INST_NEW (cfg, ins, opcode);
5190 ins->type = STACK_R8;
5191 ins->dreg = mono_alloc_freg (cfg);
5192 ins->sreg1 = args [0]->dreg;
5193 MONO_ADD_INS (cfg->cbb, ins);
5196 if (cfg->opt & MONO_OPT_CMOV) {
5197 int opcode = 0;
5199 if (strcmp (cmethod->name, "Min") == 0) {
5200 if (fsig->params [0]->type == MONO_TYPE_I4)
5201 opcode = OP_IMIN;
5202 } else if (strcmp (cmethod->name, "Max") == 0) {
5203 if (fsig->params [0]->type == MONO_TYPE_I4)
5204 opcode = OP_IMAX;
5207 if (opcode) {
5208 MONO_INST_NEW (cfg, ins, opcode);
5209 ins->type = STACK_I4;
5210 ins->dreg = mono_alloc_ireg (cfg);
5211 ins->sreg1 = args [0]->dreg;
5212 ins->sreg2 = args [1]->dreg;
5213 MONO_ADD_INS (cfg->cbb, ins);
5217 #if 0
5218 /* OP_FREM is not IEEE compatible */
5219 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
5220 MONO_INST_NEW (cfg, ins, OP_FREM);
5221 ins->inst_i0 = args [0];
5222 ins->inst_i1 = args [1];
5224 #endif
5227 return ins;
5230 gboolean
5231 mono_arch_print_tree (MonoInst *tree, int arity)
5233 return 0;
5236 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
5238 MonoInst* ins;
5240 return NULL; /* the domain intrinsic is currently disabled: this early return makes the code below unreachable */
5242 if (appdomain_tls_offset == -1)
5243 return NULL;
5245 MONO_INST_NEW (cfg, ins, OP_TLS_GET);
5246 ins->inst_offset = appdomain_tls_offset;
5247 return ins;
5250 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
5252 MonoInst* ins;
5254 if (thread_tls_offset == -1)
5255 return NULL;
5257 MONO_INST_NEW (cfg, ins, OP_TLS_GET);
5258 ins->inst_offset = thread_tls_offset;
5259 return ins;
5262 guint32
5263 mono_arch_get_patch_offset (guint8 *code)
5265 if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
5266 return 2;
5267 else if ((code [0] == 0xba))
5268 return 1;
5269 else if ((code [0] == 0x68))
5270 /* push IMM */
5271 return 1;
5272 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
5273 /* push <OFFSET>(<REG>) */
5274 return 2;
5275 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
5276 /* call *<OFFSET>(<REG>) */
5277 return 2;
5278 else if ((code [0] == 0xdd) || (code [0] == 0xd9))
5279 /* fldl <ADDR> */
5280 return 2;
5281 else if ((code [0] == 0x58) && (code [1] == 0x05))
5282 /* pop %eax; add <OFFSET>, %eax */
5283 return 2;
5284 else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
5285 /* pop <REG>; add <OFFSET>, <REG> */
5286 return 3;
5287 else if ((code [0] >= 0xb8) && (code [0] < 0xb8 + 8))
5288 /* mov <REG>, imm */
5289 return 1;
5290 else {
5291 g_assert_not_reached ();
5292 return -1;
5296 /*
5297  * mono_breakpoint_clean_code:
5298  *
5299  * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
5300  * breakpoints in the original code, they are removed in the copy.
5301  *
5302  * Returns TRUE if no sw breakpoint was present.
5303  */
5304 gboolean
5305 mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
5307 int i;
5308 gboolean can_write = TRUE;
5309 /*
5310  * If method_start is non-NULL we need to perform bound checks, since accessing
5311  * memory at code - offset could take us before the start of the method and into a
5312  * different page that is not mapped, or make us read incorrect data. We zero-fill
5313  * the out-of-range bytes instead.
5314  */
5315 if (!method_start || code - offset >= method_start) {
5316 memcpy (buf, code - offset, size);
5317 } else {
5318 int diff = code - method_start;
5319 memset (buf, 0, size);
5320 memcpy (buf + offset - diff, method_start, diff + size - offset);
5322 code -= offset;
5323 for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
5324 int idx = mono_breakpoint_info_index [i];
5325 guint8 *ptr;
5326 if (idx < 1)
5327 continue;
5328 ptr = mono_breakpoint_info [idx].address;
5329 if (ptr >= code && ptr < code + size) {
5330 guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
5331 can_write = FALSE;
5332 /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
5333 buf [ptr - code] = saved_byte;
5336 return can_write;
5339 gpointer
5340 mono_arch_get_vcall_slot (guint8 *code, mgreg_t *regs, int *displacement)
5342 guint8 buf [8];
5343 guint8 reg = 0;
5344 gint32 disp = 0;
5346 mono_breakpoint_clean_code (NULL, code, 8, buf, sizeof (buf));
5347 code = buf + 8;
5349 *displacement = 0;
5351 code -= 6;
5353 /*
5354  * A given byte sequence can match more than one case here, so we have to be
5355  * really careful about the ordering of the cases. Longer sequences
5356  * come first.
5357  * There are two types of calls:
5358  * - direct calls: 0xff address_byte 8/32 bits displacement
5359  * - indirect calls: nop nop nop <call>
5360  * The nops make sure we don't confuse the instruction preceding an indirect
5361  * call with a direct call.
5362  */
5363 if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
5364 reg = code [4] & 0x07;
5365 disp = (signed char)code [5];
5366 } else if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
5367 reg = code [1] & 0x07;
5368 disp = *((gint32*)(code + 2));
5369 } else if ((code [1] == 0xe8)) {
5370 return NULL;
5371 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
5372 /*
5373  * This is an interface call
5374  * 8b 40 30   mov    0x30(%eax),%eax
5375  * ff 10      call   *(%eax)
5376  */
5377 disp = 0;
5378 reg = code [5] & 0x07;
5380 else
5381 return NULL;
5383 *displacement = disp;
5384 return (gpointer)regs [reg];
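/*
 * The bit tests above decode ModRM bytes by hand: mod = byte >> 6,
 * reg = (byte >> 3) & 7, rm = byte & 7; an indirect near call is opcode
 * 0xff with a reg field of 2. Sketch (hypothetical helper):
 */
#if 0
static gboolean
is_indirect_call_sketch (const guint8 *code)
{
	return code [0] == 0xff && ((code [1] >> 3) & 0x7) == 2;
}
#endif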
5387 gpointer
5388 mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig,
5389 mgreg_t *regs, guint8 *code)
5391 guint32 esp = regs [X86_ESP];
5392 CallInfo *cinfo = NULL;
5393 gpointer res;
5394 int offset;
5396 /*
5397  * Avoid expensive calls to get_generic_context_from_code () + get_call_info
5398  * if possible.
5399  */
5400 if (MONO_TYPE_ISSTRUCT (sig->ret)) {
5401 if (!gsctx && code)
5402 gsctx = mono_get_generic_context_from_code (code);
5403 cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5405 offset = cinfo->args [0].offset;
5406 } else {
5407 offset = 0;
5410 /*
5411  * The stack looks like:
5412  * <other args>
5413  * <this=delegate>
5414  * <possible vtype return address>
5415  * <return addr>
5416  * <4 pointers pushed by mono_arch_create_trampoline_code ()>
5417  */
5418 res = (((MonoObject**)esp) [5 + (offset / 4)]);
5419 if (cinfo)
5420 g_free (cinfo);
5421 return res;
5424 #define MAX_ARCH_DELEGATE_PARAMS 10
5426 gpointer
5427 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
5429 guint8 *code, *start;
5431 if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
5432 return NULL;
5434 /* FIXME: Support more cases */
5435 if (MONO_TYPE_ISSTRUCT (sig->ret))
5436 return NULL;
5438 /*
5439  * The stack contains:
5440  * <delegate>
5441  * <return addr>
5442  */
5444 if (has_target) {
5445 static guint8* cached = NULL;
5446 if (cached)
5447 return cached;
5449 start = code = mono_global_codeman_reserve (64);
5451 /* Replace the this argument with the target */
5452 x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
5453 x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
5454 x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
5455 x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
5457 g_assert ((code - start) < 64);
5459 mono_debug_add_delegate_trampoline (start, code - start);
5461 mono_memory_barrier ();
5463 cached = start;
5464 } else {
5465 static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
5466 int i = 0;
5467 /* 8 for mov_reg and jump, plus 8 for each parameter */
5468 int code_reserve = 8 + (sig->param_count * 8);
5470 for (i = 0; i < sig->param_count; ++i)
5471 if (!mono_is_regsize_var (sig->params [i]))
5472 return NULL;
5474 code = cache [sig->param_count];
5475 if (code)
5476 return code;
5478 /*
5479  * The stack contains:
5480  * <args in reverse order>
5481  * <delegate>
5482  * <return addr>
5483  *
5484  * and we need:
5485  * <args in reverse order>
5486  * <return addr>
5487  *
5488  * without unbalancing the stack.
5489  * So we move each arg up a spot in the stack (overwriting the unneeded 'this' arg)
5490  * and leave the original spot of the first arg as a placeholder in the stack, so
5491  * that when the callee pops the stack everything works.
5492  */
5494 start = code = mono_global_codeman_reserve (code_reserve);
5496 /* store delegate for access to method_ptr */
5497 x86_mov_reg_membase (code, X86_ECX, X86_ESP, 4, 4);
5499 /* move args up */
5500 for (i = 0; i < sig->param_count; ++i) {
5501 x86_mov_reg_membase (code, X86_EAX, X86_ESP, (i+2)*4, 4);
5502 x86_mov_membase_reg (code, X86_ESP, (i+1)*4, X86_EAX, 4);
5505 x86_jump_membase (code, X86_ECX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
5507 g_assert ((code - start) < code_reserve);
5509 mono_debug_add_delegate_trampoline (start, code - start);
5511 mono_memory_barrier ();
5513 cache [sig->param_count] = start;
5516 return start;
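/*
 * The arg-moving loop above, seen from the stack's point of view: every
 * argument moves up one slot, overwriting the delegate 'this', and the old
 * first-arg slot stays behind as the placeholder that keeps the callee's
 * ret imm balanced. Sketch (hypothetical helper):
 */
#if 0
static void
shift_args_sketch (gpointer *esp, int param_count)
{
	int i;

	/* esp [0] = return addr, esp [1] = delegate, esp [2..] = args */
	for (i = 0; i < param_count; ++i)
		esp [i + 1] = esp [i + 2];
}
#endif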
5519 gpointer
5520 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
5522 switch (reg) {
5523 case X86_EAX: return (gpointer)ctx->eax;
5524 case X86_EBX: return (gpointer)ctx->ebx;
5525 case X86_ECX: return (gpointer)ctx->ecx;
5526 case X86_EDX: return (gpointer)ctx->edx;
5527 case X86_ESP: return (gpointer)ctx->esp;
5528 case X86_EBP: return (gpointer)ctx->ebp;
5529 case X86_ESI: return (gpointer)ctx->esi;
5530 case X86_EDI: return (gpointer)ctx->edi;
5531 default: g_assert_not_reached ();
5535 #ifdef MONO_ARCH_SIMD_INTRINSICS
5537 static MonoInst*
5538 get_float_to_x_spill_area (MonoCompile *cfg)
5540 if (!cfg->fconv_to_r8_x_var) {
5541 cfg->fconv_to_r8_x_var = mono_compile_create_var (cfg, &mono_defaults.double_class->byval_arg, OP_LOCAL);
5542 cfg->fconv_to_r8_x_var->flags |= MONO_INST_VOLATILE; /*FIXME, use the don't regalloc flag*/
5544 return cfg->fconv_to_r8_x_var;
5547 /*
5548  * Convert all fconv opts that MONO_OPT_SSE2 would get wrong.
5549  */
5550 void
5551 mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
5553 MonoInst *fconv;
5554 int dreg, src_opcode;
5556 if (!(cfg->opt & MONO_OPT_SSE2) || !(cfg->opt & MONO_OPT_SIMD) || COMPILE_LLVM (cfg))
5557 return;
5559 switch (src_opcode = ins->opcode) {
5560 case OP_FCONV_TO_I1:
5561 case OP_FCONV_TO_U1:
5562 case OP_FCONV_TO_I2:
5563 case OP_FCONV_TO_U2:
5564 case OP_FCONV_TO_I4:
5565 case OP_FCONV_TO_I:
5566 break;
5567 default:
5568 return;
5571 /* dreg is the IREG and sreg1 is the FREG */
5572 MONO_INST_NEW (cfg, fconv, OP_FCONV_TO_R8_X);
5573 fconv->klass = NULL; /*FIXME, what can I use here as the Mono.Simd lib might not be loaded yet*/
5574 fconv->sreg1 = ins->sreg1;
5575 fconv->dreg = mono_alloc_ireg (cfg);
5576 fconv->type = STACK_VTYPE;
5577 fconv->backend.spill_var = get_float_to_x_spill_area (cfg);
5579 mono_bblock_insert_before_ins (cfg->cbb, ins, fconv);
5581 dreg = ins->dreg;
5582 NULLIFY_INS (ins);
5583 ins->opcode = OP_XCONV_R8_TO_I4;
5585 ins->klass = mono_defaults.int32_class;
5586 ins->sreg1 = fconv->dreg;
5587 ins->dreg = dreg;
5588 ins->type = STACK_I4;
5589 ins->backend.source_opcode = src_opcode;
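/*
 * Net effect of the decomposition above for the small int conversions:
 * cvttsd2si truncates toward zero into a 32-bit register, and the widen in
 * OP_XCONV_R8_TO_I4 then narrows or extends to the requested size. Sketch
 * for the I1 case (hypothetical helper):
 */
#if 0
static gint32
fconv_to_i1_sketch (double d)
{
	gint32 i = (gint32)d; /* cvttsd2si: truncate toward zero */

	return (gint32)(gint8)i; /* x86_widen_reg, signed byte */
}
#endif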
5592 #endif /* #ifdef MONO_ARCH_SIMD_INTRINSICS */
5594 void
5595 mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins)
5597 MonoInst *ins;
5598 int vreg;
5600 if (long_ins->opcode == OP_LNEG) {
5601 ins = long_ins;
5602 MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 1, ins->sreg1 + 1);
5603 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ADC_IMM, ins->dreg + 2, ins->sreg1 + 2, 0);
5604 MONO_EMIT_NEW_UNALU (cfg, OP_INEG, ins->dreg + 2, ins->dreg + 2);
5605 NULLIFY_INS (ins);
5606 return;
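/*
 * The OP_LNEG expansion above is two's-complement negation spread across
 * two 32-bit halves: neg the low word, propagate the borrow with adc, then
 * neg the high word. Sketch (hypothetical helper):
 */
#if 0
static void
lneg_sketch (guint32 *lo, guint32 *hi)
{
	guint32 borrow = (*lo != 0); /* x86 neg sets carry iff its operand != 0 */

	*lo = 0 - *lo;
	*hi = 0 - (*hi + borrow);
}
#endif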
5609 #ifdef MONO_ARCH_SIMD_INTRINSICS
5611 if (!(cfg->opt & MONO_OPT_SIMD))
5612 return;
5614 /*TODO move this to simd-intrinsic.c once we support sse 4.1 dword extractors since we need the runtime caps info */
5615 switch (long_ins->opcode) {
5616 case OP_EXTRACT_I8:
5617 vreg = long_ins->sreg1;
5619 if (long_ins->inst_c0) {
5620 MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
5621 ins->klass = long_ins->klass;
5622 ins->sreg1 = long_ins->sreg1;
5623 ins->inst_c0 = 2;
5624 ins->type = STACK_VTYPE;
5625 ins->dreg = vreg = alloc_ireg (cfg);
5626 MONO_ADD_INS (cfg->cbb, ins);
5629 MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
5630 ins->klass = mono_defaults.int32_class;
5631 ins->sreg1 = vreg;
5632 ins->type = STACK_I4;
5633 ins->dreg = long_ins->dreg + 1;
5634 MONO_ADD_INS (cfg->cbb, ins);
5636 MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
5637 ins->klass = long_ins->klass;
5638 ins->sreg1 = long_ins->sreg1;
5639 ins->inst_c0 = long_ins->inst_c0 ? 3 : 1;
5640 ins->type = STACK_VTYPE;
5641 ins->dreg = vreg = alloc_ireg (cfg);
5642 MONO_ADD_INS (cfg->cbb, ins);
5644 MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
5645 ins->klass = mono_defaults.int32_class;
5646 ins->sreg1 = vreg;
5647 ins->type = STACK_I4;
5648 ins->dreg = long_ins->dreg + 2;
5649 MONO_ADD_INS (cfg->cbb, ins);
5651 long_ins->opcode = OP_NOP;
5652 break;
5653 case OP_INSERTX_I8_SLOW:
5654 MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
5655 ins->dreg = long_ins->dreg;
5656 ins->sreg1 = long_ins->dreg;
5657 ins->sreg2 = long_ins->sreg2 + 1;
5658 ins->inst_c0 = long_ins->inst_c0 * 2;
5659 MONO_ADD_INS (cfg->cbb, ins);
5661 MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
5662 ins->dreg = long_ins->dreg;
5663 ins->sreg1 = long_ins->dreg;
5664 ins->sreg2 = long_ins->sreg2 + 2;
5665 ins->inst_c0 = long_ins->inst_c0 * 2 + 1;
5666 MONO_ADD_INS (cfg->cbb, ins);
5668 long_ins->opcode = OP_NOP;
5669 break;
5670 case OP_EXPAND_I8:
5671 MONO_INST_NEW (cfg, ins, OP_ICONV_TO_X);
5672 ins->dreg = long_ins->dreg;
5673 ins->sreg1 = long_ins->sreg1 + 1;
5674 ins->klass = long_ins->klass;
5675 ins->type = STACK_VTYPE;
5676 MONO_ADD_INS (cfg->cbb, ins);
5678 MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
5679 ins->dreg = long_ins->dreg;
5680 ins->sreg1 = long_ins->dreg;
5681 ins->sreg2 = long_ins->sreg1 + 2;
5682 ins->inst_c0 = 1;
5683 ins->klass = long_ins->klass;
5684 ins->type = STACK_VTYPE;
5685 MONO_ADD_INS (cfg->cbb, ins);
5687 MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
5688 ins->dreg = long_ins->dreg;
5689 ins->sreg1 = long_ins->dreg;
5690 ins->inst_c0 = 0x44; /*Magic number for swizzling (X,Y,X,Y)*/
5691 ins->klass = long_ins->klass;
5692 ins->type = STACK_VTYPE;
5693 MONO_ADD_INS (cfg->cbb, ins);
5695 long_ins->opcode = OP_NOP;
5696 break;
5698 #endif /* MONO_ARCH_SIMD_INTRINSICS */