/*
 * AMD64 backend for the Mono code generator
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Zoltan Varga (vargaz@gmail.com)
 *   Johan Lorensson (lateralusx.github@gmail.com)
 *
 * (C) 2003 Ximian, Inc.
 * Copyright 2003-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */
#include <mono/metadata/abi-details.h>
#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/metadata/gc-internals.h>
#include <mono/utils/mono-math.h>
#include <mono/utils/mono-mmap.h>
#include <mono/utils/mono-memory-model.h>
#include <mono/utils/mono-tls.h>
#include <mono/utils/mono-hwcap.h>
#include <mono/utils/mono-threads.h>
#include <mono/utils/unlocked.h>

#include "interp/interp.h"

#include "mini-amd64.h"
#include "cpu-amd64.h"
#include "debugger-agent.h"
#include "mini-runtime.h"
#include "aot-runtime.h"
#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

static GENERATE_TRY_GET_CLASS_WITH_CACHE (math, "System", "Math")
#define IS_IMM32(val) ((((guint64)val) >> 32) == 0)

#define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))
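/*
 * Illustrative note (added; not in the original source): a REX prefix on
 * x86-64 is a single byte in the 0x40-0x4f range, e.g. 0x48 in
 * "48 b8 <imm64>" (movabs with REX.W), so IS_REX (0x48) is true. IS_IMM32
 * checks that no bits above bit 31 are set, i.e. the value can be encoded
 * as a 32-bit immediate.
 */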
/* The single step trampoline */
static gpointer ss_trampoline;

/* The breakpoint trampoline */
static gpointer bp_trampoline;

/* Offset between fp and the first argument in the callee */
#define ARGS_OFFSET 16
#define GP_SCRATCH_REG AMD64_R11

/* Max number of bblocks before we bail from using more advanced branch placement code */
#define MAX_BBLOCKS_FOR_BRANCH_OPTS 800
/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64 bit constants in opcodes
 * - the rest is used for local allocation
 */

/*
 * Floating point comparison results:
 *                  ZF PF CF
 * A > B            0  0  0
 * A < B            0  0  1
 * A = B            1  0  0
 * unordered        1  1  1
 */
const char*
mono_arch_regname (int reg)
{
	switch (reg) {
	case AMD64_RAX: return "%rax";
	case AMD64_RBX: return "%rbx";
	case AMD64_RCX: return "%rcx";
	case AMD64_RDX: return "%rdx";
	case AMD64_RSP: return "%rsp";
	case AMD64_RBP: return "%rbp";
	case AMD64_RDI: return "%rdi";
	case AMD64_RSI: return "%rsi";
	case AMD64_R8: return "%r8";
	case AMD64_R9: return "%r9";
	case AMD64_R10: return "%r10";
	case AMD64_R11: return "%r11";
	case AMD64_R12: return "%r12";
	case AMD64_R13: return "%r13";
	case AMD64_R14: return "%r14";
	case AMD64_R15: return "%r15";
	}
	return "unknown";
}
static const char * const packed_xmmregs [] = {
	"p:xmm0", "p:xmm1", "p:xmm2", "p:xmm3", "p:xmm4", "p:xmm5", "p:xmm6", "p:xmm7", "p:xmm8",
	"p:xmm9", "p:xmm10", "p:xmm11", "p:xmm12", "p:xmm13", "p:xmm14", "p:xmm15"
};

static const char * const single_xmmregs [] = {
	"s:xmm0", "s:xmm1", "s:xmm2", "s:xmm3", "s:xmm4", "s:xmm5", "s:xmm6", "s:xmm7", "s:xmm8",
	"s:xmm9", "s:xmm10", "s:xmm11", "s:xmm12", "s:xmm13", "s:xmm14", "s:xmm15"
};
const char*
mono_arch_fregname (int reg)
{
	if (reg < AMD64_XMM_NREG)
		return single_xmmregs [reg];
	else
		return "unknown";
}

const char*
mono_arch_xregname (int reg)
{
	if (reg < AMD64_XMM_NREG)
		return packed_xmmregs [reg];
	else
		return "unknown";
}
static gboolean
debug_omit_fp (void)
{
	return mono_debug_count ();
}
static gboolean
amd64_is_near_call (guint8 *code)
{
	/* Skip REX */
	if ((code [0] >= 0x40) && (code [0] <= 0x4f))
		code += 1;

	return code [0] == 0xe8;
}
static gboolean
amd64_use_imm32 (gint64 val)
{
	if (mini_debug_options.single_imm_size)
		return FALSE;

	return amd64_is_imm32 (val);
}
void
mono_x86_patch (unsigned char* code, gpointer target)
{
	mono_x86_patch_inline (code, target);
}
static void
amd64_patch (unsigned char* code, gpointer target)
{
	// NOTE: Sometimes code has just been generated, is not running yet,
	// and has no alignment requirements. Sometimes it could be running while we patch it,
	// and there are alignment requirements.
	// FIXME Assert alignment.

	guint8 rex = 0;

	/* Skip REX */
	if ((code [0] >= 0x40) && (code [0] <= 0x4f)) {
		rex = code [0];
		code += 1;
	}

	if ((code [0] & 0xf8) == 0xb8) {
		/* amd64_set_reg_template */
		*(guint64*)(code + 1) = (guint64)target;
	}
	else if ((code [0] == 0x8b) && rex && x86_modrm_mod (code [1]) == 0 && x86_modrm_rm (code [1]) == 5) {
		/* mov 0(%rip), %dreg */
		g_assert (!1); // Historical code was incorrect.
		ptrdiff_t const offset = (guchar*)target - (code + 6);
		g_assert (offset == (gint32)offset);
		*(gint32*)(code + 2) = (gint32)offset;
	}
	else if (code [0] == 0xff && (code [1] == 0x15 || code [1] == 0x25)) {
		/* call or jmp *<OFFSET>(%rip) */
		// Patch the data, not the code.
		g_assert (!2); // For possible use later.
		*(void**)(code + 6 + *(gint32*)(code + 2)) = target;
	}
	else
		x86_patch (code, target);
}

void
mono_amd64_patch (unsigned char* code, gpointer target)
{
	amd64_patch (code, target);
}
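/*
 * Illustrative summary of amd64_patch () (added; not in the original source):
 * the patchable forms are "mov $imm64, %reg" (opcode 0xb8+reg, the 64-bit
 * immediate is rewritten in place), "mov 0(%rip), %reg" (historical, now
 * asserted unreachable) and "call/jmp *<off>(%rip)" (0xff 0x15/0x25), where
 * the pointer slot the RIP-relative displacement points at is rewritten
 * rather than the code itself. Anything else falls back to x86_patch ().
 */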
#define DEBUG(a) if (cfg->verbose_level > 1) a
static void
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		ainfo->arg_size = sizeof (target_mgreg_t);
		/* Since the same stack slot size is used for all arg */
		/* types, it needs to be big enough to hold them all */
		(*stack_size) += sizeof (target_mgreg_t);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}
static void
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		ainfo->arg_size = sizeof (target_mgreg_t);
		/* Since the same stack slot size is used for both float */
		/* types, it needs to be big enough to hold them both */
		(*stack_size) += sizeof (target_mgreg_t);
	}
	else {
		/* A double register */
		if (is_double)
			ainfo->storage = ArgInDoubleSSEReg;
		else
			ainfo->storage = ArgInFloatSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}
typedef enum ArgumentClass {
	ARG_CLASS_NO_CLASS,
	ARG_CLASS_MEMORY,
	ARG_CLASS_INTEGER,
	ARG_CLASS_SSE
} ArgumentClass;
static ArgumentClass
merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
{
	ArgumentClass class2 = ARG_CLASS_NO_CLASS;
	MonoType *ptype;

	ptype = mini_get_underlying_type (type);
	switch (ptype->type) {
	case MONO_TYPE_I1:
	case MONO_TYPE_U1:
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_STRING:
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_PTR:
	case MONO_TYPE_FNPTR:
	case MONO_TYPE_ARRAY:
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		class2 = ARG_CLASS_INTEGER;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
#ifdef TARGET_WIN32
		class2 = ARG_CLASS_INTEGER;
#else
		class2 = ARG_CLASS_SSE;
#endif
		break;
	case MONO_TYPE_TYPEDBYREF:
		g_assert_not_reached ();
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (ptype)) {
			class2 = ARG_CLASS_INTEGER;
			break;
		}
		/* fall through */
	case MONO_TYPE_VALUETYPE: {
		MonoMarshalType *info = mono_marshal_load_type_info (ptype->data.klass);
		int i;

		for (i = 0; i < info->num_fields; ++i) {
			class2 = class1;
			class2 = merge_argument_class_from_type (info->fields [i].field->type, class2);
		}
		break;
	}
	default:
		g_assert_not_reached ();
	}

	/* Merge */
	if (class1 == class2)
		;
	else if (class1 == ARG_CLASS_NO_CLASS)
		class1 = class2;
	else if ((class1 == ARG_CLASS_MEMORY) || (class2 == ARG_CLASS_MEMORY))
		class1 = ARG_CLASS_MEMORY;
	else if ((class1 == ARG_CLASS_INTEGER) || (class2 == ARG_CLASS_INTEGER))
		class1 = ARG_CLASS_INTEGER;
	else
		class1 = ARG_CLASS_SSE;

	return class1;
}
/*
 * collect_field_info_nested:
 *
 *   Collect field info from KLASS recursively into FIELDS.
 */
static void
collect_field_info_nested (MonoClass *klass, GArray *fields_array, int offset, gboolean pinvoke, gboolean unicode)
{
	MonoMarshalType *info;
	int i;

	if (pinvoke) {
		info = mono_marshal_load_type_info (klass);
		g_assert (info);
		for (i = 0; i < info->num_fields; ++i) {
			if (MONO_TYPE_ISSTRUCT (info->fields [i].field->type)) {
				collect_field_info_nested (mono_class_from_mono_type_internal (info->fields [i].field->type), fields_array, info->fields [i].offset, pinvoke, unicode);
			} else {
				guint32 align;
				StructFieldInfo f;

				f.type = info->fields [i].field->type;
				f.size = mono_marshal_type_size (info->fields [i].field->type,
								 info->fields [i].mspec,
								 &align, TRUE, unicode);
				f.offset = offset + info->fields [i].offset;
				if (i == info->num_fields - 1 && f.size + f.offset < info->native_size) {
					/* This can happen with .pack directives eg. 'fixed' arrays */
					if (MONO_TYPE_IS_PRIMITIVE (f.type)) {
						/* Replicate the last field to fill out the remaining place, since the code in add_valuetype () needs type information */
						g_array_append_val (fields_array, f);
						while (f.size + f.offset < info->native_size) {
							f.offset += f.size;
							g_array_append_val (fields_array, f);
						}
					} else {
						f.size = info->native_size - f.offset;
						g_array_append_val (fields_array, f);
					}
				} else {
					g_array_append_val (fields_array, f);
				}
			}
		}
	} else {
		gpointer iter;
		MonoClassField *field;

		iter = NULL;
		while ((field = mono_class_get_fields_internal (klass, &iter))) {
			if (field->type->attrs & FIELD_ATTRIBUTE_STATIC)
				continue;
			if (MONO_TYPE_ISSTRUCT (field->type)) {
				collect_field_info_nested (mono_class_from_mono_type_internal (field->type), fields_array, field->offset - MONO_ABI_SIZEOF (MonoObject), pinvoke, unicode);
			} else {
				int align;
				StructFieldInfo f;

				f.type = field->type;
				f.size = mono_type_size (field->type, &align);
				f.offset = field->offset - MONO_ABI_SIZEOF (MonoObject) + offset;

				g_array_append_val (fields_array, f);
			}
		}
	}
}
#ifdef TARGET_WIN32

/* Windows x64 ABI can pass/return value types in register of size 1,2,4,8 bytes. */
#define MONO_WIN64_VALUE_TYPE_FITS_REG(arg_size) (arg_size <= SIZEOF_REGISTER && (arg_size == 1 || arg_size == 2 || arg_size == 4 || arg_size == 8))
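/*
 * Worked example for the macro above (added; not in the original source):
 * struct { int a; }           -> size 4, fits a register;
 * struct { char a, b, c; }    -> size 3, does not fit, passed by reference;
 * struct { double a; int b; } -> size 16, does not fit, passed by reference.
 */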
static gboolean
allocate_register_for_valuetype_win64 (ArgInfo *arg_info, ArgumentClass arg_class, guint32 arg_size, const AMD64_Reg_No int_regs [], int int_reg_count, const AMD64_XMM_Reg_No float_regs [], int float_reg_count, guint32 *current_int_reg, guint32 *current_float_reg)
{
	gboolean result = FALSE;

	assert (arg_info != NULL && int_regs != NULL && float_regs != NULL && current_int_reg != NULL && current_float_reg != NULL);
	assert (arg_info->storage == ArgValuetypeInReg || arg_info->storage == ArgValuetypeAddrInIReg);

	arg_info->pair_storage [0] = arg_info->pair_storage [1] = ArgNone;
	arg_info->pair_regs [0] = arg_info->pair_regs [1] = ArgNone;
	arg_info->pair_size [0] = 0;
	arg_info->pair_size [1] = 0;
	arg_info->nregs = 0;

	if (arg_class == ARG_CLASS_INTEGER && *current_int_reg < int_reg_count) {
		/* Pass parameter in integer register. */
		arg_info->pair_storage [0] = ArgInIReg;
		arg_info->pair_regs [0] = int_regs [*current_int_reg];
		(*current_int_reg) ++;
		result = TRUE;
	} else if (arg_class == ARG_CLASS_SSE && *current_float_reg < float_reg_count) {
		/* Pass parameter in float register. */
		arg_info->pair_storage [0] = (arg_size <= sizeof (gfloat)) ? ArgInFloatSSEReg : ArgInDoubleSSEReg;
		arg_info->pair_regs [0] = float_regs [*current_float_reg];
		(*current_float_reg) ++;
		result = TRUE;
	}

	if (result == TRUE) {
		arg_info->pair_size [0] = arg_size;
		arg_info->nregs = 1;
	}

	return result;
}
static gboolean
allocate_parameter_register_for_valuetype_win64 (ArgInfo *arg_info, ArgumentClass arg_class, guint32 arg_size, guint32 *current_int_reg, guint32 *current_float_reg)
{
	return allocate_register_for_valuetype_win64 (arg_info, arg_class, arg_size, param_regs, PARAM_REGS, float_param_regs, FLOAT_PARAM_REGS, current_int_reg, current_float_reg);
}

static gboolean
allocate_return_register_for_valuetype_win64 (ArgInfo *arg_info, ArgumentClass arg_class, guint32 arg_size, guint32 *current_int_reg, guint32 *current_float_reg)
{
	return allocate_register_for_valuetype_win64 (arg_info, arg_class, arg_size, return_regs, RETURN_REGS, float_return_regs, FLOAT_RETURN_REGS, current_int_reg, current_float_reg);
}
static void
allocate_storage_for_valuetype_win64 (ArgInfo *arg_info, MonoType *type, gboolean is_return, ArgumentClass arg_class,
				      guint32 arg_size, guint32 *current_int_reg, guint32 *current_float_reg, guint32 *stack_size)
{
	/* Windows x64 value type ABI.
	 *
	 * Parameters: https://msdn.microsoft.com/en-us/library/zthk2dkh.aspx
	 *
	 * Integer/Float types smaller than or equal to 8 bytes, or properly sized struct/unions (1,2,4,8):
	 *    Try to pass in a register using ArgValuetypeInReg/(ArgInIReg|ArgInFloatSSEReg|ArgInDoubleSSEReg) as storage and the size of the parameter (1,2,4,8); if no more registers, pass on the stack using ArgOnStack as storage and the size of the parameter (1,2,4,8).
	 * Integer/Float types bigger than 8 bytes, or struct/unions larger than 8 bytes or of size 3,5,6,7:
	 *    Try to pass a pointer in a register using ArgValuetypeAddrInIReg; if no more registers, pass the pointer on the stack using ArgValuetypeAddrOnStack as storage and the size of a register (8 bytes).
	 *
	 * Return values: https://msdn.microsoft.com/en-us/library/7572ztz4.aspx.
	 *
	 * Integer/Float types smaller than or equal to 8 bytes:
	 *    Return in the corresponding register RAX/XMM0 using ArgValuetypeInReg/(ArgInIReg|ArgInFloatSSEReg|ArgInDoubleSSEReg) as storage and the size of the parameter (1,2,4,8).
	 * Properly sized struct/unions (1,2,4,8):
	 *    Return in register RAX using ArgValuetypeInReg as storage and the size of the parameter (1,2,4,8).
	 * Types bigger than 8 bytes, or struct/unions larger than 8 bytes or of size 3,5,6,7:
	 *    Return a pointer to allocated stack space (allocated by caller) using ArgValuetypeAddrInIReg as storage and parameter size.
	 */

	assert (arg_info != NULL && type != NULL && current_int_reg != NULL && current_float_reg != NULL && stack_size != NULL);

	if (!is_return) {
		/* Parameter cases. */
		if (arg_class != ARG_CLASS_MEMORY && MONO_WIN64_VALUE_TYPE_FITS_REG (arg_size)) {
			assert (arg_size == 1 || arg_size == 2 || arg_size == 4 || arg_size == 8);

			/* First, try to use registers for parameter. If type is struct it can only be passed by value in integer register. */
			arg_info->storage = ArgValuetypeInReg;
			if (!allocate_parameter_register_for_valuetype_win64 (arg_info, !MONO_TYPE_ISSTRUCT (type) ? arg_class : ARG_CLASS_INTEGER, arg_size, current_int_reg, current_float_reg)) {
				/* No more registers, fallback passing parameter on stack as value. */
				assert (arg_info->pair_storage [0] == ArgNone && arg_info->pair_storage [1] == ArgNone && arg_info->pair_size [0] == 0 && arg_info->pair_size [1] == 0 && arg_info->nregs == 0);

				/* Passing value directly on stack, so use size of value. */
				arg_info->storage = ArgOnStack;
				arg_size = ALIGN_TO (arg_size, sizeof (target_mgreg_t));
				arg_info->offset = *stack_size;
				arg_info->arg_size = arg_size;
				*stack_size += arg_size;
			}
		} else {
			/* Fallback to stack, try to pass address to parameter in register. Always use integer register to represent stack address. */
			arg_info->storage = ArgValuetypeAddrInIReg;
			if (!allocate_parameter_register_for_valuetype_win64 (arg_info, ARG_CLASS_INTEGER, arg_size, current_int_reg, current_float_reg)) {
				/* No more registers, fallback passing address to parameter on stack. */
				assert (arg_info->pair_storage [0] == ArgNone && arg_info->pair_storage [1] == ArgNone && arg_info->pair_size [0] == 0 && arg_info->pair_size [1] == 0 && arg_info->nregs == 0);

				/* Passing an address to value on stack, so use size of register as argument size. */
				arg_info->storage = ArgValuetypeAddrOnStack;
				arg_size = sizeof (target_mgreg_t);
				arg_info->offset = *stack_size;
				arg_info->arg_size = arg_size;
				*stack_size += arg_size;
			}
		}
	} else {
		/* Return value cases. */
		if (arg_class != ARG_CLASS_MEMORY && MONO_WIN64_VALUE_TYPE_FITS_REG (arg_size)) {
			assert (arg_size == 1 || arg_size == 2 || arg_size == 4 || arg_size == 8);

			/* Return value fits into return registers. If type is struct it can only be returned by value in integer register. */
			arg_info->storage = ArgValuetypeInReg;
			allocate_return_register_for_valuetype_win64 (arg_info, !MONO_TYPE_ISSTRUCT (type) ? arg_class : ARG_CLASS_INTEGER, arg_size, current_int_reg, current_float_reg);

			/* Only RAX/XMM0 should be used to return valuetype. */
			assert ((arg_info->pair_regs [0] == AMD64_RAX && arg_info->pair_regs [1] == ArgNone) || (arg_info->pair_regs [0] == AMD64_XMM0 && arg_info->pair_regs [1] == ArgNone));
		} else {
			/* Return value doesn't fit into return register, return address to allocated stack space (allocated by caller and passed as input). */
			arg_info->storage = ArgValuetypeAddrInIReg;
			allocate_return_register_for_valuetype_win64 (arg_info, ARG_CLASS_INTEGER, arg_size, current_int_reg, current_float_reg);

			/* Only RAX should be used to return valuetype address. */
			assert (arg_info->pair_regs [0] == AMD64_RAX && arg_info->pair_regs [1] == ArgNone);

			arg_size = ALIGN_TO (arg_size, sizeof (target_mgreg_t));
			arg_info->offset = *stack_size;
			*stack_size += arg_size;
		}
	}
}
static void
get_valuetype_size_win64 (MonoClass *klass, gboolean pinvoke, ArgInfo *arg_info, MonoType *type, ArgumentClass *arg_class, guint32 *arg_size)
{
	*arg_size = 0;
	*arg_class = ARG_CLASS_NO_CLASS;

	assert (klass != NULL && arg_info != NULL && type != NULL && arg_class != NULL && arg_size != NULL);

	if (pinvoke) {
		/* Calculate argument class type and size of marshalled type. */
		MonoMarshalType *info = mono_marshal_load_type_info (klass);
		*arg_size = info->native_size;
	} else {
		/* Calculate argument class type and size of managed type. */
		*arg_size = mono_class_value_size (klass, NULL);
	}

	/* The Windows ABI only handles value types on the stack or passed in an integer register (if they fit the register size). */
	*arg_class = MONO_WIN64_VALUE_TYPE_FITS_REG (*arg_size) ? ARG_CLASS_INTEGER : ARG_CLASS_MEMORY;

	if (*arg_class == ARG_CLASS_MEMORY) {
		/* Value type has a size that doesn't fit a register according to the ABI. Use the full stack size of the type. */
		*arg_size = mini_type_stack_size_full (m_class_get_byval_arg (klass), NULL, pinvoke);
	}

	/*
	 * Standard C and C++ don't allow empty structs; an empty struct will always have a size of 1 byte.
	 * GCC has an extension allowing empty structs, https://gcc.gnu.org/onlinedocs/gcc/Empty-Structures.html.
	 * This causes a little dilemma, since a runtime built with a non-GCC compiler will not be compatible with
	 * GCC-built C libraries and the other way around. On platforms where an empty struct has a size of 1 byte
	 * it must be represented in the call and cannot be dropped.
	 */
	if (*arg_size == 0 && MONO_TYPE_ISSTRUCT (type)) {
		arg_info->pass_empty_struct = TRUE;
		*arg_size = SIZEOF_REGISTER;
		*arg_class = ARG_CLASS_INTEGER;
	}

	assert (*arg_class != ARG_CLASS_NO_CLASS);
}
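/*
 * Hedged example for the empty-struct case above (added; not in the original
 * source): when the computed size comes out as 0 for a struct type, the code
 * promotes it to a register-sized integer slot instead of dropping it, so a
 * call into a C library built with the GCC empty-struct extension still sees
 * the argument it expects.
 */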
static void
add_valuetype_win64 (MonoMethodSignature *signature, ArgInfo *arg_info, MonoType *type,
		     gboolean is_return, guint32 *current_int_reg, guint32 *current_float_reg, guint32 *stack_size)
{
	guint32 arg_size = SIZEOF_REGISTER;
	MonoClass *klass = NULL;
	ArgumentClass arg_class;

	assert (signature != NULL && arg_info != NULL && type != NULL && current_int_reg != NULL && current_float_reg != NULL && stack_size != NULL);

	klass = mono_class_from_mono_type_internal (type);
	get_valuetype_size_win64 (klass, signature->pinvoke, arg_info, type, &arg_class, &arg_size);

	/* Only drop value type if it's not an empty struct as input that must be represented in call */
	if ((arg_size == 0 && !arg_info->pass_empty_struct) || (arg_info->pass_empty_struct && is_return)) {
		arg_info->storage = ArgValuetypeInReg;
		arg_info->pair_storage [0] = arg_info->pair_storage [1] = ArgNone;
	} else {
		/* Allocate storage for value type. */
		allocate_storage_for_valuetype_win64 (arg_info, type, is_return, arg_class, arg_size, current_int_reg, current_float_reg, stack_size);
	}
}

#endif /* TARGET_WIN32 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
#ifdef TARGET_WIN32
	add_valuetype_win64 (sig, ainfo, type, is_return, gr, fr, stack_size);
#else
	guint32 size, quad, nquads, i, nfields;
	/* Keep track of the size used in each quad so we can */
	/* use the right size when copying args/return vars.  */
	guint32 quadsize [2] = {8, 8};
	ArgumentClass args [2];
	StructFieldInfo *fields = NULL;
	GArray *fields_array;
	MonoClass *klass;
	gboolean pass_on_stack = FALSE;
	int struct_size;

	klass = mono_class_from_mono_type_internal (type);
	size = mini_type_stack_size_full (m_class_get_byval_arg (klass), NULL, sig->pinvoke);

	if (!sig->pinvoke && ((is_return && (size == 8)) || (!is_return && (size <= 16)))) {
		/* We pass and return vtypes of size 8 in a register */
	} else if (!sig->pinvoke || (size == 0) || (size > 16)) {
		pass_on_stack = TRUE;
	}

	/* If this struct can't be split up naturally into 8-byte */
	/* chunks (registers), pass it on the stack.              */
	if (sig->pinvoke) {
		MonoMarshalType *info = mono_marshal_load_type_info (klass);
		g_assert (info);
		struct_size = info->native_size;
	} else {
		struct_size = mono_class_value_size (klass, NULL);
	}
	/*
	 * Collect field information recursively to be able to
	 * handle nested structures.
	 */
	fields_array = g_array_new (FALSE, TRUE, sizeof (StructFieldInfo));
	collect_field_info_nested (klass, fields_array, 0, sig->pinvoke, m_class_is_unicode (klass));
	fields = (StructFieldInfo*)fields_array->data;
	nfields = fields_array->len;

	for (i = 0; i < nfields; ++i) {
		if ((fields [i].offset < 8) && (fields [i].offset + fields [i].size) > 8) {
			pass_on_stack = TRUE;
			break;
		}
	}

	if (size == 0) {
		ainfo->storage = ArgValuetypeInReg;
		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
		return;
	}

	if (pass_on_stack) {
		/* Always pass in memory */
		ainfo->offset = *stack_size;
		*stack_size += ALIGN_TO (size, 8);
		ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
		if (!is_return)
			ainfo->arg_size = ALIGN_TO (size, 8);

		g_array_free (fields_array, TRUE);
		return;
	}

	if (size > 8)
		nquads = 2;
	else
		nquads = 1;

	if (!sig->pinvoke) {
		int n = mono_class_value_size (klass, NULL);

		quadsize [0] = n >= 8 ? 8 : n;
		quadsize [1] = n >= 8 ? MAX (n - 8, 8) : 0;

		/* Always pass in 1 or 2 integer registers */
		args [0] = ARG_CLASS_INTEGER;
		args [1] = ARG_CLASS_INTEGER;
		/* Only the simplest cases are supported */
		if (is_return && nquads != 1) {
			args [0] = ARG_CLASS_MEMORY;
			args [1] = ARG_CLASS_MEMORY;
		}
	} else {
		/*
		 * Implement the algorithm from section 3.2.3 of the X86_64 ABI.
		 * The X87 and SSEUP stuff is left out since there are no such types in
		 * the CLR.
		 */
		if (!nfields) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
			return;
		}

		if (struct_size > 16) {
			ainfo->offset = *stack_size;
			*stack_size += ALIGN_TO (struct_size, 8);
			ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
			if (!is_return)
				ainfo->arg_size = ALIGN_TO (struct_size, 8);

			g_array_free (fields_array, TRUE);
			return;
		}

		args [0] = ARG_CLASS_NO_CLASS;
		args [1] = ARG_CLASS_NO_CLASS;
		for (quad = 0; quad < nquads; ++quad) {
			ArgumentClass class1;

			if (nfields == 0)
				class1 = ARG_CLASS_MEMORY;
			else
				class1 = ARG_CLASS_NO_CLASS;
			for (i = 0; i < nfields; ++i) {
				if ((fields [i].offset < 8) && (fields [i].offset + fields [i].size) > 8) {
					/* Unaligned field */
					NOT_IMPLEMENTED;
				}

				/* Skip fields in other quad */
				if ((quad == 0) && (fields [i].offset >= 8))
					continue;
				if ((quad == 1) && (fields [i].offset < 8))
					continue;

				/* How far into this quad this data extends.*/
				/* (8 is size of quad) */
				quadsize [quad] = fields [i].offset + fields [i].size - (quad * 8);

				class1 = merge_argument_class_from_type (fields [i].type, class1);
			}
			/* Empty structs have a nonzero size, causing this assert to be hit */
			g_assert (class1 != ARG_CLASS_NO_CLASS);
			args [quad] = class1;
		}
	}

	g_array_free (fields_array, TRUE);

	/* Post merger cleanup */
	if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
		args [0] = args [1] = ARG_CLASS_MEMORY;

	/* Allocate registers */
	{
		int orig_gr = *gr;
		int orig_fr = *fr;

		while (quadsize [0] != 1 && quadsize [0] != 2 && quadsize [0] != 4 && quadsize [0] != 8)
			quadsize [0] ++;
		while (quadsize [1] != 0 && quadsize [1] != 1 && quadsize [1] != 2 && quadsize [1] != 4 && quadsize [1] != 8)
			quadsize [1] ++;

		ainfo->storage = ArgValuetypeInReg;
		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
		g_assert (quadsize [0] <= 8);
		g_assert (quadsize [1] <= 8);
		ainfo->pair_size [0] = quadsize [0];
		ainfo->pair_size [1] = quadsize [1];
		ainfo->nregs = nquads;
		for (quad = 0; quad < nquads; ++quad) {
			switch (args [quad]) {
			case ARG_CLASS_INTEGER:
				if (*gr >= PARAM_REGS)
					args [quad] = ARG_CLASS_MEMORY;
				else {
					ainfo->pair_storage [quad] = ArgInIReg;
					if (is_return)
						ainfo->pair_regs [quad] = return_regs [*gr];
					else
						ainfo->pair_regs [quad] = param_regs [*gr];
					(*gr) ++;
				}
				break;
			case ARG_CLASS_SSE:
				if (*fr >= FLOAT_PARAM_REGS)
					args [quad] = ARG_CLASS_MEMORY;
				else {
					if (quadsize [quad] <= 4)
						ainfo->pair_storage [quad] = ArgInFloatSSEReg;
					else
						ainfo->pair_storage [quad] = ArgInDoubleSSEReg;
					ainfo->pair_regs [quad] = *fr;
					(*fr) ++;
				}
				break;
			case ARG_CLASS_MEMORY:
				break;
			case ARG_CLASS_NO_CLASS:
				break;
			default:
				g_assert_not_reached ();
			}
		}

		if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY)) {
			int arg_size;
			/* Revert possible register assignments */
			*gr = orig_gr;
			*fr = orig_fr;

			ainfo->offset = *stack_size;
			if (sig->pinvoke)
				arg_size = ALIGN_TO (struct_size, 8);
			else
				arg_size = nquads * sizeof (target_mgreg_t);
			*stack_size += arg_size;
			ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
			if (!is_return)
				ainfo->arg_size = arg_size;
		}
	}
#endif /* !TARGET_WIN32 */
}
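/*
 * Worked System V classification example (added; not in the original source):
 * for a pinvoke argument "struct { double d; gint64 l; }" (16 bytes, two
 * quads) the loop above classifies quad 0 as SSE and quad 1 as INTEGER, so
 * the value travels in one XMM register plus one integer register. A 24-byte
 * struct exceeds the 16-byte limit and is passed on the stack (ArgOnStack)
 * instead.
 */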
/*
 * get_call_info:
 *
 * Obtain information about a call according to the calling convention.
 * For AMD64 System V, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 * For AMD64 Windows, see "Overview of x64 Calling Conventions",
 * https://msdn.microsoft.com/en-us/library/ms235286.aspx
 */
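/*
 * Illustrative example of the resulting assignment on System V (added; not in
 * the original comment): for "int f (gpointer this, int a, double b)",
 * get_call_info () places this in RDI, a in RSI, b in XMM0 and the int return
 * in RAX, with stack_usage == 0 since everything fits in registers.
 */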
static CallInfo*
get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
{
	guint32 i, gr, fr, pstart;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;
	gboolean is_pinvoke = sig->pinvoke;

	if (mp)
		cinfo = (CallInfo*)mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
	else
		cinfo = (CallInfo*)g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	cinfo->gsharedvt = mini_is_gsharedvt_variable_signature (sig);

	gr = 0;
	fr = 0;

#ifdef TARGET_WIN32
	/* Reserve space where the callee can save the argument registers */
	stack_size = 4 * sizeof (target_mgreg_t);
#endif

	/* return value */
	ret_type = mini_get_underlying_type (sig->ret);
	switch (ret_type->type) {
	case MONO_TYPE_I1:
	case MONO_TYPE_U1:
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_PTR:
	case MONO_TYPE_FNPTR:
	case MONO_TYPE_OBJECT:
		cinfo->ret.storage = ArgInIReg;
		cinfo->ret.reg = AMD64_RAX;
		break;
	case MONO_TYPE_U8:
	case MONO_TYPE_I8:
		cinfo->ret.storage = ArgInIReg;
		cinfo->ret.reg = AMD64_RAX;
		break;
	case MONO_TYPE_R4:
		cinfo->ret.storage = ArgInFloatSSEReg;
		cinfo->ret.reg = AMD64_XMM0;
		break;
	case MONO_TYPE_R8:
		cinfo->ret.storage = ArgInDoubleSSEReg;
		cinfo->ret.reg = AMD64_XMM0;
		break;
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (ret_type)) {
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = AMD64_RAX;
			break;
		}
		if (mini_is_gsharedvt_type (ret_type)) {
			cinfo->ret.storage = ArgGsharedvtVariableInReg;
			break;
		}
		/* fall through */
	case MONO_TYPE_VALUETYPE:
	case MONO_TYPE_TYPEDBYREF: {
		guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

		add_valuetype (sig, &cinfo->ret, ret_type, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
		g_assert (cinfo->ret.storage != ArgInIReg);
		break;
	}
	case MONO_TYPE_VAR:
	case MONO_TYPE_MVAR:
		g_assert (mini_is_gsharedvt_type (ret_type));
		cinfo->ret.storage = ArgGsharedvtVariableInReg;
		break;
	case MONO_TYPE_VOID:
		break;
	default:
		g_error ("Can't handle as return value 0x%x", ret_type->type);
	}

	pstart = 0;
	/*
	 * To simplify get_this_arg_reg () and LLVM integration, emit the vret arg after
	 * the first argument, allowing 'this' to be always passed in the first arg reg.
	 * Also do this if the first argument is a reference type, since virtual calls
	 * are sometimes made using calli without sig->hasthis set, like in the delegate
	 * invoke wrappers.
	 */
	ArgStorage ret_storage = cinfo->ret.storage;
	if ((ret_storage == ArgValuetypeAddrInIReg || ret_storage == ArgGsharedvtVariableInReg) && !is_pinvoke && (sig->hasthis || (sig->param_count > 0 && MONO_TYPE_IS_REFERENCE (mini_get_underlying_type (sig->params [0]))))) {
		if (sig->hasthis) {
			add_general (&gr, &stack_size, cinfo->args + 0);
		} else {
			add_general (&gr, &stack_size, &cinfo->args [sig->hasthis + 0]);
			pstart = 1;
		}
		add_general (&gr, &stack_size, &cinfo->ret);
		cinfo->ret.storage = ret_storage;
		cinfo->vret_arg_index = 1;
	} else {
		/* this */
		if (sig->hasthis)
			add_general (&gr, &stack_size, cinfo->args + 0);

		if (ret_storage == ArgValuetypeAddrInIReg || ret_storage == ArgGsharedvtVariableInReg) {
			add_general (&gr, &stack_size, &cinfo->ret);
			cinfo->ret.storage = ret_storage;
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = pstart; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

#ifdef TARGET_WIN32
		/* The float param registers and other param registers must be the same index on Windows x64.*/
		if (gr > fr)
			fr = gr;
		else if (fr > gr)
			gr = fr;
#endif

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		ptype = mini_get_underlying_type (sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_I1:
			ainfo->is_signed = 1;
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			ainfo->byte_arg_size = 1;
			break;
		case MONO_TYPE_I2:
			ainfo->is_signed = 1;
		case MONO_TYPE_U2:
			add_general (&gr, &stack_size, ainfo);
			ainfo->byte_arg_size = 2;
			break;
		case MONO_TYPE_I4:
			ainfo->is_signed = 1;
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			ainfo->byte_arg_size = 4;
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_OBJECT:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (ptype)) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			if (mini_is_gsharedvt_variable_type (ptype)) {
				/* gsharedvt arguments are passed by ref */
				add_general (&gr, &stack_size, ainfo);
				if (ainfo->storage == ArgInIReg)
					ainfo->storage = ArgGSharedVtInReg;
				else
					ainfo->storage = ArgGSharedVtOnStack;
				break;
			}
			/* fall through */
		case MONO_TYPE_VALUETYPE:
		case MONO_TYPE_TYPEDBYREF:
			add_valuetype (sig, ainfo, ptype, FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		case MONO_TYPE_VAR:
		case MONO_TYPE_MVAR:
			/* gsharedvt arguments are passed by ref */
			g_assert (mini_is_gsharedvt_type (ptype));
			add_general (&gr, &stack_size, ainfo);
			if (ainfo->storage == ArgInIReg)
				ainfo->storage = ArgGSharedVtInReg;
			else
				ainfo->storage = ArgGSharedVtOnStack;
			break;
		default:
			g_assert_not_reached ();
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;

	return cinfo;
}
static int
arg_need_temp (ArgInfo *ainfo)
{
	// Value types using one register don't need a temp.
	if (ainfo->storage == ArgValuetypeInReg && ainfo->nregs > 1)
		return ainfo->nregs * sizeof (host_mgreg_t);
	return 0;
}
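/*
 * Illustrative note (added; not in the original source): a 16-byte value type
 * returned in two registers has nregs == 2, so arg_need_temp () requests
 * 2 * sizeof (host_mgreg_t) bytes of scratch space; arg_get_val () and
 * arg_set_val () below then copy the value register-by-register through that
 * temporary.
 */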
static gpointer
arg_get_storage (CallContext *ccontext, ArgInfo *ainfo)
{
	switch (ainfo->storage) {
	case ArgInIReg:
		return &ccontext->gregs [ainfo->reg];
	case ArgInFloatSSEReg:
	case ArgInDoubleSSEReg:
		return &ccontext->fregs [ainfo->reg];
	case ArgOnStack:
	case ArgValuetypeAddrOnStack:
		return ccontext->stack + ainfo->offset;
	case ArgValuetypeInReg:
		// Empty struct
		if (ainfo->nregs == 0)
			return NULL;
		// Value type using one register can be stored
		// directly in its context gregs/fregs slot.
		g_assert (ainfo->nregs == 1);
		switch (ainfo->pair_storage [0]) {
		case ArgInIReg:
			return &ccontext->gregs [ainfo->pair_regs [0]];
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			return &ccontext->fregs [ainfo->pair_regs [0]];
		default:
			g_assert_not_reached ();
		}
	case ArgValuetypeAddrInIReg:
		g_assert (ainfo->pair_storage [0] == ArgInIReg && ainfo->pair_storage [1] == ArgNone);
		return &ccontext->gregs [ainfo->pair_regs [0]];
	default:
		g_error ("Arg storage type not yet supported");
	}
}
static void
arg_get_val (CallContext *ccontext, ArgInfo *ainfo, gpointer dest)
{
	g_assert (arg_need_temp (ainfo));

	host_mgreg_t *dest_cast = (host_mgreg_t*)dest;
	/* Reconstruct the value type */
	for (int k = 0; k < ainfo->nregs; k++) {
		int storage_type = ainfo->pair_storage [k];
		int reg_storage = ainfo->pair_regs [k];
		switch (storage_type) {
		case ArgInIReg:
			*dest_cast = ccontext->gregs [reg_storage];
			break;
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			*(double*)dest_cast = ccontext->fregs [reg_storage];
			break;
		default:
			g_assert_not_reached ();
		}
		dest_cast++;
	}
}
static void
arg_set_val (CallContext *ccontext, ArgInfo *ainfo, gpointer src)
{
	g_assert (arg_need_temp (ainfo));

	host_mgreg_t *src_cast = (host_mgreg_t*)src;
	for (int k = 0; k < ainfo->nregs; k++) {
		int storage_type = ainfo->pair_storage [k];
		int reg_storage = ainfo->pair_regs [k];
		switch (storage_type) {
		case ArgInIReg:
			ccontext->gregs [reg_storage] = *src_cast;
			break;
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			ccontext->fregs [reg_storage] = *(double*)src_cast;
			break;
		default:
			g_assert_not_reached ();
		}
		src_cast++;
	}
}
void
mono_arch_set_native_call_context_args (CallContext *ccontext, gpointer frame, MonoMethodSignature *sig)
{
	CallInfo *cinfo = get_call_info (NULL, sig);
	const MonoEECallbacks *interp_cb = mini_get_interp_callbacks ();
	gpointer storage;
	ArgInfo *ainfo;

	memset (ccontext, 0, sizeof (CallContext));

	ccontext->stack_size = ALIGN_TO (cinfo->stack_usage, MONO_ARCH_FRAME_ALIGNMENT);
	if (ccontext->stack_size)
		ccontext->stack = (guint8*)g_calloc (1, ccontext->stack_size);

	if (sig->ret->type != MONO_TYPE_VOID) {
		ainfo = &cinfo->ret;
		if (ainfo->storage == ArgValuetypeAddrInIReg) {
			storage = interp_cb->frame_arg_to_storage ((MonoInterpFrameHandle)frame, sig, -1);
			ccontext->gregs [cinfo->ret.reg] = (host_mgreg_t)storage;
		}
	}

	g_assert (!sig->hasthis);

	for (int i = 0; i < sig->param_count; i++) {
		ainfo = &cinfo->args [i];

		if (ainfo->storage == ArgValuetypeAddrInIReg || ainfo->storage == ArgValuetypeAddrOnStack) {
			storage = arg_get_storage (ccontext, ainfo);
			*(gpointer *)storage = interp_cb->frame_arg_to_storage (frame, sig, i);
			continue;
		}

		int temp_size = arg_need_temp (ainfo);

		if (temp_size)
			storage = alloca (temp_size); // FIXME? alloca in a loop
		else
			storage = arg_get_storage (ccontext, ainfo);

		interp_cb->frame_arg_to_data ((MonoInterpFrameHandle)frame, sig, i, storage);
		if (temp_size)
			arg_set_val (ccontext, ainfo, storage);
	}

	g_free (cinfo);
}
void
mono_arch_set_native_call_context_ret (CallContext *ccontext, gpointer frame, MonoMethodSignature *sig)
{
	const MonoEECallbacks *interp_cb;
	CallInfo *cinfo;
	gpointer storage;
	ArgInfo *ainfo;

	if (sig->ret->type == MONO_TYPE_VOID)
		return;

	interp_cb = mini_get_interp_callbacks ();
	cinfo = get_call_info (NULL, sig);
	ainfo = &cinfo->ret;

	if (cinfo->ret.storage != ArgValuetypeAddrInIReg) {
		int temp_size = arg_need_temp (ainfo);

		if (temp_size)
			storage = alloca (temp_size);
		else {
			storage = arg_get_storage (ccontext, ainfo);
			memset (ccontext, 0, sizeof (CallContext)); // FIXME
		}
		interp_cb->frame_arg_to_data ((MonoInterpFrameHandle)frame, sig, -1, storage);
		if (temp_size)
			arg_set_val (ccontext, ainfo, storage);
	} else {
#ifdef TARGET_WIN32
		// Windows x64 ABI ainfo implementation includes info on how to return value type address
		// back to caller.
		storage = arg_get_storage (ccontext, ainfo);
		*(gpointer *)storage = interp_cb->frame_arg_to_storage (frame, sig, -1);
#endif
	}

	g_free (cinfo);
}
void
mono_arch_get_native_call_context_args (CallContext *ccontext, gpointer frame, MonoMethodSignature *sig)
{
	const MonoEECallbacks *interp_cb = mini_get_interp_callbacks ();
	CallInfo *cinfo = get_call_info (NULL, sig);
	gpointer storage;
	ArgInfo *ainfo;

	if (sig->ret->type != MONO_TYPE_VOID) {
		ainfo = &cinfo->ret;
		if (ainfo->storage == ArgValuetypeAddrInIReg) {
			storage = (gpointer) ccontext->gregs [cinfo->ret.reg];
			interp_cb->frame_arg_set_storage ((MonoInterpFrameHandle)frame, sig, -1, storage);
		}
	}

	for (int i = 0; i < sig->param_count + sig->hasthis; i++) {
		ainfo = &cinfo->args [i];

		if (ainfo->storage == ArgValuetypeAddrInIReg || ainfo->storage == ArgValuetypeAddrOnStack) {
			storage = arg_get_storage (ccontext, ainfo);
			interp_cb->data_to_frame_arg ((MonoInterpFrameHandle)frame, sig, i, *(gpointer *)storage);
			continue;
		}

		int temp_size = arg_need_temp (ainfo);

		if (temp_size) {
			storage = alloca (temp_size); // FIXME? alloca in a loop
			arg_get_val (ccontext, ainfo, storage);
		} else {
			storage = arg_get_storage (ccontext, ainfo);
		}
		interp_cb->data_to_frame_arg ((MonoInterpFrameHandle)frame, sig, i, storage);
	}

	g_free (cinfo);
}
void
mono_arch_get_native_call_context_ret (CallContext *ccontext, gpointer frame, MonoMethodSignature *sig)
{
	const MonoEECallbacks *interp_cb;
	CallInfo *cinfo;
	ArgInfo *ainfo;
	gpointer storage;

	/* No return value */
	if (sig->ret->type == MONO_TYPE_VOID)
		return;

	interp_cb = mini_get_interp_callbacks ();
	cinfo = get_call_info (NULL, sig);
	ainfo = &cinfo->ret;

	/* The return values were stored directly at address passed in reg */
	if (cinfo->ret.storage != ArgValuetypeAddrInIReg) {
		int temp_size = arg_need_temp (ainfo);

		if (temp_size) {
			storage = alloca (temp_size);
			arg_get_val (ccontext, ainfo, storage);
		} else {
			storage = arg_get_storage (ccontext, ainfo);
		}
		interp_cb->data_to_frame_arg ((MonoInterpFrameHandle)frame, sig, -1, storage);
	}

	g_free (cinfo);
}
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 */
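/*
 * Illustrative example (added; not in the original comment): for an instance
 * method "void m (int a, gint64 b)" with param_count == 2, arg_info [1].offset
 * is 8 and arg_info [2].offset is 16, since each saved argument occupies an
 * 8-byte slot after the 'this' pointer at offset 0.
 */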
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k;
	CallInfo *cinfo = get_call_info (NULL, csig);
	guint32 args_size = cinfo->stack_usage;

	/* The arguments are saved to a stack area in mono_arch_instrument_prolog */
	if (csig->hasthis) {
		arg_info [0].offset = 0;
	}

	for (k = 0; k < param_count; k++) {
		arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
		/* FIXME: */
		arg_info [k + 1].size = 0;
	}

	g_free (cinfo);

	return args_size;
}
gboolean
mono_arch_tailcall_supported (MonoCompile *cfg, MonoMethodSignature *caller_sig, MonoMethodSignature *callee_sig, gboolean virtual_)
{
	CallInfo *caller_info = get_call_info (NULL, caller_sig);
	CallInfo *callee_info = get_call_info (NULL, callee_sig);
	gboolean res = IS_SUPPORTED_TAILCALL (callee_info->stack_usage <= caller_info->stack_usage)
		  && IS_SUPPORTED_TAILCALL (callee_info->ret.storage == caller_info->ret.storage);

	// Limit stack_usage to 1G. Assume 32bit limits when we move parameters.
	res &= IS_SUPPORTED_TAILCALL (callee_info->stack_usage < (1 << 30));
	res &= IS_SUPPORTED_TAILCALL (caller_info->stack_usage < (1 << 30));

	// valuetype parameters are the address of a local
	const ArgInfo *ainfo;
	ainfo = callee_info->args + callee_sig->hasthis;
	for (int i = 0; res && i < callee_sig->param_count; ++i) {
		res = IS_SUPPORTED_TAILCALL (ainfo [i].storage != ArgValuetypeAddrInIReg)
			&& IS_SUPPORTED_TAILCALL (ainfo [i].storage != ArgValuetypeAddrOnStack);
	}

	g_free (caller_info);
	g_free (callee_info);

	return res;
}

#endif /* DISABLE_JIT */
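/*
 * Hedged note on the tailcall checks above (added; not in the original
 * source): a tailcall reuses the caller's incoming argument area, so it is
 * only allowed when the callee needs no more stack than the caller already
 * has, and when no parameter is passed as the address of a caller-local
 * valuetype, since such an address would dangle once the caller's frame is
 * reused.
 */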
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
#ifndef _MSC_VER
	guint16 fpcw;

	/* spec compliance requires running with double precision */
	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__ __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	/* TODO: This is crashing on Win64 right now.
	 * _control87 (_PC_53, MCW_PC);
	 */
#endif
}
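/*
 * Illustrative note (added; not in the original source): fnstcw/fldcw store
 * and reload the x87 control word; clearing X86_FPCW_PRECC_MASK and or-ing in
 * X86_FPCW_PREC_DOUBLE switches the FPU from 80-bit extended to 64-bit double
 * rounding, matching the double-precision semantics the comment above refers
 * to.
 */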
/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
	bp_trampoline = mini_get_breakpoint_trampoline ();
}

/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
}
/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizations (guint32 *exclude_mask)
{
	guint32 opts = 0;

	*exclude_mask = 0;

	if (mono_hwcap_x86_has_cmov) {
		opts |= MONO_OPT_CMOV;

		if (mono_hwcap_x86_has_fcmov)
			opts |= MONO_OPT_FCMOV;
		else
			*exclude_mask |= MONO_OPT_FCMOV;
	} else {
		*exclude_mask |= MONO_OPT_CMOV;
	}

	return opts;
}
/*
 * This function tests for all SSE functions supported.
 *
 * Returns a bitmask corresponding to all supported versions.
 */
guint32
mono_arch_cpu_enumerate_simd_versions (void)
{
	guint32 sse_opts = 0;

	if (mono_hwcap_x86_has_sse1)
		sse_opts |= SIMD_VERSION_SSE1;

	if (mono_hwcap_x86_has_sse2)
		sse_opts |= SIMD_VERSION_SSE2;

	if (mono_hwcap_x86_has_sse3)
		sse_opts |= SIMD_VERSION_SSE3;

	if (mono_hwcap_x86_has_ssse3)
		sse_opts |= SIMD_VERSION_SSSE3;

	if (mono_hwcap_x86_has_sse41)
		sse_opts |= SIMD_VERSION_SSE41;

	if (mono_hwcap_x86_has_sse42)
		sse_opts |= SIMD_VERSION_SSE42;

	if (mono_hwcap_x86_has_sse4a)
		sse_opts |= SIMD_VERSION_SSE4a;

	return sse_opts;
}
MonoCPUFeatures
mono_arch_get_cpu_features (void)
{
	guint64 features = MONO_CPU_INITED;

	if (mono_hwcap_x86_has_popcnt)
		features |= MONO_CPU_X86_POPCNT;

	if (mono_hwcap_x86_has_lzcnt)
		features |= MONO_CPU_X86_LZCNT;

	return (MonoCPUFeatures)features;
}
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		if (mono_is_regsize_var (ins->inst_vtype)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
/*
 * mono_arch_compute_omit_fp:
 * Determine whether the frame pointer can be eliminated.
 */
static void
mono_arch_compute_omit_fp (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	int i, locals_size;
	CallInfo *cinfo;

	if (cfg->arch.omit_fp_computed)
		return;

	header = cfg->header;

	sig = mono_method_signature_internal (cfg->method);

	if (!cfg->arch.cinfo)
		cfg->arch.cinfo = get_call_info (cfg->mempool, sig);
	cinfo = cfg->arch.cinfo;

	/*
	 * FIXME: Remove some of the restrictions.
	 */
	cfg->arch.omit_fp = TRUE;
	cfg->arch.omit_fp_computed = TRUE;

	if (cfg->disable_omit_fp)
		cfg->arch.omit_fp = FALSE;
	if (!debug_omit_fp ())
		cfg->arch.omit_fp = FALSE;
	if (cfg->method->save_lmf)
		cfg->arch.omit_fp = FALSE;
	if (cfg->flags & MONO_CFG_HAS_ALLOCA)
		cfg->arch.omit_fp = FALSE;
	if (header->num_clauses)
		cfg->arch.omit_fp = FALSE;
	if (cfg->param_area)
		cfg->arch.omit_fp = FALSE;
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		cfg->arch.omit_fp = FALSE;
	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];

		if (ainfo->storage == ArgOnStack || ainfo->storage == ArgValuetypeAddrInIReg || ainfo->storage == ArgValuetypeAddrOnStack) {
			/*
			 * The stack offset can only be determined when the frame
			 * size is known.
			 */
			cfg->arch.omit_fp = FALSE;
		}
	}

	locals_size = 0;
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		int ialign;

		locals_size += mono_type_size (ins->inst_vtype, &ialign);
	}
}
GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	mono_arch_compute_omit_fp (cfg);

	if (cfg->arch.omit_fp)
		regs = g_list_prepend (regs, (gpointer)AMD64_RBP);

	/* We use the callee saved registers for global allocation */
	regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
	regs = g_list_prepend (regs, (gpointer)AMD64_R12);
	regs = g_list_prepend (regs, (gpointer)AMD64_R13);
	regs = g_list_prepend (regs, (gpointer)AMD64_R14);
	regs = g_list_prepend (regs, (gpointer)AMD64_R15);
#ifdef TARGET_WIN32
	regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
	regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
#endif

	return regs;
}
/*
 * mono_arch_regalloc_cost:
 *
 * Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		/* subtract 1 for the invisible store in the prolog */
		return (ins->opcode == OP_ARG) ? 0 : 1;
	else
		/* push+pop */
		return (ins->opcode == OP_ARG) ? 1 : 2;
}
/*
 * mono_arch_fill_argument_info:
 *
 * Populate cfg->args, cfg->ret and cfg->vret_addr with information about the arguments
 * of the method.
 */
void
mono_arch_fill_argument_info (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoInst *ins;
	int i;
	CallInfo *cinfo;

	sig = mono_method_signature_internal (cfg->method);

	cinfo = cfg->arch.cinfo;

	/*
	 * Contrary to mono_arch_allocate_vars (), the information should describe
	 * where the arguments are at the beginning of the method, not where they can be
	 * accessed during the execution of the method. The latter makes no sense for the
	 * global register allocator, since a variable can be in more than one location.
	 */
	switch (cinfo->ret.storage) {
	case ArgInIReg:
	case ArgInFloatSSEReg:
	case ArgInDoubleSSEReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgValuetypeInReg:
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = -1;
		cfg->ret->inst_offset = -1;
		break;
	default:
		g_assert_not_reached ();
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];

		ins = cfg->args [i];

		switch (ainfo->storage) {
		case ArgInIReg:
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			ins->opcode = OP_REGVAR;
			ins->inst_c0 = ainfo->reg;
			break;
		case ArgOnStack:
			ins->opcode = OP_REGOFFSET;
			ins->inst_basereg = -1;
			ins->inst_offset = -1;
			break;
		case ArgValuetypeInReg:
			/* Dummy */
			ins->opcode = OP_NOP;
			break;
		default:
			g_assert_not_reached ();
		}
	}
}
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoType *sig_ret;
	MonoMethodSignature *sig;
	MonoInst *ins;
	int i, offset;
	guint32 locals_stack_size, locals_stack_align;
	gint32 *offsets;
	CallInfo *cinfo;

	sig = mono_method_signature_internal (cfg->method);

	cinfo = cfg->arch.cinfo;
	sig_ret = mini_get_underlying_type (sig->ret);

	mono_arch_compute_omit_fp (cfg);

	/*
	 * We use the ABI calling conventions for managed code as well.
	 * Exception: valuetypes are only sometimes passed or returned in registers.
	 */

	/*
	 * The stack looks like this:
	 * <incoming arguments passed on the stack>
	 * <return value>
	 * <lmf/caller saved registers>
	 * <locals>
	 * <spill area>
	 * <localloc area>  -> grows dynamically
	 * <params area>
	 */
	if (cfg->arch.omit_fp) {
		cfg->flags |= MONO_CFG_HAS_SPILLUP;
		cfg->frame_reg = AMD64_RSP;
		offset = 0;
	} else {
		/* Locals are allocated backwards from %fp */
		cfg->frame_reg = AMD64_RBP;
		offset = 0;
	}

	cfg->arch.saved_iregs = cfg->used_int_regs;
	if (cfg->method->save_lmf) {
		/* Save all callee-saved registers normally (except RBP, if not already used), and restore them when unwinding through an LMF */
		guint32 iregs_to_save = AMD64_CALLEE_SAVED_REGS & ~(1<<AMD64_RBP);
		cfg->arch.saved_iregs |= iregs_to_save;
	}

	if (cfg->arch.omit_fp)
		cfg->arch.reg_save_area_offset = offset;
	/* Reserve space for callee saved registers */
	for (i = 0; i < AMD64_NREG; ++i)
		if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->arch.saved_iregs & (1 << i))) {
			offset += sizeof (target_mgreg_t);
		}
	if (!cfg->arch.omit_fp)
		cfg->arch.reg_save_area_offset = -offset;

	if (sig_ret->type != MONO_TYPE_VOID) {
		switch (cinfo->ret.storage) {
		case ArgInIReg:
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			cfg->ret->opcode = OP_REGVAR;
			cfg->ret->inst_c0 = cinfo->ret.reg;
			cfg->ret->dreg = cinfo->ret.reg;
			break;
		case ArgValuetypeAddrInIReg:
		case ArgGsharedvtVariableInReg:
			/* The register is volatile */
			cfg->vret_addr->opcode = OP_REGOFFSET;
			cfg->vret_addr->inst_basereg = cfg->frame_reg;
			if (cfg->arch.omit_fp) {
				cfg->vret_addr->inst_offset = offset;
				offset += 8;
			} else {
				offset += 8;
				cfg->vret_addr->inst_offset = -offset;
			}
			if (G_UNLIKELY (cfg->verbose_level > 1)) {
				printf ("vret_addr =");
				mono_print_ins (cfg->vret_addr);
			}
			break;
		case ArgValuetypeInReg:
			/* Allocate a local to hold the result, the epilog will copy it to the correct place */
			cfg->ret->opcode = OP_REGOFFSET;
			cfg->ret->inst_basereg = cfg->frame_reg;
			if (cfg->arch.omit_fp) {
				cfg->ret->inst_offset = offset;
				offset += cinfo->ret.pair_storage [1] == ArgNone ? 8 : 16;
			} else {
				offset += cinfo->ret.pair_storage [1] == ArgNone ? 8 : 16;
				cfg->ret->inst_offset = - offset;
			}
			break;
		default:
			g_assert_not_reached ();
		}
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, cfg->arch.omit_fp ? FALSE : TRUE, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	if (cfg->arch.omit_fp) {
		cfg->locals_min_stack_offset = offset;
		cfg->locals_max_stack_offset = offset + locals_stack_size;
	} else {
		cfg->locals_min_stack_offset = - (offset + locals_stack_size);
		cfg->locals_max_stack_offset = - offset;
	}

	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *ins = cfg->varinfo [i];
			ins->opcode = OP_REGOFFSET;
			ins->inst_basereg = cfg->frame_reg;
			if (cfg->arch.omit_fp)
				ins->inst_offset = (offset + offsets [i]);
			else
				ins->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (ins);
		}
	}
	offset += locals_stack_size;

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
		g_assert (!cfg->arch.omit_fp);
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ins = cfg->args [i];
		if (ins->opcode != OP_REGVAR) {
			ArgInfo *ainfo = &cinfo->args [i];
			gboolean inreg = TRUE;

			/* FIXME: Allocate volatile arguments to registers */
			if (ins->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
				inreg = FALSE;

			/*
			 * Under AMD64, all registers used to pass arguments to functions
			 * are volatile across calls.
			 * FIXME: Optimize this.
			 */
			if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg) || (ainfo->storage == ArgGSharedVtInReg))
				inreg = FALSE;

			ins->opcode = OP_REGOFFSET;

			switch (ainfo->storage) {
			case ArgInIReg:
			case ArgInFloatSSEReg:
			case ArgInDoubleSSEReg:
			case ArgGSharedVtInReg:
				if (inreg) {
					ins->opcode = OP_REGVAR;
					ins->dreg = ainfo->reg;
				}
				break;
			case ArgOnStack:
			case ArgGSharedVtOnStack:
				g_assert (!cfg->arch.omit_fp);
				ins->opcode = OP_REGOFFSET;
				ins->inst_basereg = cfg->frame_reg;
				ins->inst_offset = ainfo->offset + ARGS_OFFSET;
				break;
			case ArgValuetypeInReg:
				break;
			case ArgValuetypeAddrInIReg:
			case ArgValuetypeAddrOnStack: {
				MonoInst *indir;
				g_assert (!cfg->arch.omit_fp);
				g_assert (ainfo->storage == ArgValuetypeAddrInIReg || (ainfo->storage == ArgValuetypeAddrOnStack && ainfo->pair_storage [0] == ArgNone));
				MONO_INST_NEW (cfg, indir, 0);

				indir->opcode = OP_REGOFFSET;
				if (ainfo->pair_storage [0] == ArgInIReg) {
					indir->inst_basereg = cfg->frame_reg;
					offset = ALIGN_TO (offset, sizeof (target_mgreg_t));
					offset += sizeof (target_mgreg_t);
					indir->inst_offset = - offset;
				} else {
					indir->inst_basereg = cfg->frame_reg;
					indir->inst_offset = ainfo->offset + ARGS_OFFSET;
				}

				ins->opcode = OP_VTARG_ADDR;
				ins->inst_left = indir;
				break;
			}
			default:
				NOT_IMPLEMENTED;
			}

			if (!inreg && (ainfo->storage != ArgOnStack) && (ainfo->storage != ArgValuetypeAddrInIReg) && (ainfo->storage != ArgValuetypeAddrOnStack) && (ainfo->storage != ArgGSharedVtOnStack)) {
				ins->opcode = OP_REGOFFSET;
				ins->inst_basereg = cfg->frame_reg;
				/* These arguments are saved to the stack in the prolog */
				offset = ALIGN_TO (offset, sizeof (target_mgreg_t));
				if (cfg->arch.omit_fp) {
					ins->inst_offset = offset;
					offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (target_mgreg_t) : sizeof (target_mgreg_t);
					// Arguments are not yet supported by the stack map creation code
					//cfg->locals_max_stack_offset = MAX (cfg->locals_max_stack_offset, offset);
				} else {
					offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (target_mgreg_t) : sizeof (target_mgreg_t);
					ins->inst_offset = - offset;
					//cfg->locals_min_stack_offset = MIN (cfg->locals_min_stack_offset, offset);
				}
			}
		}
	}

	cfg->stack_offset = offset;
}
void
mono_arch_create_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	CallInfo *cinfo;

	sig = mono_method_signature_internal (cfg->method);

	if (!cfg->arch.cinfo)
		cfg->arch.cinfo = get_call_info (cfg->mempool, sig);
	cinfo = cfg->arch.cinfo;

	if (cinfo->ret.storage == ArgValuetypeInReg)
		cfg->ret_var_is_local = TRUE;

	if (cinfo->ret.storage == ArgValuetypeAddrInIReg || cinfo->ret.storage == ArgGsharedvtVariableInReg) {
		cfg->vret_addr = mono_compile_create_var (cfg, mono_get_int_type (), OP_ARG);
		if (G_UNLIKELY (cfg->verbose_level > 1)) {
			printf ("vret_addr = ");
			mono_print_ins (cfg->vret_addr);
		}
	}

	if (cfg->gen_sdb_seq_points) {
		MonoInst *ins;

		if (cfg->compile_aot) {
			MonoInst *ins = mono_compile_create_var (cfg, mono_get_int_type (), OP_LOCAL);
			ins->flags |= MONO_INST_VOLATILE;
			cfg->arch.seq_point_info_var = ins;
		}
		ins = mono_compile_create_var (cfg, mono_get_int_type (), OP_LOCAL);
		ins->flags |= MONO_INST_VOLATILE;
		cfg->arch.ss_tramp_var = ins;

		ins = mono_compile_create_var (cfg, mono_get_int_type (), OP_LOCAL);
		ins->flags |= MONO_INST_VOLATILE;
		cfg->arch.bp_tramp_var = ins;
	}

	if (cfg->method->save_lmf)
		cfg->create_lmf_var = TRUE;

	if (cfg->method->save_lmf) {
		cfg->lmf_ir = TRUE;
	}
}
static void
add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int reg, MonoInst *tree)
{
	MonoInst *ins;

	switch (storage) {
	case ArgInIReg:
		MONO_INST_NEW (cfg, ins, OP_MOVE);
		ins->dreg = mono_alloc_ireg_copy (cfg, tree->dreg);
		ins->sreg1 = tree->dreg;
		MONO_ADD_INS (cfg->cbb, ins);
		mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, FALSE);
		break;
	case ArgInFloatSSEReg:
		MONO_INST_NEW (cfg, ins, OP_AMD64_SET_XMMREG_R4);
		ins->dreg = mono_alloc_freg (cfg);
		ins->sreg1 = tree->dreg;
		MONO_ADD_INS (cfg->cbb, ins);

		mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
		break;
	case ArgInDoubleSSEReg:
		MONO_INST_NEW (cfg, ins, OP_FMOVE);
		ins->dreg = mono_alloc_freg (cfg);
		ins->sreg1 = tree->dreg;
		MONO_ADD_INS (cfg->cbb, ins);

		mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
		break;
	default:
		g_assert_not_reached ();
	}
}
static int
arg_storage_to_load_membase (ArgStorage storage)
{
	switch (storage) {
	case ArgInIReg:
#if defined(MONO_ARCH_ILP32)
		return OP_LOADI8_MEMBASE;
#else
		return OP_LOAD_MEMBASE;
#endif
	case ArgInDoubleSSEReg:
		return OP_LOADR8_MEMBASE;
	case ArgInFloatSSEReg:
		return OP_LOADR4_MEMBASE;
	default:
		g_assert_not_reached ();
	}

	return -1;
}
static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
	MonoMethodSignature *tmp_sig;
	int sig_reg;

	if (call->tailcall) // FIXME tailcall is not always yet initialized.
		NOT_IMPLEMENTED;

	g_assert (cinfo->sig_cookie.storage == ArgOnStack);

	/*
	 * mono_ArgIterator_Setup assumes the signature cookie is
	 * passed first and all the arguments which were before it are
	 * passed on the stack after the signature. So compensate by
	 * passing a different signature.
	 */
	tmp_sig = mono_metadata_signature_dup_full (m_class_get_image (cfg->method->klass), call->signature);
	tmp_sig->param_count -= call->signature->sentinelpos;
	tmp_sig->sentinelpos = 0;
	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

	sig_reg = mono_alloc_ireg (cfg);
	MONO_EMIT_NEW_SIGNATURECONST (cfg, sig_reg, tmp_sig);

	MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, cinfo->sig_cookie.offset, sig_reg);
}
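/*
 * Illustrative sketch (added commentary, not from the original source): for a
 * managed vararg call such as Foo (1, __arglist (2.0, "x")), sentinelpos
 * splits the signature, so the duplicated signature stored above describes
 * only the two trailing arguments and mono_ArgIterator_Setup can walk them
 * off the stack right after the cookie.
 */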
static LLVMArgStorage
arg_storage_to_llvm_arg_storage (MonoCompile *cfg, ArgStorage storage)
{
	switch (storage) {
	case ArgInIReg:
		return LLVMArgInIReg;
	case ArgGSharedVtInReg:
	case ArgGSharedVtOnStack:
		return LLVMArgGSharedVt;
	default:
		g_assert_not_reached ();
		return LLVMArgNone;
	}
}
LLVMCallInfo*
mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
{
	LLVMCallInfo *linfo;
	MonoType *t, *sig_ret;
	int i, j, n;
	CallInfo *cinfo;
	ArgInfo *ainfo;

	n = sig->param_count + sig->hasthis;
	sig_ret = mini_get_underlying_type (sig->ret);

	cinfo = get_call_info (cfg->mempool, sig);

	linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));

	/*
	 * LLVM always uses the native ABI while we use our own ABI, the
	 * only difference is the handling of vtypes:
	 * - we only pass/receive them in registers in some cases, and only
	 *   in 1 or 2 integer registers.
	 */
	switch (cinfo->ret.storage) {
	case ArgNone:
		linfo->ret.storage = LLVMArgNone;
		break;
	case ArgInIReg:
	case ArgInFloatSSEReg:
	case ArgInDoubleSSEReg:
		linfo->ret.storage = LLVMArgNormal;
		break;
	case ArgValuetypeInReg: {
		ainfo = &cinfo->ret;

		if (sig->pinvoke &&
			(ainfo->pair_storage [0] == ArgInFloatSSEReg || ainfo->pair_storage [0] == ArgInDoubleSSEReg ||
			 ainfo->pair_storage [1] == ArgInFloatSSEReg || ainfo->pair_storage [1] == ArgInDoubleSSEReg)) {
			cfg->exception_message = g_strdup ("pinvoke + vtype ret");
			cfg->disable_llvm = TRUE;
			return linfo;
		}

		linfo->ret.storage = LLVMArgVtypeInReg;
		for (j = 0; j < 2; ++j)
			linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
		break;
	}
	case ArgValuetypeAddrInIReg:
	case ArgGsharedvtVariableInReg:
		/* Vtype returned using a hidden argument */
		linfo->ret.storage = LLVMArgVtypeRetAddr;
		linfo->vret_arg_index = cinfo->vret_arg_index;
		break;
	default:
		g_assert_not_reached ();
		break;
	}

	for (i = 0; i < n; ++i) {
		ainfo = cinfo->args + i;

		if (i >= sig->hasthis)
			t = sig->params [i - sig->hasthis];
		else
			t = mono_get_int_type ();
		t = mini_type_get_underlying_type (t);

		linfo->args [i].storage = LLVMArgNone;

		switch (ainfo->storage) {
		case ArgInIReg:
			linfo->args [i].storage = LLVMArgNormal;
			break;
		case ArgInDoubleSSEReg:
		case ArgInFloatSSEReg:
			linfo->args [i].storage = LLVMArgNormal;
			break;
		case ArgOnStack:
			if (MONO_TYPE_ISSTRUCT (t))
				linfo->args [i].storage = LLVMArgVtypeByVal;
			else
				linfo->args [i].storage = LLVMArgNormal;
			break;
		case ArgValuetypeInReg:
			if (sig->pinvoke &&
				(ainfo->pair_storage [0] == ArgInFloatSSEReg || ainfo->pair_storage [0] == ArgInDoubleSSEReg ||
				 ainfo->pair_storage [1] == ArgInFloatSSEReg || ainfo->pair_storage [1] == ArgInDoubleSSEReg)) {
				cfg->exception_message = g_strdup ("pinvoke + vtypes");
				cfg->disable_llvm = TRUE;
				return linfo;
			}

			linfo->args [i].storage = LLVMArgVtypeInReg;
			for (j = 0; j < 2; ++j)
				linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
			break;
		case ArgGSharedVtInReg:
		case ArgGSharedVtOnStack:
			linfo->args [i].storage = LLVMArgGSharedVt;
			break;
		case ArgValuetypeAddrInIReg:
		case ArgValuetypeAddrOnStack:
			linfo->args [i].storage = LLVMArgVtypeAddr;
			break;
		default:
			cfg->exception_message = g_strdup ("ainfo->storage");
			cfg->disable_llvm = TRUE;
			break;
		}
	}

	return linfo;
}
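/*
 * Example (illustrative; the native-ABI detail is an assumption stated here,
 * not in the source): a P/Invoke returning struct { double x; double y; }
 * comes back in XMM0:XMM1, so cinfo reports ArgInDoubleSSEReg pair storage
 * and the check above falls back to the non-LLVM path instead of modelling
 * that return in LLVM.
 */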
void
mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n;
	CallInfo *cinfo;
	ArgInfo *ainfo;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->mempool, sig);

	if (COMPILE_LLVM (cfg)) {
		/* We shouldn't be called in the llvm case */
		cfg->disable_llvm = TRUE;
		return;
	}

	/*
	 * Emit all arguments which are passed on the stack to prevent register
	 * allocation problems.
	 */
	for (i = 0; i < n; ++i) {
		MonoType *t;

		ainfo = cinfo->args + i;

		in = call->args [i];

		if (sig->hasthis && i == 0)
			t = mono_get_object_type ();
		else
			t = sig->params [i - sig->hasthis];

		t = mini_get_underlying_type (t);
		//XXX what about ArgGSharedVtOnStack here?
		// FIXME tailcall is not always yet initialized.
		if (ainfo->storage == ArgOnStack && !MONO_TYPE_ISSTRUCT (t) && !call->tailcall) {
			if (!t->byref) {
				if (t->type == MONO_TYPE_R4)
					MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
				else if (t->type == MONO_TYPE_R8)
					MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
				else
					MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
			} else {
				MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
			}
			if (cfg->compute_gc_maps) {
				MonoInst *def;

				EMIT_NEW_GC_PARAM_SLOT_LIVENESS_DEF (cfg, def, ainfo->offset, t);
			}
		}
	}

	/*
	 * Emit all parameters passed in registers in non-reverse order for better readability
	 * and to help the optimization in emit_prolog ().
	 */
	for (i = 0; i < n; ++i) {
		ainfo = cinfo->args + i;

		in = call->args [i];

		if (ainfo->storage == ArgInIReg)
			add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in);
	}

	for (i = n - 1; i >= 0; --i) {
		MonoType *t;

		ainfo = cinfo->args + i;

		in = call->args [i];

		if (sig->hasthis && i == 0)
			t = mono_get_object_type ();
		else
			t = sig->params [i - sig->hasthis];
		t = mini_get_underlying_type (t);

		switch (ainfo->storage) {
		case ArgInIReg:
			/* Already done */
			break;
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in);
			break;
		case ArgOnStack:
		case ArgValuetypeInReg:
		case ArgValuetypeAddrInIReg:
		case ArgValuetypeAddrOnStack:
		case ArgGSharedVtInReg:
		case ArgGSharedVtOnStack: {
			// FIXME tailcall is not always yet initialized.
			if (ainfo->storage == ArgOnStack && !MONO_TYPE_ISSTRUCT (t) && !call->tailcall)
				/* Already emitted above */
				break;
			//FIXME what about ArgGSharedVtOnStack ?
			// FIXME tailcall is not always yet initialized.
			if (ainfo->storage == ArgOnStack && call->tailcall) {
				MonoInst *call_inst = (MonoInst*)call;
				cfg->args [i]->flags |= MONO_INST_VOLATILE;
				EMIT_NEW_ARGSTORE (cfg, call_inst, i, in);
				break;
			}

			guint32 align;
			guint32 size;

			if (sig->pinvoke)
				size = mono_type_native_stack_size (t, &align);
			else {
				/*
				 * Other backends use mono_type_stack_size (), but that
				 * aligns the size to 8, which is larger than the size of
				 * the source, leading to reads of invalid memory if the
				 * source is at the end of address space.
				 */
				size = mono_class_value_size (mono_class_from_mono_type_internal (t), &align);
			}

			if (size >= 10000) {
				/* Avoid asserts in emit_memcpy () */
				mono_cfg_set_exception_invalid_program (cfg, g_strdup_printf ("Passing an argument of size '%d'.", size));
				/* Continue normally */
			}

			if (size > 0 || ainfo->pass_empty_struct) {
				MONO_INST_NEW (cfg, arg, OP_OUTARG_VT);
				arg->sreg1 = in->dreg;
				arg->klass = mono_class_from_mono_type_internal (t);
				arg->backend.size = size;
				arg->inst_p0 = call;
				arg->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
				memcpy (arg->inst_p1, ainfo, sizeof (ArgInfo));

				MONO_ADD_INS (cfg->cbb, arg);
			}
			break;
		}
		default:
			g_assert_not_reached ();
		}

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos))
			/* Emit the signature cookie just before the implicit arguments */
			emit_sig_cookie (cfg, call, cinfo);
	}

	/* Handle the case where there are no implicit arguments */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sig->sentinelpos))
		emit_sig_cookie (cfg, call, cinfo);

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) {
			/*
			 * Tell the JIT to use a more efficient calling convention: call using
			 * OP_CALL, compute the result location after the call, and save the
			 * result there.
			 */
			call->vret_in_reg = TRUE;
			/*
			 * Nullify the instruction computing the vret addr to enable
			 * future optimizations.
			 */
			if (call->vret_var)
				NULLIFY_INS (call->vret_var);
		} else {
			/*
			 * The valuetype is in RAX:RDX after the call, need to be copied to
			 * the stack. Push the address here, so the call instruction can
			 * access it.
			 */
			if (!cfg->arch.vret_addr_loc) {
				cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, mono_get_int_type (), OP_LOCAL);
				/* Prevent it from being register allocated or optimized away */
				cfg->arch.vret_addr_loc->flags |= MONO_INST_VOLATILE;
			}

			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->arch.vret_addr_loc->dreg, call->vret_var->dreg);
		}
		break;
	case ArgValuetypeAddrInIReg:
	case ArgGsharedvtVariableInReg: {
		MonoInst *vtarg;

		MONO_INST_NEW (cfg, vtarg, OP_MOVE);
		vtarg->sreg1 = call->vret_var->dreg;
		vtarg->dreg = mono_alloc_preg (cfg);
		MONO_ADD_INS (cfg->cbb, vtarg);

		mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
		break;
	}
	default:
		break;
	}

	if (cfg->method->save_lmf) {
		MONO_INST_NEW (cfg, arg, OP_AMD64_SAVE_SP_TO_LMF);
		MONO_ADD_INS (cfg->cbb, arg);
	}

	call->stack_usage = cinfo->stack_usage;
}
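/*
 * Summary note (added commentary): the argument loops above run in a fixed
 * order - stack slots first, then integer register arguments in forward
 * order, then everything else in reverse - and the vararg signature cookie is
 * emitted as soon as the sentinel position is crossed.
 */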
void
mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
{
	MonoInst *arg;
	MonoCallInst *call = (MonoCallInst*)ins->inst_p0;
	ArgInfo *ainfo = (ArgInfo*)ins->inst_p1;
	int size = ins->backend.size;

	switch (ainfo->storage) {
	case ArgValuetypeInReg: {
		MonoInst *load;
		int part;

		for (part = 0; part < 2; ++part) {
			if (ainfo->pair_storage [part] == ArgNone)
				continue;

			if (ainfo->pass_empty_struct) {
				//Pass empty struct value as 0 on platforms representing empty structs as 1 byte.
				NEW_ICONST (cfg, load, 0);
			}
			else {
				MONO_INST_NEW (cfg, load, arg_storage_to_load_membase (ainfo->pair_storage [part]));
				load->inst_basereg = src->dreg;
				load->inst_offset = part * sizeof (target_mgreg_t);
			}

			switch (ainfo->pair_storage [part]) {
			case ArgInIReg:
				load->dreg = mono_alloc_ireg (cfg);
				break;
			case ArgInDoubleSSEReg:
			case ArgInFloatSSEReg:
				load->dreg = mono_alloc_freg (cfg);
				break;
			default:
				g_assert_not_reached ();
			}
			MONO_ADD_INS (cfg->cbb, load);

			add_outarg_reg (cfg, call, ainfo->pair_storage [part], ainfo->pair_regs [part], load);
		}
		break;
	}
	case ArgValuetypeAddrInIReg:
	case ArgValuetypeAddrOnStack: {
		MonoInst *vtaddr, *load;

		g_assert (ainfo->storage == ArgValuetypeAddrInIReg || (ainfo->storage == ArgValuetypeAddrOnStack && ainfo->pair_storage [0] == ArgNone));

		vtaddr = mono_compile_create_var (cfg, m_class_get_byval_arg (ins->klass), OP_LOCAL);
		vtaddr->backend.is_pinvoke = call->signature->pinvoke;

		MONO_INST_NEW (cfg, load, OP_LDADDR);
		cfg->has_indirection = TRUE;
		load->inst_p0 = vtaddr;
		vtaddr->flags |= MONO_INST_INDIRECT;
		load->type = STACK_MP;
		load->klass = vtaddr->klass;
		load->dreg = mono_alloc_ireg (cfg);
		MONO_ADD_INS (cfg->cbb, load);
		mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, size, TARGET_SIZEOF_VOID_P);

		if (ainfo->pair_storage [0] == ArgInIReg) {
			MONO_INST_NEW (cfg, arg, OP_AMD64_LEA_MEMBASE);
			arg->dreg = mono_alloc_ireg (cfg);
			arg->sreg1 = load->dreg;
			arg->inst_imm = 0;
			MONO_ADD_INS (cfg->cbb, arg);
			mono_call_inst_add_outarg_reg (cfg, call, arg->dreg, ainfo->pair_regs [0], FALSE);
		} else {
			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, load->dreg);
		}
		break;
	}
	case ArgGSharedVtInReg:
		/* Pass by addr */
		mono_call_inst_add_outarg_reg (cfg, call, src->dreg, ainfo->reg, FALSE);
		break;
	case ArgGSharedVtOnStack:
		MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, src->dreg);
		break;
	default:
		if (size == 8) {
			int dreg = mono_alloc_ireg (cfg);

			MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, 0);
			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, dreg);
		} else if (size <= 40) {
			mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, TARGET_SIZEOF_VOID_P);
		} else {
			// FIXME: Code growth
			mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, TARGET_SIZEOF_VOID_P);
		}

		if (cfg->compute_gc_maps) {
			MonoInst *def;

			EMIT_NEW_GC_PARAM_SLOT_LIVENESS_DEF (cfg, def, ainfo->offset, m_class_get_byval_arg (ins->klass));
		}
	}
}
void
mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
{
	MonoType *ret = mini_get_underlying_type (mono_method_signature_internal (method)->ret);

	if (ret->type == MONO_TYPE_R4) {
		if (COMPILE_LLVM (cfg))
			MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
		else
			MONO_EMIT_NEW_UNALU (cfg, OP_AMD64_SET_XMMREG_R4, cfg->ret->dreg, val->dreg);
		return;
	} else if (ret->type == MONO_TYPE_R8) {
		MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
		return;
	}

	MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
}

#endif /* DISABLE_JIT */
#define EMIT_COND_BRANCH(ins,cond,sign) \
	if (ins->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
		if (optimize_branch_pred && \
		    x86_is_imm8 (ins->inst_true_bb->max_offset - offset)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	}
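/*
 * Usage sketch (illustrative): branch opcodes in mono_arch_output_basic_block
 * expand through this macro, e.g.
 *
 *   EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
 *
 * If the target bblock was already emitted, a direct branch to its native
 * offset is generated; otherwise a patch entry is recorded and an 8 bit or
 * 32 bit displacement is chosen from the max_offset estimate.
 */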
typedef struct {
	MonoMethodSignature *sig;
	CallInfo *cinfo;
	int nstack_args, nullable_area;
} ArchDynCallInfo;
static gboolean
dyn_call_supported (MonoMethodSignature *sig, CallInfo *cinfo)
{
	int i;

	switch (cinfo->ret.storage) {
	case ArgNone:
	case ArgInIReg:
	case ArgInFloatSSEReg:
	case ArgInDoubleSSEReg:
	case ArgValuetypeAddrInIReg:
	case ArgValuetypeInReg:
		break;
	default:
		return FALSE;
	}

	for (i = 0; i < cinfo->nargs; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		switch (ainfo->storage) {
		case ArgInIReg:
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
		case ArgValuetypeInReg:
		case ArgValuetypeAddrInIReg:
		case ArgValuetypeAddrOnStack:
		case ArgOnStack:
			break;
		default:
			return FALSE;
		}
	}

	return TRUE;
}
/*
 * mono_arch_dyn_call_prepare:
 *
 *   Return a pointer to an arch-specific structure which contains information
 * needed by mono_arch_get_dyn_call_args (). Return NULL if OP_DYN_CALL is not
 * supported for SIG.
 * This function is equivalent to ffi_prep_cif in libffi.
 */
MonoDynCallInfo*
mono_arch_dyn_call_prepare (MonoMethodSignature *sig)
{
	ArchDynCallInfo *info;
	CallInfo *cinfo;
	int i, aindex;

	cinfo = get_call_info (NULL, sig);

	if (!dyn_call_supported (sig, cinfo)) {
		g_free (cinfo);
		return NULL;
	}

	info = g_new0 (ArchDynCallInfo, 1);
	// FIXME: Preprocess the info to speed up get_dyn_call_args ().
	info->sig = sig;
	info->cinfo = cinfo;
	info->nstack_args = 0;

	for (i = 0; i < cinfo->nargs; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		switch (ainfo->storage) {
		case ArgOnStack:
		case ArgValuetypeAddrOnStack:
			info->nstack_args = MAX (info->nstack_args, (ainfo->offset / sizeof (target_mgreg_t)) + (ainfo->arg_size / sizeof (target_mgreg_t)));
			break;
		default:
			break;
		}
	}

	for (aindex = 0; aindex < sig->param_count; aindex++) {
		MonoType *t = sig->params [aindex];
		ArgInfo *ainfo = &cinfo->args [aindex + sig->hasthis];

		switch (t->type) {
		case MONO_TYPE_GENERICINST:
			if (t->type == MONO_TYPE_GENERICINST && mono_class_is_nullable (mono_class_from_mono_type_internal (t))) {
				MonoClass *klass = mono_class_from_mono_type_internal (t);
				int size;

				if (!(ainfo->storage == ArgValuetypeInReg || ainfo->storage == ArgOnStack)) {
					/* Nullables need a temporary buffer, its stored at the end of DynCallArgs.regs after the stack args */
					size = mono_class_value_size (klass, NULL);
					info->nullable_area += size;
				}
			}
			break;
		default:
			break;
		}
	}

	info->nullable_area = ALIGN_TO (info->nullable_area, 16);

	/* Align to 16 bytes */
	if (info->nstack_args & 1)
		info->nstack_args ++;

	return (MonoDynCallInfo*)info;
}
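/*
 * Illustrative call sequence (a sketch, not code from this file): callers are
 * expected to drive the dyn call API roughly as follows:
 *
 *   MonoDynCallInfo *dinfo = mono_arch_dyn_call_prepare (sig);
 *   if (dinfo) {
 *           guint8 *buf = g_alloca (mono_arch_dyn_call_get_buf_size (dinfo));
 *           mono_arch_start_dyn_call (dinfo, args, ret, buf);
 *           ... execute the OP_DYN_CALL trampoline on BUF ...
 *           mono_arch_finish_dyn_call (dinfo, buf);
 *           mono_arch_dyn_call_free (dinfo);
 *   }
 */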
/*
 * mono_arch_dyn_call_free:
 *
 *   Free a MonoDynCallInfo structure.
 */
void
mono_arch_dyn_call_free (MonoDynCallInfo *info)
{
	ArchDynCallInfo *ainfo = (ArchDynCallInfo*)info;

	g_free (ainfo->cinfo);
	g_free (ainfo);
}
int
mono_arch_dyn_call_get_buf_size (MonoDynCallInfo *info)
{
	ArchDynCallInfo *ainfo = (ArchDynCallInfo*)info;

	/* Extend the 'regs' field dynamically */
	return sizeof (DynCallArgs) + (ainfo->nstack_args * sizeof (target_mgreg_t)) + ainfo->nullable_area;
}
#define PTR_TO_GREG(ptr) ((host_mgreg_t)(ptr))
#define GREG_TO_PTR(greg) ((gpointer)(greg))
/*
 * mono_arch_get_start_dyn_call:
 *
 *   Convert the arguments ARGS to a format which can be passed to OP_DYN_CALL, and
 * store the result into BUF.
 * ARGS should be an array of pointers pointing to the arguments.
 * RET should point to a memory buffer large enough to hold the result of the
 * call.
 * This function should be as fast as possible, any work which does not depend
 * on the actual values of the arguments should be done in
 * mono_arch_dyn_call_prepare ().
 * start_dyn_call + OP_DYN_CALL + finish_dyn_call is equivalent to ffi_call in
 * libffi.
 */
void
mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, guint8 *buf)
{
	ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
	DynCallArgs *p = (DynCallArgs*)buf;
	int arg_index, greg, i, pindex;
	MonoMethodSignature *sig = dinfo->sig;
	int buffer_offset = 0;
	guint8 *nullable_buffer;
	static int general_param_reg_to_index [MONO_MAX_IREGS];
	static int float_param_reg_to_index [MONO_MAX_FREGS];

	static gboolean param_reg_to_index_inited;

	if (!param_reg_to_index_inited) {
		for (i = 0; i < PARAM_REGS; ++i)
			general_param_reg_to_index [param_regs [i]] = i;
		for (i = 0; i < FLOAT_PARAM_REGS; ++i)
			float_param_reg_to_index [float_param_regs [i]] = i;
		mono_memory_barrier ();
		param_reg_to_index_inited = 1;
	} else {
		mono_memory_barrier ();
	}

	p->res = 0;
	p->ret = ret;
	p->nstack_args = dinfo->nstack_args;

	arg_index = 0;
	greg = 0;
	pindex = 0;

	/* Stored after the stack arguments */
	nullable_buffer = (guint8*)&(p->regs [PARAM_REGS + dinfo->nstack_args]);

	if (sig->hasthis || dinfo->cinfo->vret_arg_index == 1) {
		p->regs [greg ++] = PTR_TO_GREG(*(args [arg_index ++]));
		if (!sig->hasthis)
			pindex = 1;
	}

	if (dinfo->cinfo->ret.storage == ArgValuetypeAddrInIReg || dinfo->cinfo->ret.storage == ArgGsharedvtVariableInReg)
		p->regs [greg ++] = PTR_TO_GREG (ret);

	for (; pindex < sig->param_count; pindex++) {
		MonoType *t = mini_get_underlying_type (sig->params [pindex]);
		gpointer *arg = args [arg_index ++];
		ArgInfo *ainfo = &dinfo->cinfo->args [pindex + sig->hasthis];
		int slot;

		if (ainfo->storage == ArgOnStack || ainfo->storage == ArgValuetypeAddrOnStack) {
			slot = PARAM_REGS + (ainfo->offset / sizeof (target_mgreg_t));
		} else if (ainfo->storage == ArgValuetypeAddrInIReg) {
			g_assert (ainfo->pair_storage [0] == ArgInIReg && ainfo->pair_storage [1] == ArgNone);
			slot = general_param_reg_to_index [ainfo->pair_regs [0]];
		} else if (ainfo->storage == ArgInFloatSSEReg || ainfo->storage == ArgInDoubleSSEReg) {
			slot = float_param_reg_to_index [ainfo->reg];
		} else {
			slot = general_param_reg_to_index [ainfo->reg];
		}

		if (t->byref) {
			p->regs [slot] = PTR_TO_GREG (*(arg));
			continue;
		}

		switch (t->type) {
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_PTR:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
#if !defined(MONO_ARCH_ILP32)
		case MONO_TYPE_I8:
		case MONO_TYPE_U8:
#endif
			p->regs [slot] = PTR_TO_GREG (*(arg));
			break;
#if defined(MONO_ARCH_ILP32)
		case MONO_TYPE_I8:
		case MONO_TYPE_U8:
			p->regs [slot] = *(guint64*)(arg);
			break;
#endif
		case MONO_TYPE_U1:
			p->regs [slot] = *(guint8*)(arg);
			break;
		case MONO_TYPE_I1:
			p->regs [slot] = *(gint8*)(arg);
			break;
		case MONO_TYPE_I2:
			p->regs [slot] = *(gint16*)(arg);
			break;
		case MONO_TYPE_U2:
			p->regs [slot] = *(guint16*)(arg);
			break;
		case MONO_TYPE_I4:
			p->regs [slot] = *(gint32*)(arg);
			break;
		case MONO_TYPE_U4:
			p->regs [slot] = *(guint32*)(arg);
			break;
		case MONO_TYPE_R4: {
			double d;
			*(float*)&d = *(float*)(arg);

			if (ainfo->storage == ArgOnStack) {
				*(double *)(p->regs + slot) = d;
			} else {
				p->has_fp = 1;
				p->fregs [slot] = d;
			}
			break;
		}
		case MONO_TYPE_R8:
			if (ainfo->storage == ArgOnStack) {
				*(double *)(p->regs + slot) = *(double*)(arg);
			} else {
				p->has_fp = 1;
				p->fregs [slot] = *(double*)(arg);
			}
			break;
		case MONO_TYPE_GENERICINST:
			if (MONO_TYPE_IS_REFERENCE (t)) {
				p->regs [slot] = PTR_TO_GREG (*(arg));
				break;
			} else if (t->type == MONO_TYPE_GENERICINST && mono_class_is_nullable (mono_class_from_mono_type_internal (t))) {
				MonoClass *klass = mono_class_from_mono_type_internal (t);
				guint8 *nullable_buf;
				int size;

				size = mono_class_value_size (klass, NULL);
				if (ainfo->storage == ArgValuetypeInReg || ainfo->storage == ArgOnStack) {
					nullable_buf = g_alloca (size);
				} else {
					nullable_buf = nullable_buffer + buffer_offset;
					buffer_offset += size;
					g_assert (buffer_offset <= dinfo->nullable_area);
				}

				/* The argument pointed to by arg is either a boxed vtype or null */
				mono_nullable_init (nullable_buf, (MonoObject*)arg, klass);

				arg = (gpointer*)nullable_buf;
				/* Fall through */
			} else {
				/* Fall through */
			}
		case MONO_TYPE_VALUETYPE: {
			switch (ainfo->storage) {
			case ArgValuetypeInReg:
				for (i = 0; i < 2; ++i) {
					switch (ainfo->pair_storage [i]) {
					case ArgNone:
						break;
					case ArgInIReg:
						slot = general_param_reg_to_index [ainfo->pair_regs [i]];
						p->regs [slot] = ((target_mgreg_t*)(arg))[i];
						break;
					case ArgInFloatSSEReg: {
						double d;
						p->has_fp = 1;
						slot = float_param_reg_to_index [ainfo->pair_regs [i]];
						*(float*)&d = ((float*)(arg))[i];
						p->fregs [slot] = d;
						break;
					}
					case ArgInDoubleSSEReg:
						p->has_fp = 1;
						slot = float_param_reg_to_index [ainfo->pair_regs [i]];
						p->fregs [slot] = ((double*)(arg))[i];
						break;
					default:
						g_assert_not_reached ();
						break;
					}
				}
				break;
			case ArgValuetypeAddrInIReg:
			case ArgValuetypeAddrOnStack:
				// In DYNCALL use case value types are already copied when included in parameter array.
				// Currently no need to make an extra temporary value type on stack for this use case.
				p->regs [slot] = (target_mgreg_t)arg;
				break;
			case ArgOnStack:
				for (i = 0; i < ainfo->arg_size / 8; ++i)
					p->regs [slot + i] = ((target_mgreg_t*)(arg))[i];
				break;
			default:
				g_assert_not_reached ();
				break;
			}
			break;
		}
		default:
			g_assert_not_reached ();
		}
	}
}
/*
 * mono_arch_finish_dyn_call:
 *
 *   Store the result of a dyn call into the return value buffer passed to
 * start_dyn_call ().
 * This function should be as fast as possible, any work which does not depend
 * on the actual values of the arguments should be done in
 * mono_arch_dyn_call_prepare ().
 */
void
mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
{
	ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
	MonoMethodSignature *sig = dinfo->sig;
	DynCallArgs *dargs = (DynCallArgs*)buf;
	guint8 *ret = dargs->ret;
	host_mgreg_t res = dargs->res;
	MonoType *sig_ret = mini_get_underlying_type (sig->ret);
	int i;

	switch (sig_ret->type) {
	case MONO_TYPE_VOID:
		*(gpointer*)ret = NULL;
		break;
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_PTR:
		*(gpointer*)ret = GREG_TO_PTR (res);
		break;
	case MONO_TYPE_I1:
		*(gint8*)ret = res;
		break;
	case MONO_TYPE_U1:
		*(guint8*)ret = res;
		break;
	case MONO_TYPE_I2:
		*(gint16*)ret = res;
		break;
	case MONO_TYPE_U2:
		*(guint16*)ret = res;
		break;
	case MONO_TYPE_I4:
		*(gint32*)ret = res;
		break;
	case MONO_TYPE_U4:
		*(guint32*)ret = res;
		break;
	case MONO_TYPE_I8:
		*(gint64*)ret = res;
		break;
	case MONO_TYPE_U8:
		*(guint64*)ret = res;
		break;
	case MONO_TYPE_R4:
		*(float*)ret = *(float*)&(dargs->fregs [0]);
		break;
	case MONO_TYPE_R8:
		*(double*)ret = dargs->fregs [0];
		break;
	case MONO_TYPE_GENERICINST:
		if (MONO_TYPE_IS_REFERENCE (sig_ret)) {
			*(gpointer*)ret = GREG_TO_PTR(res);
			break;
		} else {
			/* Fall through */
		}
	case MONO_TYPE_VALUETYPE:
		if (dinfo->cinfo->ret.storage == ArgValuetypeAddrInIReg || dinfo->cinfo->ret.storage == ArgGsharedvtVariableInReg) {
			/* Nothing to do */
		} else {
			ArgInfo *ainfo = &dinfo->cinfo->ret;

			g_assert (ainfo->storage == ArgValuetypeInReg);

			for (i = 0; i < 2; ++i) {
				switch (ainfo->pair_storage [0]) {
				case ArgInIReg:
					((host_mgreg_t*)ret)[i] = res;
					break;
				case ArgInDoubleSSEReg:
					((double*)ret)[i] = dargs->fregs [i];
					break;
				case ArgNone:
					break;
				default:
					g_assert_not_reached ();
					break;
				}
			}
		}
		break;
	default:
		g_assert_not_reached ();
	}
}
/* emit an exception if the condition fails */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
	do { \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, signed); \
		} else { \
			EMIT_COND_BRANCH (tins, cond, signed); \
		} \
	} while (0);
#define EMIT_SSE2_FPFUNC(code, op, dreg, sreg1) do { \
		amd64_movsd_membase_reg (code, AMD64_RSP, -8, (sreg1)); \
		amd64_fld_membase (code, AMD64_RSP, -8, TRUE); \
		amd64_ ##op (code); \
		amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE); \
		amd64_movsd_reg_membase (code, (dreg), AMD64_RSP, -8); \
	} while (0);
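/*
 * Note (added commentary): EMIT_SSE2_FPFUNC bounces an SSE2 double through the
 * x87 stack because instructions like fsin only exist there; for example
 * EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1) spills sreg1 just
 * below the stack pointer, runs the x87 instruction and reloads the result
 * into dreg.
 */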
static guint8*
emit_call (MonoCompile *cfg, MonoCallInst *call, guint8 *code, MonoJitICallId jit_icall_id)
{
	gboolean no_patch = FALSE;
	MonoJumpInfoTarget patch;

	// FIXME? This is similar to mono_call_to_patch, except it favors MONO_PATCH_INFO_ABS over call->jit_icall_id.

	if (jit_icall_id) {
		g_assert (!call);
		patch.type = MONO_PATCH_INFO_JIT_ICALL_ID;
		patch.target = GUINT_TO_POINTER (jit_icall_id);
	} else if (call->inst.flags & MONO_INST_HAS_METHOD) {
		patch.type = MONO_PATCH_INFO_METHOD;
		patch.target = call->method;
	} else {
		patch.type = MONO_PATCH_INFO_ABS;
		patch.target = call->fptr;
	}

	/*
	 * FIXME: Add support for thunks
	 */
	{
		gboolean near_call = FALSE;

		/*
		 * Indirect calls are expensive so try to make a near call if possible.
		 * The caller memory is allocated by the code manager so it is
		 * guaranteed to be at a 32 bit offset.
		 */

		if (patch.type != MONO_PATCH_INFO_ABS) {
			/* The target is in memory allocated using the code manager */
			near_call = TRUE;

			if (patch.type == MONO_PATCH_INFO_METHOD) {
				MonoMethod* const method = call->method;

				if (m_class_get_image (method->klass)->aot_module)
					/* The callee might be an AOT method */
					near_call = FALSE;
				if (method->dynamic)
					/* The target is in malloc-ed memory */
					near_call = FALSE;
			} else {
				/*
				 * The call might go directly to a native function without
				 * the wrapper.
				 */
				MonoJitICallInfo * const mi = mono_find_jit_icall_info (jit_icall_id);
				gconstpointer target = mono_icall_get_wrapper (mi);
				if ((((guint64)target) >> 32) != 0)
					near_call = FALSE;
			}
		} else {
			MonoJumpInfo *jinfo = NULL;

			if (cfg->abs_patches)
				jinfo = (MonoJumpInfo*)g_hash_table_lookup (cfg->abs_patches, call->fptr);

			if (jinfo) {
				if (jinfo->type == MONO_PATCH_INFO_JIT_ICALL_ADDR) {
					MonoJitICallInfo *mi = mono_find_jit_icall_info (jinfo->data.jit_icall_id);
					if (mi && (((guint64)mi->func) >> 32) == 0)
						near_call = TRUE;
					no_patch = TRUE;
				} else {
					/*
					 * This is not really an optimization, but required because the
					 * generic class init trampolines use R11 to pass the vtable.
					 */
					near_call = TRUE;
				}
			} else {
				jit_icall_id = call->jit_icall_id;

				if (jit_icall_id) {
					MonoJitICallInfo const *info = mono_find_jit_icall_info (jit_icall_id);

					// Change patch from MONO_PATCH_INFO_ABS to MONO_PATCH_INFO_JIT_ICALL_ID.
					patch.type = MONO_PATCH_INFO_JIT_ICALL_ID;
					patch.target = GUINT_TO_POINTER (jit_icall_id);

					if (info->func == info->wrapper) {
						/* No wrapper */
						if ((((guint64)info->func) >> 32) == 0)
							near_call = TRUE;
					} else {
						/* See the comment in mono_codegen () */
						near_call = TRUE;
					}
				}
				else if ((((guint64)patch.target) >> 32) == 0) {
					near_call = TRUE;
					no_patch = TRUE;
				}
			}
		}

		if (cfg->method->dynamic)
			/* These methods are allocated using malloc */
			near_call = FALSE;

#ifdef MONO_ARCH_NOMAP32BIT
		near_call = FALSE;
#endif
		/* The 64bit XEN kernel does not honour the MAP_32BIT flag. (#522894) */
		if (optimize_for_xen)
			near_call = FALSE;

		if (cfg->compile_aot) {
			near_call = TRUE;
			no_patch = TRUE;
		}

		if (near_call) {
			/*
			 * Align the call displacement to an address divisible by 4 so it does
			 * not span cache lines. This is required for code patching to work on SMP
			 * systems.
			 */
			if (!no_patch && ((guint32)(code + 1 - cfg->native_code) % 4) != 0) {
				guint32 pad_size = 4 - ((guint32)(code + 1 - cfg->native_code) % 4);
				amd64_padding (code, pad_size);
			}
			mono_add_patch_info (cfg, code - cfg->native_code, patch.type, patch.target);
			amd64_call_code (code, 0);
		}
		else {
			if (!no_patch && ((guint32)(code + 2 - cfg->native_code) % 8) != 0) {
				guint32 pad_size = 8 - ((guint32)(code + 2 - cfg->native_code) % 8);
				amd64_padding (code, pad_size);
				g_assert ((guint64)(code + 2 - cfg->native_code) % 8 == 0);
			}
			mono_add_patch_info (cfg, code - cfg->native_code, patch.type, patch.target);
			amd64_set_reg_template (code, GP_SCRATCH_REG);
			amd64_call_reg (code, GP_SCRATCH_REG);
		}
	}

	set_code_cursor (cfg, code);

	return code;
}
static int
store_membase_imm_to_store_membase_reg (int opcode)
{
	switch (opcode) {
	case OP_STORE_MEMBASE_IMM:
		return OP_STORE_MEMBASE_REG;
	case OP_STOREI4_MEMBASE_IMM:
		return OP_STOREI4_MEMBASE_REG;
	case OP_STOREI8_MEMBASE_IMM:
		return OP_STOREI8_MEMBASE_REG;
	}

	return -1;
}
#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB) || ((opcode) == OP_ISBB_IMM)))
/*
 * mono_arch_peephole_pass_1:
 *
 *   Perform peephole opts which should/can be performed before local regalloc
 */
void
mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		MonoInst *last_ins = mono_inst_prev (ins, FILTER_IL_SEQ_POINT);

		switch (ins->opcode) {
		case OP_ADD_IMM:
		case OP_IADD_IMM:
		case OP_LADD_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS) && (ins->inst_imm > 0)) {
				/*
				 * X86_LEA is like ADD, but doesn't have the
				 * sreg1==dreg restriction. inst_imm > 0 is needed since LEA sign-extends
				 * its operand to 64 bit.
				 */
				ins->opcode = ins->opcode == OP_IADD_IMM ? OP_X86_LEA_MEMBASE : OP_AMD64_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
			}
			break;
		case OP_LXOR:
		case OP_IXOR:
			if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
				MonoInst *ins2;

				/*
				 * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since
				 * the latter has length 2-3 instead of 6 (reverse constant
				 * propagation). These instruction sequences are very common
				 * in the initlocals bblock.
				 */
				for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM) || (ins2->opcode == OP_STORE_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
						ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
						ins2->sreg1 = ins->dreg;
					} else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG)) {
						/* Continue */
					} else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
						NULLIFY_INS (ins2);
						/* Continue */
					} else if (ins2->opcode == OP_IL_SEQ_POINT) {
						/* Continue */
					} else {
						break;
					}
				}
			}
			break;
		case OP_COMPARE_IMM:
		case OP_LCOMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0)
			 * -->
			 * OP_AMD64_TEST_NULL (reg)
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_AMD64_TEST_NULL;
			break;
		case OP_ICOMPARE_IMM:
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_AMD64_ICOMPARE_MEMBASE_IMM:
			/*
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_ICOMPARE_IMM;
				ins->sreg1 = last_ins->sreg1;

				/* check if we can remove cmp reg,0 with test null */
				if (!ins->inst_imm)
					ins->opcode = OP_X86_TEST_NULL;
			}
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
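/*
 * Example of the reverse constant propagation above (illustrative):
 *
 *   ICONST R10 <- 0                       ICONST R10 <- 0
 *   STOREI8_MEMBASE_IMM [fp-8] <- 0  =>   STOREI8_MEMBASE_REG [fp-8] <- R10
 *
 * The register form encodes in 2-3 bytes instead of 6, which pays off in the
 * initlocals bblock where long runs of zero stores are common.
 */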
void
mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		switch (ins->opcode) {
		case OP_ICONST:
		case OP_I8CONST: {
			MonoInst *next = mono_inst_next (ins, FILTER_IL_SEQ_POINT);
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we can't do it always */
			if (ins->inst_c0 == 0 && (!next || (next && INST_IGNORES_CFLAGS (next->opcode)))) {
				ins->opcode = OP_LXOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;
				/* Fall through */
			} else
				break;
		}
		case OP_LXOR:
			/*
			 * Use IXOR to avoid a rex prefix if possible. The cpu will sign extend the
			 * 0 result into 64 bits.
			 */
			if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
				ins->opcode = OP_IXOR;
			}
			/* Fall through */
		case OP_IXOR:
			if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
				MonoInst *ins2;

				/*
				 * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since
				 * the latter has length 2-3 instead of 6 (reverse constant
				 * propagation). These instruction sequences are very common
				 * in the initlocals bblock.
				 */
				for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM) || (ins2->opcode == OP_STORE_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
						ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
						ins2->sreg1 = ins->dreg;
					} else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_REG) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG) || (ins2->opcode == OP_LIVERANGE_START) || (ins2->opcode == OP_GC_LIVENESS_DEF) || (ins2->opcode == OP_GC_LIVENESS_USE)) {
						/* Continue */
					} else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
						NULLIFY_INS (ins2);
						/* Continue */
					} else if (ins2->opcode == OP_IL_SEQ_POINT) {
						/* Continue */
					} else {
						break;
					}
				}
			}
			break;
		case OP_IADD_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_ISUB_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
#define NEW_INS(cfg,ins,dest,op) do { \
		MONO_INST_NEW ((cfg), (dest), (op)); \
		(dest)->cil_code = (ins)->cil_code; \
		mono_bblock_insert_before_ins (bb, ins, (dest)); \
	} while (0)

#define NEW_SIMD_INS(cfg,ins,dest,op,d,s1,s2) do { \
		MONO_INST_NEW ((cfg), (dest), (op)); \
		(dest)->cil_code = (ins)->cil_code; \
		(dest)->dreg = d; \
		(dest)->sreg1 = s1; \
		(dest)->sreg2 = s2; \
		(dest)->type = STACK_VTYPE; \
		(dest)->klass = ins->klass; \
		mono_bblock_insert_before_ins (bb, ins, (dest)); \
	} while (0)
static int
simd_type_to_comp_op (int t)
{
	switch (t) {
	case MONO_TYPE_I1:
	case MONO_TYPE_U1:
		return OP_PCMPEQB;
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
		return OP_PCMPEQW;
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
		return OP_PCMPEQD;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		return OP_PCMPEQQ; // SSE 4.1
	default:
		g_assert_not_reached ();
		return -1;
	}
}

static int
simd_type_to_sub_op (int t)
{
	switch (t) {
	case MONO_TYPE_I1:
	case MONO_TYPE_U1:
		return OP_PSUBB;
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
		return OP_PSUBW;
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
		return OP_PSUBD;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		return OP_PSUBQ;
	default:
		g_assert_not_reached ();
		return -1;
	}
}

static int
simd_type_to_shl_op (int t)
{
	switch (t) {
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
		return OP_PSHLW;
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
		return OP_PSHLD;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		return OP_PSHLQ;
	default:
		g_assert_not_reached ();
		return -1;
	}
}

static int
simd_type_to_gt_op (int t)
{
	switch (t) {
	case MONO_TYPE_I1:
	case MONO_TYPE_U1:
		return OP_PCMPGTB;
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
		return OP_PCMPGTW;
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
		return OP_PCMPGTD;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		return OP_PCMPGTQ; // SSE 4.2
	default:
		g_assert_not_reached ();
		return -1;
	}
}

static int
simd_type_to_max_un_op (int t)
{
	switch (t) {
	case MONO_TYPE_U1:
		return OP_PMAXB_UN;
	case MONO_TYPE_U2:
		return OP_PMAXW_UN; // SSE 4.1
	case MONO_TYPE_U4:
		return OP_PMAXD_UN; // SSE 4.1
	//case MONO_TYPE_U8:
	//	return OP_PMAXQ_UN; // AVX
	default:
		g_assert_not_reached ();
		return -1;
	}
}

static int
simd_type_to_add_op (int t)
{
	switch (t) {
	case MONO_TYPE_I1:
	case MONO_TYPE_U1:
		return OP_PADDB;
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
		return OP_PADDW;
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
		return OP_PADDD;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		return OP_PADDQ;
	default:
		g_assert_not_reached ();
		return -1;
	}
}

static int
simd_type_to_min_op (int t)
{
	switch (t) {
	case MONO_TYPE_I1:
		return OP_PMINB; // SSE 4.1
	case MONO_TYPE_U1:
		return OP_PMINB_UN; // SSE 4.1
	case MONO_TYPE_I2:
		return OP_PMINW;
	case MONO_TYPE_U2:
		return OP_PMINW_UN;
	case MONO_TYPE_I4:
		return OP_PMIND; // SSE 4.1
	case MONO_TYPE_U4:
		return OP_PMIND_UN; // SSE 4.1
	// case MONO_TYPE_I8: // AVX
	// case MONO_TYPE_U8:
	default:
		g_assert_not_reached ();
		return -1;
	}
}

static int
simd_type_to_max_op (int t)
{
	switch (t) {
	case MONO_TYPE_I1:
		return OP_PMAXB; // SSE 4.1
	case MONO_TYPE_U1:
		return OP_PMAXB_UN; // SSE 4.1
	case MONO_TYPE_I2:
		return OP_PMAXW;
	case MONO_TYPE_U2:
		return OP_PMAXW_UN;
	case MONO_TYPE_I4:
		return OP_PMAXD; // SSE 4.1
	case MONO_TYPE_U4:
		return OP_PMAXD_UN; // SSE 4.1
	// case MONO_TYPE_I8: // AVX
	// case MONO_TYPE_U8:
	default:
		g_assert_not_reached ();
		return -1;
	}
}
static void
emit_simd_comp_op (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins, int type, int dreg, int sreg1, int sreg2)
{
	MonoInst *temp;

	if (!mono_hwcap_x86_has_sse42 && (ins->inst_c1 == MONO_TYPE_I8 || ins->inst_c1 == MONO_TYPE_U8)) {
		int temp_reg1 = mono_alloc_ireg (cfg);
		int temp_reg2 = mono_alloc_ireg (cfg);

		NEW_SIMD_INS (cfg, ins, temp, OP_PCMPEQD, temp_reg1, sreg1, sreg2);
		NEW_SIMD_INS (cfg, ins, temp, OP_PSHUFLED, temp_reg2, temp_reg1, -1);
		temp->inst_c0 = 0xB1;
		NEW_SIMD_INS (cfg, ins, temp, OP_ANDPD, dreg, temp_reg1, temp_reg2);
	} else {
		NEW_SIMD_INS (cfg, ins, temp, simd_type_to_comp_op (type), dreg, sreg1, sreg2);
	}
}
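/*
 * Added note on the SSE2 fallback above: PCMPEQQ is SSE 4.1, so 64 bit lane
 * equality is emulated by comparing 32 bit lanes and then ANDing each dword
 * of the mask with its neighbour - shuffle control 0xB1 swaps adjacent
 * dwords, so a qword lane ends up all ones only if both of its halves
 * compared equal.
 */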
static void
emit_simd_gt_op (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins, int type, int dreg, int sreg1, int sreg2);
static void
emit_simd_gt_un_op (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins, int type, int dreg, int sreg1, int sreg2)
{
	MonoInst *temp;

	switch (type) {
	case MONO_TYPE_U2:
	case MONO_TYPE_U4:
		if (mono_hwcap_x86_has_sse41)
			goto USE_MAX;
		goto USE_SIGNED_SHIFT;

	case MONO_TYPE_U1:
USE_MAX:
		{
			// dreg = max(sreg1, sreg2) != sreg2

			int temp_reg1 = mono_alloc_ireg (cfg);
			int temp_reg2 = mono_alloc_ireg (cfg);
			int temp_reg3 = mono_alloc_ireg (cfg);

			NEW_SIMD_INS (cfg, ins, temp, simd_type_to_max_un_op (type), temp_reg1, sreg1, sreg2);
			emit_simd_comp_op (cfg, bb, ins, ins->inst_c1, temp_reg2, temp_reg1, ins->sreg2);
			NEW_SIMD_INS (cfg, ins, temp, OP_XONES, temp_reg3, -1, -1);
			NEW_SIMD_INS (cfg, ins, temp, OP_XORPD, dreg, temp_reg2, temp_reg3);
			break;
		}

	case MONO_TYPE_U8:
USE_SIGNED_SHIFT:
		{
			// convert to signed integer by subtracting (1 << (size - 1)) from each operand
			// and then use signed comparison

			int temp_c0 = mono_alloc_ireg (cfg);
			int temp_c80 = mono_alloc_ireg (cfg);
			int temp_s1 = mono_alloc_ireg (cfg);
			int temp_s2 = mono_alloc_ireg (cfg);

			NEW_SIMD_INS (cfg, ins, temp, OP_XONES, temp_c0, -1, -1);
			NEW_SIMD_INS (cfg, ins, temp, simd_type_to_shl_op (type), temp_c80, temp_c0, -1);
			temp->inst_imm = type == MONO_TYPE_U2 ? 15 : (type == MONO_TYPE_U4 ? 31 : 63);
			NEW_SIMD_INS (cfg, ins, temp, simd_type_to_sub_op (type), temp_s1, sreg1, temp_c80);
			NEW_SIMD_INS (cfg, ins, temp, simd_type_to_sub_op (type), temp_s2, sreg2, temp_c80);
			emit_simd_gt_op (cfg, bb, ins, type, dreg, temp_s1, temp_s2);
			break;
		}
	}
}
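/*
 * Worked example of the bias trick above (illustrative): for 16 bit lanes,
 * XONES shifted left by 15 leaves 0x8000 in every lane. Comparing 0x8000 and
 * 0x0001 as unsigned should give "greater", but signed PCMPGTW sees
 * -32768 > 1 and fails; after subtracting the 0x8000 bias the operands become
 * 0x0000 and 0x8001 (= -32767), and the signed compare 0 > -32767 now gives
 * the right answer.
 */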
static void
emit_simd_gt_op (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins, int type, int dreg, int sreg1, int sreg2)
{
	MonoInst *temp;

	if (!mono_hwcap_x86_has_sse42 && (type == MONO_TYPE_I8 || type == MONO_TYPE_U8)) {
		// Decompose 64-bit greater than to 32-bit
		//
		// t = (v1 > v2)
		// u = (v1 == v2)
		// v = (v1 > v2) unsigned
		//
		// z = shuffle(t, (3, 3, 1, 1))
		// t1 = shuffle(v, (2, 2, 0, 0))
		// u1 = shuffle(u, (3, 3, 1, 1))
		// w = and(t1, u1)
		// result = bitwise_or(z, w)

		int temp_t = mono_alloc_ireg (cfg);
		int temp_u = mono_alloc_ireg (cfg);
		int temp_v = mono_alloc_ireg (cfg);
		int temp_z = temp_t;
		int temp_t1 = temp_v;
		int temp_u1 = temp_u;
		int temp_w = temp_t1;

		NEW_SIMD_INS (cfg, ins, temp, OP_PCMPGTD, temp_t, sreg1, sreg2);
		NEW_SIMD_INS (cfg, ins, temp, OP_PCMPEQD, temp_u, sreg1, sreg2);
		emit_simd_gt_un_op (cfg, bb, ins, MONO_TYPE_U4, temp_v, sreg1, sreg2);
		NEW_SIMD_INS (cfg, ins, temp, OP_PSHUFLED, temp_z, temp_t, -1);
		temp->inst_c0 = 0xF5;
		NEW_SIMD_INS (cfg, ins, temp, OP_PSHUFLED, temp_t1, temp_v, -1);
		temp->inst_c0 = 0xA0;
		NEW_SIMD_INS (cfg, ins, temp, OP_PSHUFLED, temp_u1, temp_u, -1);
		temp->inst_c0 = 0xF5;
		NEW_SIMD_INS (cfg, ins, temp, OP_ANDPD, temp_w, temp_t1, temp_u1);
		NEW_SIMD_INS (cfg, ins, temp, OP_ORPD, dreg, temp_z, temp_w);
	} else {
		NEW_SIMD_INS (cfg, ins, temp, simd_type_to_gt_op (type), dreg, sreg1, sreg2);
	}
}
static void
emit_simd_min_op (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins, int type, int dreg, int sreg1, int sreg2)
{
	MonoInst *temp;

	if (type == MONO_TYPE_I2 || type == MONO_TYPE_U2) {
		// SSE2, so always available
		NEW_SIMD_INS (cfg, ins, temp, simd_type_to_min_op (type), dreg, sreg1, sreg2);
	} else if (!mono_hwcap_x86_has_sse41 || type == MONO_TYPE_I8 || type == MONO_TYPE_U8) {
		// Decompose to t = (s1 > s2), d = (s1 & !t) | (s2 & t)
		int temp_t = mono_alloc_ireg (cfg);
		int temp_d1 = mono_alloc_ireg (cfg);
		int temp_d2 = mono_alloc_ireg (cfg);
		if (type == MONO_TYPE_U8 || type == MONO_TYPE_U4 || type == MONO_TYPE_U1)
			emit_simd_gt_un_op (cfg, bb, ins, type, temp_t, sreg1, sreg2);
		else
			emit_simd_gt_op (cfg, bb, ins, type, temp_t, sreg1, sreg2);
		NEW_SIMD_INS (cfg, ins, temp, OP_PANDN, temp_d1, temp_t, sreg1);
		NEW_SIMD_INS (cfg, ins, temp, OP_PAND, temp_d2, temp_t, sreg2);
		NEW_SIMD_INS (cfg, ins, temp, OP_POR, dreg, temp_d1, temp_d2);
	} else {
		// SSE 4.1 has byte- and dword- operations
		NEW_SIMD_INS (cfg, ins, temp, simd_type_to_min_op (type), dreg, sreg1, sreg2);
	}
}
static void
emit_simd_max_op (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins, int type, int dreg, int sreg1, int sreg2)
{
	MonoInst *temp;

	if (type == MONO_TYPE_I2 || type == MONO_TYPE_U2) {
		// SSE2, so always available
		NEW_SIMD_INS (cfg, ins, temp, simd_type_to_max_op (type), dreg, sreg1, sreg2);
	} else if (!mono_hwcap_x86_has_sse41 || type == MONO_TYPE_I8 || type == MONO_TYPE_U8) {
		// Decompose to t = (s1 > s2), d = (s1 & t) | (s2 & !t)
		int temp_t = mono_alloc_ireg (cfg);
		int temp_d1 = mono_alloc_ireg (cfg);
		int temp_d2 = mono_alloc_ireg (cfg);
		if (type == MONO_TYPE_U8 || type == MONO_TYPE_U4 || type == MONO_TYPE_U1)
			emit_simd_gt_un_op (cfg, bb, ins, type, temp_t, sreg1, sreg2);
		else
			emit_simd_gt_op (cfg, bb, ins, type, temp_t, sreg1, sreg2);
		NEW_SIMD_INS (cfg, ins, temp, OP_PAND, temp_d1, temp_t, sreg1);
		NEW_SIMD_INS (cfg, ins, temp, OP_PANDN, temp_d2, temp_t, sreg2);
		NEW_SIMD_INS (cfg, ins, temp, OP_POR, dreg, temp_d1, temp_d2);
	} else {
		// SSE 4.1 has byte- and dword- operations
		NEW_SIMD_INS (cfg, ins, temp, simd_type_to_max_op (type), dreg, sreg1, sreg2);
	}
}
/*
 * mono_arch_lowering_pass:
 *
 *  Converts complex opcodes into simpler ones so that each IR instruction
 * corresponds to one machine instruction.
 */
void
mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n, *temp;

	/*
	 * FIXME: Need to add more instructions, but the current machine
	 * description can't model some parts of the composite instructions like
	 * cdq.
	 */
	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		switch (ins->opcode) {
		case OP_DIV_IMM:
		case OP_REM_IMM:
		case OP_IDIV_IMM:
		case OP_IDIV_UN_IMM:
		case OP_IREM_UN_IMM:
		case OP_IREM_IMM:
			mono_decompose_op_imm (cfg, bb, ins);
			break;
		case OP_COMPARE_IMM:
		case OP_LCOMPARE_IMM:
			if (!amd64_use_imm32 (ins->inst_imm)) {
				NEW_INS (cfg, ins, temp, OP_I8CONST);
				temp->inst_c0 = ins->inst_imm;
				temp->dreg = mono_alloc_ireg (cfg);
				ins->opcode = OP_COMPARE;
				ins->sreg2 = temp->dreg;
			}
			break;
#ifndef MONO_ARCH_ILP32
		case OP_LOAD_MEMBASE:
#endif
		case OP_LOADI8_MEMBASE:
			/* Don't generate memindex opcodes (to simplify */
			/* read sandboxing) */
			if (!amd64_use_imm32 (ins->inst_offset)) {
				NEW_INS (cfg, ins, temp, OP_I8CONST);
				temp->inst_c0 = ins->inst_offset;
				temp->dreg = mono_alloc_ireg (cfg);
				ins->opcode = OP_AMD64_LOADI8_MEMINDEX;
				ins->inst_indexreg = temp->dreg;
			}
			break;
#ifndef MONO_ARCH_ILP32
		case OP_STORE_MEMBASE_IMM:
#endif
		case OP_STOREI8_MEMBASE_IMM:
			if (!amd64_use_imm32 (ins->inst_imm)) {
				NEW_INS (cfg, ins, temp, OP_I8CONST);
				temp->inst_c0 = ins->inst_imm;
				temp->dreg = mono_alloc_ireg (cfg);
				ins->opcode = OP_STOREI8_MEMBASE_REG;
				ins->sreg1 = temp->dreg;
			}
			break;
#ifdef MONO_ARCH_SIMD_INTRINSICS
		case OP_EXPAND_I1: {
			int temp_reg1 = mono_alloc_ireg (cfg);
			int temp_reg2 = mono_alloc_ireg (cfg);
			int original_reg = ins->sreg1;

			NEW_INS (cfg, ins, temp, OP_ICONV_TO_U1);
			temp->sreg1 = original_reg;
			temp->dreg = temp_reg1;

			NEW_INS (cfg, ins, temp, OP_SHL_IMM);
			temp->sreg1 = temp_reg1;
			temp->dreg = temp_reg2;
			temp->inst_imm = 8;

			NEW_INS (cfg, ins, temp, OP_LOR);
			temp->sreg1 = temp->dreg = temp_reg2;
			temp->sreg2 = temp_reg1;

			ins->opcode = OP_EXPAND_I2;
			ins->sreg1 = temp_reg2;
			break;
		}
		case OP_XEQUAL: {
			int temp_reg1 = mono_alloc_ireg (cfg);
			int temp_reg2 = mono_alloc_ireg (cfg);

			NEW_SIMD_INS (cfg, ins, temp, OP_PCMPEQD, temp_reg1, ins->sreg1, ins->sreg2);
			NEW_SIMD_INS (cfg, ins, temp, OP_EXTRACT_MASK, temp_reg2, temp_reg1, -1);
			temp->type = STACK_I4;
			NEW_INS (cfg, ins, temp, OP_COMPARE_IMM);
			temp->sreg1 = temp_reg2;
			temp->inst_imm = 0xFFFF;
			temp->klass = ins->klass;
			ins->opcode = OP_CEQ;
			break;
		}
		case OP_XCOMPARE: {
			int temp_reg;

			switch (ins->inst_c0)
			{
			case CMP_EQ:
				emit_simd_comp_op (cfg, bb, ins, ins->inst_c1, ins->dreg, ins->sreg1, ins->sreg2);
				NULLIFY_INS (ins);
				break;
			case CMP_NE: {
				int temp_reg1 = mono_alloc_ireg (cfg);
				int temp_reg2 = mono_alloc_ireg (cfg);

				emit_simd_comp_op (cfg, bb, ins, ins->inst_c1, temp_reg1, ins->sreg1, ins->sreg2);
				NEW_SIMD_INS (cfg, ins, temp, OP_XONES, temp_reg2, -1, -1);
				ins->opcode = OP_XORPD;
				ins->sreg1 = temp_reg1;
				ins->sreg2 = temp_reg2;
				break;
			}
			case CMP_LT:
				temp_reg = ins->sreg1;
				ins->sreg1 = ins->sreg2;
				ins->sreg2 = temp_reg;
			case CMP_GT:
				emit_simd_gt_op (cfg, bb, ins, ins->inst_c1, ins->dreg, ins->sreg1, ins->sreg2);
				NULLIFY_INS (ins);
				break;
			case CMP_LE:
				temp_reg = ins->sreg1;
				ins->sreg1 = ins->sreg2;
				ins->sreg2 = temp_reg;
			case CMP_GE: {
				int temp_reg1 = mono_alloc_ireg (cfg);
				int temp_reg2 = mono_alloc_ireg (cfg);

				emit_simd_gt_op (cfg, bb, ins, ins->inst_c1, temp_reg1, ins->sreg1, ins->sreg2);
				emit_simd_comp_op (cfg, bb, ins, ins->inst_c1, temp_reg2, ins->sreg1, ins->sreg2);
				ins->opcode = OP_POR;
				ins->sreg1 = temp_reg1;
				ins->sreg2 = temp_reg2;
				break;
			}
			case CMP_LT_UN:
				temp_reg = ins->sreg1;
				ins->sreg1 = ins->sreg2;
				ins->sreg2 = temp_reg;
			case CMP_GT_UN: {
				if (mono_hwcap_x86_has_sse41 && ins->inst_c1 != MONO_TYPE_U8) {
					int temp_reg1 = mono_alloc_ireg (cfg);

					NEW_SIMD_INS (cfg, ins, temp, simd_type_to_max_un_op (ins->inst_c1), temp_reg1, ins->sreg1, ins->sreg2);
					emit_simd_comp_op (cfg, bb, ins, ins->inst_c1, ins->dreg, temp_reg1, ins->sreg1);
					NULLIFY_INS (ins);
				} else {
					int temp_reg1 = mono_alloc_ireg (cfg);
					int temp_reg2 = mono_alloc_ireg (cfg);

					emit_simd_gt_un_op (cfg, bb, ins, ins->inst_c1, temp_reg1, ins->sreg1, ins->sreg2);
					emit_simd_comp_op (cfg, bb, ins, ins->inst_c1, temp_reg2, ins->sreg1, ins->sreg2);
					ins->opcode = OP_POR;
					ins->sreg1 = temp_reg1;
					ins->sreg2 = temp_reg2;
				}
				break;
			}
			case CMP_LE_UN:
				temp_reg = ins->sreg1;
				ins->sreg1 = ins->sreg2;
				ins->sreg2 = temp_reg;
			case CMP_GE_UN:
				emit_simd_gt_un_op (cfg, bb, ins, ins->inst_c1, ins->dreg, ins->sreg1, ins->sreg2);
				NULLIFY_INS (ins);
				break;
			default:
				g_assert_not_reached();
				break;
			}

			ins->type = STACK_VTYPE;
			break;
		}
		case OP_XCOMPARE_FP: {
			ins->opcode = ins->inst_c1 == MONO_TYPE_R4 ? OP_COMPPS : OP_COMPPD;

			switch (ins->inst_c0)
			{
			case CMP_EQ: ins->inst_c0 = 0; break;
			case CMP_NE: ins->inst_c0 = 4; break;
			case CMP_LT: ins->inst_c0 = 1; break;
			case CMP_LE: ins->inst_c0 = 2; break;
			case CMP_GT: ins->inst_c0 = 6; break;
			case CMP_GE: ins->inst_c0 = 5; break;
			default:
				g_assert_not_reached();
				break;
			}
			break;
		}
		case OP_XCAST:
			ins->opcode = OP_XMOVE;
			break;
		case OP_XBINOP: {
			switch (ins->inst_c0)
			{
			case OP_ISUB:
				ins->opcode = simd_type_to_sub_op (ins->inst_c1);
				break;
			case OP_IADD:
				ins->opcode = simd_type_to_add_op (ins->inst_c1);
				break;
			case OP_IAND:
				ins->opcode = OP_ANDPD;
				break;
			case OP_IXOR:
				ins->opcode = OP_XORPD;
				break;
			case OP_IOR:
				ins->opcode = OP_ORPD;
				break;
			case OP_IMIN:
				emit_simd_min_op (cfg, bb, ins, ins->inst_c1, ins->dreg, ins->sreg1, ins->sreg2);
				NULLIFY_INS (ins);
				break;
			case OP_IMAX:
				emit_simd_max_op (cfg, bb, ins, ins->inst_c1, ins->dreg, ins->sreg1, ins->sreg2);
				NULLIFY_INS (ins);
				break;
			case OP_FSUB:
				ins->opcode = ins->inst_c1 == MONO_TYPE_R8 ? OP_SUBPD : OP_SUBPS;
				break;
			case OP_FADD:
				ins->opcode = ins->inst_c1 == MONO_TYPE_R8 ? OP_ADDPD : OP_ADDPS;
				break;
			case OP_FDIV:
				ins->opcode = ins->inst_c1 == MONO_TYPE_R8 ? OP_DIVPD : OP_DIVPS;
				break;
			case OP_FMUL:
				ins->opcode = ins->inst_c1 == MONO_TYPE_R8 ? OP_MULPD : OP_MULPS;
				break;
			case OP_FMIN:
				ins->opcode = ins->inst_c1 == MONO_TYPE_R8 ? OP_MINPD : OP_MINPS;
				break;
			case OP_FMAX:
				ins->opcode = ins->inst_c1 == MONO_TYPE_R8 ? OP_MAXPD : OP_MAXPS;
				break;
			default:
				g_assert_not_reached();
				break;
			}
			break;
		}
		case OP_XEXTRACT_R4:
		case OP_XEXTRACT_R8:
		case OP_XEXTRACT_I32:
		case OP_XEXTRACT_I64: {
			// TODO
			g_assert_not_reached();
			break;
		}
#endif
		default:
			break;
		}
	}

	bb->max_vreg = cfg->next_vreg;
}
static const int
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Maps CMP_... constants to X86_CC_... constants */
static const int
cc_table [] = {
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};

static const int
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};

/*#include "cprop.c"*/
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
{
	// Use 8 as register size to get Nan/Inf conversion to uint result truncated to 0
	if (size == 8 || (!is_signed && size == 4))
		amd64_sse_cvttsd2si_reg_reg (code, dreg, sreg);
	else
		amd64_sse_cvttsd2si_reg_reg_size (code, dreg, sreg, 4);

	if (size == 1)
		amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);

	return code;
}
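/*
 * Behavioural note (added commentary): cvttsd2si produces the "integer
 * indefinite" value 0x8000000000000000 for NaN/Inf/overflow, so converting at
 * 8 byte width and keeping only the low 4 bytes is what makes an out-of-range
 * unsigned 32 bit result come out as 0 here.
 */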
static unsigned char*
mono_emit_stack_alloc (MonoCompile *cfg, guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(TARGET_WIN32)
	need_touch = TRUE;
#elif defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	if (!(tree->flags & MONO_INST_INIT))
		need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently committed.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		amd64_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
		amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
		amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		amd64_patch (br[3], br[2]);
		amd64_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		amd64_patch (br[0], code);
		amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
		amd64_patch (br[1], code);
		amd64_patch (br[4], code);
	}
	else
		amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
			amd64_push_reg (code, AMD64_RAX);
			offset += 8;
		}
		if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
			amd64_push_reg (code, AMD64_RCX);
			offset += 8;
		}
		if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
			amd64_push_reg (code, AMD64_RDI);
			offset += 8;
		}

		amd64_shift_reg_imm (code, X86_SHR, sreg, 3);
		if (sreg != AMD64_RCX)
			amd64_mov_reg_reg (code, AMD64_RCX, sreg, 8);
		amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);

		amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
		if (cfg->param_area)
			amd64_alu_reg_imm (code, X86_ADD, AMD64_RDI, cfg->param_area);
		amd64_cld (code);
		amd64_prefix (code, X86_REP_PREFIX);
		amd64_stosl (code);

		if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
			amd64_pop_reg (code, AMD64_RDI);
		if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
			amd64_pop_reg (code, AMD64_RCX);
		if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
			amd64_pop_reg (code, AMD64_RAX);
	}
	return code;
}
static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
	CallInfo *cinfo;
	int quad;

	/* Move return value to the target register */
	/* FIXME: do this in the local reg allocator */
	switch (ins->opcode) {
	case OP_CALL:
	case OP_CALL_REG:
	case OP_CALL_MEMBASE:
	case OP_LCALL:
	case OP_LCALL_REG:
	case OP_LCALL_MEMBASE:
		g_assert (ins->dreg == AMD64_RAX);
		break;
	case OP_FCALL:
	case OP_FCALL_REG:
	case OP_FCALL_MEMBASE: {
		MonoType *rtype = mini_get_underlying_type (((MonoCallInst*)ins)->signature->ret);
		if (rtype->type == MONO_TYPE_R4) {
			amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, AMD64_XMM0);
		}
		else {
			if (ins->dreg != AMD64_XMM0)
				amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM0);
		}
		break;
	}
	case OP_RCALL:
	case OP_RCALL_REG:
	case OP_RCALL_MEMBASE:
		if (ins->dreg != AMD64_XMM0)
			amd64_sse_movss_reg_reg (code, ins->dreg, AMD64_XMM0);
		break;
	case OP_VCALL:
	case OP_VCALL_REG:
	case OP_VCALL_MEMBASE:
	case OP_VCALL2:
	case OP_VCALL2_REG:
	case OP_VCALL2_MEMBASE:
		cinfo = get_call_info (cfg->mempool, ((MonoCallInst*)ins)->signature);
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			MonoInst *loc = cfg->arch.vret_addr_loc;

			/* Load the destination address */
			g_assert (loc->opcode == OP_REGOFFSET);
			amd64_mov_reg_membase (code, AMD64_RCX, loc->inst_basereg, loc->inst_offset, sizeof(gpointer));

			for (quad = 0; quad < 2; quad ++) {
				switch (cinfo->ret.pair_storage [quad]) {
				case ArgInIReg:
					amd64_mov_membase_reg (code, AMD64_RCX, (quad * sizeof (target_mgreg_t)), cinfo->ret.pair_regs [quad], sizeof (target_mgreg_t));
					break;
				case ArgInFloatSSEReg:
					amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
					break;
				case ArgInDoubleSSEReg:
					amd64_movsd_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
					break;
				case ArgNone:
					break;
				default:
					NOT_IMPLEMENTED;
				}
			}
		}
		break;
	}

	return code;
}

#endif /* DISABLE_JIT */
#ifdef TARGET_OSX
static int tls_gs_offset;
#endif

gboolean
mono_arch_have_fast_tls (void)
{
#ifdef TARGET_OSX
	static gboolean have_fast_tls = FALSE;
	static gboolean inited = FALSE;
	guint8 *ins;

	if (mini_debug_options.use_fallback_tls)
		return FALSE;

	if (inited)
		return have_fast_tls;

	ins = (guint8*)pthread_getspecific;

	/*
	 * We're looking for these two instructions:
	 *
	 * mov    %gs:[offset](,%rdi,8),%rax
	 * retq
	 */
	have_fast_tls = ins [0] == 0x65 &&
		       ins [1] == 0x48 &&
		       ins [2] == 0x8b &&
		       ins [3] == 0x04 &&
		       ins [4] == 0xfd &&
		       ins [6] == 0x00 &&
		       ins [7] == 0x00 &&
		       ins [8] == 0x00 &&
		       ins [9] == 0xc3;

	tls_gs_offset = ins [5];

	/*
	 * Apple now loads a different version of pthread_getspecific when launched from Xcode
	 * For that version we're looking for these instructions:
	 *
	 * pushq  %rbp
	 * movq   %rsp, %rbp
	 * mov    %gs:[offset](,%rdi,8),%rax
	 * popq   %rbp
	 * retq
	 */
	if (!have_fast_tls) {
		have_fast_tls = ins [0] == 0x55 &&
			       ins [1] == 0x48 &&
			       ins [2] == 0x89 &&
			       ins [3] == 0xe5 &&
			       ins [4] == 0x65 &&
			       ins [5] == 0x48 &&
			       ins [6] == 0x8b &&
			       ins [7] == 0x04 &&
			       ins [8] == 0xfd &&
			       ins [10] == 0x00 &&
			       ins [11] == 0x00 &&
			       ins [12] == 0x00 &&
			       ins [13] == 0x5d &&
			       ins [14] == 0xc3;

		tls_gs_offset = ins [9];
	}
	inited = TRUE;

	return have_fast_tls;
#elif defined(TARGET_ANDROID)
	return FALSE;
#else
	if (mini_debug_options.use_fallback_tls)
		return FALSE;
	return TRUE;
#endif
}

int
mono_amd64_get_tls_gs_offset (void)
{
#ifdef TARGET_OSX
	return tls_gs_offset;
#else
	g_assert_not_reached ();
	return -1;
#endif
}
4313 g_assert_not_reached ();
4319 * \param code buffer to store code to
4320 * \param dreg hard register where to place the result
4321 * \param tls_offset offset info
4322 * \return a pointer to the end of the stored code
4324 * mono_amd64_emit_tls_get emits in \p code the native code that puts in
4325 * the dreg register the item in the thread local storage identified
4329 mono_amd64_emit_tls_get (guint8
* code
, int dreg
, int tls_offset
)
4332 if (tls_offset
< 64) {
4333 x86_prefix (code
, X86_GS_PREFIX
);
4334 amd64_mov_reg_mem (code
, dreg
, (tls_offset
* 8) + 0x1480, 8);
4338 g_assert (tls_offset
< 0x440);
4339 /* Load TEB->TlsExpansionSlots */
4340 x86_prefix (code
, X86_GS_PREFIX
);
4341 amd64_mov_reg_mem (code
, dreg
, 0x1780, 8);
4342 amd64_test_reg_reg (code
, dreg
, dreg
);
4344 amd64_branch (code
, X86_CC_EQ
, code
, TRUE
);
4345 amd64_mov_reg_membase (code
, dreg
, dreg
, (tls_offset
* 8) - 0x200, 8);
4346 amd64_patch (buf
[0], code
);
4348 #elif defined(TARGET_MACH)
4349 x86_prefix (code
, X86_GS_PREFIX
);
4350 amd64_mov_reg_mem (code
, dreg
, tls_gs_offset
+ (tls_offset
* 8), 8);
4352 if (optimize_for_xen
) {
4353 x86_prefix (code
, X86_FS_PREFIX
);
4354 amd64_mov_reg_mem (code
, dreg
, 0, 8);
4355 amd64_mov_reg_membase (code
, dreg
, dreg
, tls_offset
, 8);
4357 x86_prefix (code
, X86_FS_PREFIX
);
4358 amd64_mov_reg_mem (code
, dreg
, tls_offset
, 8);
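/*
 * Usage sketch (illustrative): callers emit a TLS load into a scratch
 * register with e.g.
 *
 *   code = mono_amd64_emit_tls_get (code, AMD64_R11, tls_offset);
 *
 * On Linux this is a single mov through %fs; on OSX it indexes the pthread
 * key area through %gs using the offset probed in mono_arch_have_fast_tls ().
 */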
static guint8*
mono_amd64_emit_tls_set (guint8 *code, int sreg, int tls_offset)
{
#ifdef TARGET_WIN32
	g_assert_not_reached ();
#elif defined(TARGET_MACH)
	x86_prefix (code, X86_GS_PREFIX);
	amd64_mov_mem_reg (code, tls_gs_offset + (tls_offset * 8), sreg, 8);
#else
	g_assert (!optimize_for_xen);
	x86_prefix (code, X86_FS_PREFIX);
	amd64_mov_mem_reg (code, tls_offset, sreg, 8);
#endif
	return code;
}
/*
 * emit_setup_lmf:
 *
 *   Emit code to initialize an LMF structure at LMF_OFFSET.
 */
static guint8*
emit_setup_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset, int cfa_offset)
{
	/*
	 * The ip field is not set, the exception handling code will obtain it from the stack location pointed to by the sp field.
	 */
	/*
	 * sp is saved right before calls but we need to save it here too so
	 * async stack walks would work.
	 */
	amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
	/* Save rbp */
	amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp), AMD64_RBP, 8);
	if (cfg->arch.omit_fp && cfa_offset != -1)
		mono_emit_unwind_op_offset (cfg, code, AMD64_RBP, - (cfa_offset - (lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp))));

	/* These can't contain refs */
	mini_gc_set_slot_type_from_fp (cfg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, previous_lmf), SLOT_NOREF);
	mini_gc_set_slot_type_from_fp (cfg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rsp), SLOT_NOREF);
	/* These are handled automatically by the stack marking code */
	mini_gc_set_slot_type_from_fp (cfg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp), SLOT_NOREF);

	return code;
}
#ifdef TARGET_WIN32

#define TEB_LAST_ERROR_OFFSET 0x68

static guint8*
emit_get_last_error (guint8* code, int dreg)
{
	/* The thread's last error value is located at TEB_LAST_ERROR_OFFSET. */
	x86_prefix (code, X86_GS_PREFIX);
	amd64_mov_reg_mem (code, dreg, TEB_LAST_ERROR_OFFSET, sizeof (guint32));
	return code;
}

#else

static guint8*
emit_get_last_error (guint8* code, int dreg)
{
	g_assert_not_reached ();
}

#endif
/* benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)

static guint8*
amd64_handle_varargs_nregs (guint8 *code, guint32 nregs)
{
#ifndef TARGET_WIN32
	if (nregs)
		amd64_mov_reg_imm (code, AMD64_RAX, nregs);
	else
		amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
#endif
	return code;
}
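/*
 * In the SysV AMD64 ABI, a caller of a varargs function must load %al with
 * an upper bound on the number of vector (XMM) registers actually used for
 * arguments, so the callee knows how many to spill. For example,
 * printf ("%f", 1.0) is called with al = 1. The helpers here compute that
 * count and materialize it in %rax (using xor when it is zero, which
 * encodes shorter).
 */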
static guint8*
amd64_handle_varargs_call (MonoCompile *cfg, guint8 *code, MonoCallInst *call, gboolean free_rax)
{
#ifdef TARGET_WIN32
	return code;
#else
	/*
	 * The AMD64 ABI forces callers to know about varargs.
	 */
	guint32 nregs = 0;
	if (call->signature->call_convention == MONO_CALL_VARARG && call->signature->pinvoke) {
		// deliberately nothing -- but nregs = 0 and do not return
	} else if (cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE && m_class_get_image (cfg->method->klass) != mono_defaults.corlib) {
		/*
		 * Since the unmanaged calling convention doesn't contain a
		 * 'vararg' entry, we have to treat every pinvoke call as a
		 * potential vararg call.
		 */
		for (guint32 i = 0; i < AMD64_XMM_NREG; ++i)
			nregs += (call->used_fregs & (1 << i)) != 0;
	} else {
		return code;
	}
	MonoInst *ins = (MonoInst*)call;
	if (free_rax && ins->sreg1 == AMD64_RAX) {
		amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
		ins->sreg1 = AMD64_R11;
	}
	return amd64_handle_varargs_nregs (code, nregs);
#endif
}
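/*
 * mono_arch_output_basic_block:
 *
 *   The per-basic-block code emitter: it walks the instruction list,
 * reserves the worst-case encoding length of each opcode via realloc_code (),
 * and appends the machine code for every IR opcode handled by the big
 * switch below.
 */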
void
mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins;
	MonoCallInst *call;
	guint8 *code = cfg->native_code + cfg->code_len;

	/* Fix max_offset estimate for each successor bb */
	gboolean optimize_branch_pred = (cfg->opt & MONO_OPT_BRANCH) && (cfg->max_block_num < MAX_BBLOCKS_FOR_BRANCH_OPTS);

	if (optimize_branch_pred) {
		int current_offset = cfg->code_len;
		MonoBasicBlock *current_bb;
		for (current_bb = bb; current_bb != NULL; current_bb = current_bb->next_bb) {
			current_bb->max_offset = current_offset;
			current_offset += current_bb->max_length;
		}
	}

	if (cfg->opt & MONO_OPT_LOOP) {
		int pad, align = LOOP_ALIGNMENT;
		/* set alignment depending on cpu */
		if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
			pad = align - pad;
			/*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
			amd64_padding (code, pad);
			cfg->code_len += pad;
			bb->native_offset = cfg->code_len;
		}
	}

	if (cfg->verbose_level > 2)
		g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);

	set_code_cursor (cfg, code);

	mono_debug_open_block (cfg, bb, code - cfg->native_code);

	if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num)
		x86_breakpoint (code);

	MONO_BB_FOR_EACH_INS (bb, ins) {
		const guint offset = code - cfg->native_code;
		set_code_cursor (cfg, code);
		int max_len = ins_get_size (ins->opcode);

		code = realloc_code (cfg, max_len);

		if (cfg->debug_info)
			mono_debug_record_line_number (cfg, ins, offset);

		switch (ins->opcode) {
		case OP_BIGMUL:
			amd64_mul_reg (code, ins->sreg2, TRUE);
			break;
		case OP_BIGMUL_UN:
			amd64_mul_reg (code, ins->sreg2, FALSE);
			break;
		case OP_X86_SETEQ_MEMBASE:
			amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
			break;
		case OP_STOREI1_MEMBASE_IMM:
			amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
			break;
		case OP_STOREI2_MEMBASE_IMM:
			amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
			break;
		case OP_STOREI4_MEMBASE_IMM:
			amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_STOREI1_MEMBASE_REG:
			amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
			break;
		case OP_STOREI2_MEMBASE_REG:
			amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
			break;
		/* In AMD64 NaCl, pointers are 4 bytes, */
		/* so STORE_* != STOREI8_*. Likewise below. */
		case OP_STORE_MEMBASE_REG:
			amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, sizeof (gpointer));
			break;
		case OP_STOREI8_MEMBASE_REG:
			amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
			break;
		case OP_STOREI4_MEMBASE_REG:
			amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
			break;
		case OP_STORE_MEMBASE_IMM:
			/* In NaCl, this could be a PCONST type, which could */
			/* mean a pointer type was copied directly into the */
			/* lower 32-bits of inst_imm, so for InvalidPtr==-1 */
			/* the value would be 0x00000000FFFFFFFF which is */
			/* not proper for an imm32 unless you cast it. */
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, (gint32)ins->inst_imm, sizeof (gpointer));
			break;
		case OP_STOREI8_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_LOAD_MEM:
#ifdef MONO_ARCH_ILP32
			/* In ILP32, pointers are 4 bytes, so separate these */
			/* cases, use literal 8 below where we really want 8 */
			amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
			amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, sizeof (gpointer));
			break;
#endif
		case OP_LOADI8_MEM:
			// FIXME: Decompose this earlier
			if (amd64_use_imm32 (ins->inst_imm))
				amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, 8);
			else {
				amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_imm, sizeof (gpointer));
				amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 8);
			}
			break;
		case OP_LOADI4_MEM:
			amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
			amd64_movsxd_reg_membase (code, ins->dreg, ins->dreg, 0);
			break;
		case OP_LOADU4_MEM:
			// FIXME: Decompose this earlier
			if (amd64_use_imm32 (ins->inst_imm))
				amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
			else {
				amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_imm, sizeof (gpointer));
				amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
			}
			break;
		case OP_LOADU1_MEM:
			amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
			amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, FALSE);
			break;
		case OP_LOADU2_MEM:
			/* For NaCl, pointers are 4 bytes, so separate these */
			/* cases, use literal 8 below where we really want 8 */
			amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
			amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, TRUE);
			break;
		case OP_LOAD_MEMBASE:
			g_assert (amd64_is_imm32 (ins->inst_offset));
			amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof (gpointer));
			break;
		case OP_LOADI8_MEMBASE:
			/* Use literal 8 instead of sizeof pointer or */
			/* register, we really want 8 for this opcode */
			g_assert (amd64_is_imm32 (ins->inst_offset));
			amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 8);
			break;
		case OP_LOADI4_MEMBASE:
			amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_LOADU4_MEMBASE:
			amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
			break;
		case OP_LOADU1_MEMBASE:
			/* The cpu zero extends the result into 64 bits */
			amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE, 4);
			break;
		case OP_LOADI1_MEMBASE:
			amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
			break;
		case OP_LOADU2_MEMBASE:
			/* The cpu zero extends the result into 64 bits */
			amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE, 4);
			break;
		case OP_LOADI2_MEMBASE:
			amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
			break;
		case OP_AMD64_LOADI8_MEMINDEX:
			amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, ins->inst_indexreg, 0, 8);
			break;
		case OP_LCONV_TO_I1:
		case OP_ICONV_TO_I1:
		case OP_SEXT_I1:
			amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
			break;
		case OP_LCONV_TO_I2:
		case OP_ICONV_TO_I2:
		case OP_SEXT_I2:
			amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
			break;
		case OP_LCONV_TO_U1:
		case OP_ICONV_TO_U1:
			amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
			break;
		case OP_LCONV_TO_U2:
		case OP_ICONV_TO_U2:
			amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
			break;
		case OP_ZEXT_I4:
			/* Clean out the upper word */
			amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
			break;
		case OP_SEXT_I4:
			amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_COMPARE:
		case OP_LCOMPARE:
			amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			break;
		case OP_COMPARE_IMM:
#if defined(MONO_ARCH_ILP32)
			/* Comparison of pointer immediates should be 4 bytes to avoid sign-extend problems */
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
			break;
#endif
		case OP_LCOMPARE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
			break;
		case OP_X86_COMPARE_REG_MEMBASE:
			amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
			break;
		case OP_X86_TEST_NULL:
			amd64_test_reg_reg_size (code, ins->sreg1, ins->sreg1, 4);
			break;
		case OP_AMD64_TEST_NULL:
			amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
			break;
		case OP_X86_ADD_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_X86_SUB_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_X86_AND_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_X86_OR_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_X86_XOR_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_X86_ADD_MEMBASE_IMM:
			/* FIXME: Make a 64 version too */
			amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_SUB_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_AND_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_OR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_XOR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_ADD_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_SUB_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_AND_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_OR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_XOR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_INC_MEMBASE:
			amd64_inc_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
			break;
		case OP_X86_INC_REG:
			amd64_inc_reg_size (code, ins->dreg, 4);
			break;
		case OP_X86_DEC_MEMBASE:
			amd64_dec_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
			break;
		case OP_X86_DEC_REG:
			amd64_dec_reg_size (code, ins->dreg, 4);
			break;
		case OP_X86_MUL_REG_MEMBASE:
		case OP_X86_MUL_MEMBASE_REG:
			amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_AMD64_ICOMPARE_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_AMD64_ICOMPARE_MEMBASE_IMM:
			amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_AMD64_COMPARE_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_COMPARE_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_X86_COMPARE_MEMBASE8_IMM:
			amd64_alu_membase8_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_AMD64_ICOMPARE_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_AMD64_COMPARE_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_ADD_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_SUB_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_AND_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_OR_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_XOR_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_ADD_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_SUB_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_AND_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_OR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_XOR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_ADD_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_SUB_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_AND_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_OR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_XOR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_BREAK:
			amd64_breakpoint (code);
			break;
		case OP_RELAXED_NOP:
			x86_prefix (code, X86_REP_PREFIX);
			x86_nop (code);
			break;
		case OP_HARD_NOP:
			x86_nop (code);
			break;
		case OP_NOP:
		case OP_DUMMY_USE:
		case OP_DUMMY_ICONST:
		case OP_DUMMY_I8CONST:
		case OP_DUMMY_R8CONST:
		case OP_DUMMY_R4CONST:
		case OP_NOT_REACHED:
		case OP_NOT_NULL:
			break;
		case OP_IL_SEQ_POINT:
			mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
			break;
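		/*
		 * Sequence points are a cheap test of a per-method trampoline
		 * pointer: the debugger enables single stepping or a breakpoint
		 * by making the pointer (or, for AOT code, the bp_addrs table
		 * slot) non-null, which makes the emitted test+call fire.
		 */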
		case OP_SEQ_POINT: {
			if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
				MonoInst *var = cfg->arch.ss_tramp_var;
				guint8 *label;

				/* Load ss_tramp_var */
				/* This is equal to &ss_trampoline */
				amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
				/* Load the trampoline address */
				amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, 8);
				/* Call it if it is non-null */
				amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
				label = code;
				amd64_branch8 (code, X86_CC_Z, 0, FALSE);
				amd64_call_reg (code, AMD64_R11);
				amd64_patch (label, code);
			}

			/*
			 * This is the address which is saved in seq points,
			 */
			mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);

			if (cfg->compile_aot) {
				const guint32 offset = code - cfg->native_code;
				guint32 val;
				MonoInst *info_var = cfg->arch.seq_point_info_var;
				guint8 *label;

				/* Load info var */
				amd64_mov_reg_membase (code, AMD64_R11, info_var->inst_basereg, info_var->inst_offset, 8);
				val = ((offset) * sizeof (target_mgreg_t)) + MONO_STRUCT_OFFSET (SeqPointInfo, bp_addrs);
				/* Load the info->bp_addrs [offset], which is either NULL or the address of the breakpoint trampoline */
				amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, val, 8);
				amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
				label = code;
				amd64_branch8 (code, X86_CC_Z, 0, FALSE);
				/* Call the trampoline */
				amd64_call_reg (code, AMD64_R11);
				amd64_patch (label, code);
			} else {
				MonoInst *var = cfg->arch.bp_tramp_var;
				guint8 *label;

				/*
				 * Emit a test+branch against a constant, the constant will be overwritten
				 * by mono_arch_set_breakpoint () to cause the test to fail.
				 */
				amd64_mov_reg_imm (code, AMD64_R11, 0);
				amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
				label = code;
				amd64_branch8 (code, X86_CC_Z, 0, FALSE);

				g_assert (var);
				g_assert (var->opcode == OP_REGOFFSET);
				/* Load bp_tramp_var */
				/* This is equal to &bp_trampoline */
				amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
				/* Call the trampoline */
				amd64_call_membase (code, AMD64_R11, 0);
				amd64_patch (label, code);
			}
			/*
			 * Add an additional nop so skipping the bp doesn't cause the ip to point
			 * to another IL offset.
			 */
			x86_nop (code);
			break;
		}
		case OP_ADDCC:
		case OP_LADDCC:
		case OP_LADD:
			amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
			break;
		case OP_ADC:
			amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
			break;
		case OP_ADD_IMM:
		case OP_LADD_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
			break;
		case OP_ADC_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
			break;
		case OP_SUBCC:
		case OP_LSUBCC:
		case OP_LSUB:
			amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
			break;
		case OP_SBB:
			amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
			break;
		case OP_SUB_IMM:
		case OP_LSUB_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
			break;
		case OP_SBB_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
			break;
		case OP_LAND:
			amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
			break;
		case OP_AND_IMM:
		case OP_LAND_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
			break;
		case OP_LMUL:
			amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
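		/*
		 * Multiplication by small constants is strength-reduced to LEA
		 * with a scaled index (plus the occasional add or shift): e.g.
		 * x*3 becomes lea r, [x+x*2] and x*12 becomes lea r, [x+x*2]
		 * followed by a shift left by 2.
		 */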
		case OP_IMUL_IMM:
		case OP_LMUL_IMM:
		case OP_MUL_IMM: {
			guint32 size = (ins->opcode == OP_IMUL_IMM) ? 4 : 8;

			switch (ins->inst_imm) {
			case 2:
				/* MOV r1, r2 */
				/* ADD r1, r1 */
				if (ins->dreg != ins->sreg1)
					amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, size);
				amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 3:
				/* LEA r1, [r2 + r2*2] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				break;
			case 5:
				/* LEA r1, [r2 + r2*4] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				break;
			case 6:
				/* LEA r1, [r2 + r2*2] */
				/* ADD r1, r1          */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 9:
				/* LEA r1, [r2 + r2*8] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
				break;
			case 10:
				/* LEA r1, [r2 + r2*4] */
				/* ADD r1, r1          */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 12:
				/* LEA r1, [r2 + r2*2] */
				/* SHL r1, 2           */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
				break;
			case 25:
				/* LEA r1, [r2 + r2*4] */
				/* LEA r1, [r1 + r1*4] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
				break;
			case 100:
				/* LEA r1, [r2 + r2*4] */
				/* SHL r1, 2           */
				/* LEA r1, [r1 + r1*4] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
				amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
				break;
			default:
				amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, size);
				break;
			}
			break;
		}
		case OP_LDIV:
		case OP_LREM:
			/* Regalloc magic makes the div/rem cases the same */
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_cdq (code);
				amd64_div_membase (code, AMD64_RSP, -8, TRUE);
			} else {
				amd64_cdq (code);
				amd64_div_reg (code, ins->sreg2, TRUE);
			}
			break;
		case OP_LDIV_UN:
		case OP_LREM_UN:
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_membase (code, AMD64_RSP, -8, FALSE);
			} else {
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_reg (code, ins->sreg2, FALSE);
			}
			break;
		case OP_IDIV:
		case OP_IREM:
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_cdq_size (code, 4);
				amd64_div_membase_size (code, AMD64_RSP, -8, TRUE, 4);
			} else {
				amd64_cdq_size (code, 4);
				amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
			}
			break;
		case OP_IDIV_UN:
		case OP_IREM_UN:
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_membase_size (code, AMD64_RSP, -8, FALSE, 4);
			} else {
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
			}
			break;
		case OP_LMUL_OVF:
			amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
		case OP_LOR:
			amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
			break;
		case OP_OR_IMM:
		case OP_LOR_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
			break;
		case OP_LXOR:
			amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
			break;
		case OP_XOR_IMM:
		case OP_LXOR_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
			break;
		case OP_LSHL:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg (code, X86_SHL, ins->dreg);
			break;
		case OP_LSHR:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg (code, X86_SAR, ins->dreg);
			break;
		case OP_SHR_IMM:
		case OP_LSHR_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
			break;
		case OP_SHR_UN_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_LSHR_UN_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
			break;
		case OP_LSHR_UN:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg (code, X86_SHR, ins->dreg);
			break;
		case OP_SHL_IMM:
		case OP_LSHL_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
			break;
		case OP_IADDCC:
		case OP_IADD:
			amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IADC:
			amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IADD_IMM:
			amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_IADC_IMM:
			amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISUBCC:
		case OP_ISUB:
			amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_ISBB:
			amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_ISUB_IMM:
			amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISBB_IMM:
			amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_IAND:
			amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IAND_IMM:
			amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_IOR:
			amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IOR_IMM:
			amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_IXOR:
			amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IXOR_IMM:
			amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_INEG:
			amd64_neg_reg_size (code, ins->sreg1, 4);
			break;
		case OP_INOT:
			amd64_not_reg_size (code, ins->sreg1, 4);
			break;
		case OP_ISHL:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
			break;
		case OP_ISHR:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
			break;
		case OP_ISHR_IMM:
			amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISHR_UN_IMM:
			amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISHR_UN:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
			break;
		case OP_ISHL_IMM:
			amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_IMUL:
			amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IMUL_OVF:
			amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
		case OP_IMUL_OVF_UN:
		case OP_LMUL_OVF_UN: {
			/* the mul operation and the exception check should most likely be split */
			int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
			int size = (ins->opcode == OP_IMUL_OVF_UN) ? 4 : 8;
			/*g_assert (ins->sreg2 == X86_EAX);
			g_assert (ins->dreg == X86_EAX);*/
			if (ins->sreg2 == X86_EAX) {
				non_eax_reg = ins->sreg1;
			} else if (ins->sreg1 == X86_EAX) {
				non_eax_reg = ins->sreg2;
			} else {
				/* no need to save since we're going to store to it anyway */
				if (ins->dreg != X86_EAX) {
					saved_eax = TRUE;
					amd64_push_reg (code, X86_EAX);
				}
				amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, size);
				non_eax_reg = ins->sreg2;
			}
			if (ins->dreg == X86_EDX) {
				if (!saved_eax) {
					saved_eax = TRUE;
					amd64_push_reg (code, X86_EAX);
				}
			} else {
				saved_edx = TRUE;
				amd64_push_reg (code, X86_EDX);
			}
			amd64_mul_reg_size (code, non_eax_reg, FALSE, size);
			/* save before the check since pop and mov don't change the flags */
			if (ins->dreg != X86_EAX)
				amd64_mov_reg_reg (code, ins->dreg, X86_EAX, size);
			if (saved_edx)
				amd64_pop_reg (code, X86_EDX);
			if (saved_eax)
				amd64_pop_reg (code, X86_EAX);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
		}
		case OP_ICOMPARE:
			amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_ICOMPARE_IMM:
			amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_IBEQ:
		case OP_IBLT:
		case OP_IBGT:
		case OP_IBGE:
		case OP_IBLE:
		case OP_LBEQ:
		case OP_LBLT:
		case OP_LBGT:
		case OP_LBGE:
		case OP_LBLE:
		case OP_IBNE_UN:
		case OP_IBLT_UN:
		case OP_IBGT_UN:
		case OP_IBGE_UN:
		case OP_IBLE_UN:
		case OP_LBNE_UN:
		case OP_LBLT_UN:
		case OP_LBGT_UN:
		case OP_LBGE_UN:
		case OP_LBLE_UN:
			EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
			break;
		case OP_CMOV_IEQ:
		case OP_CMOV_IGE:
		case OP_CMOV_IGT:
		case OP_CMOV_ILE:
		case OP_CMOV_ILT:
		case OP_CMOV_INE_UN:
		case OP_CMOV_IGE_UN:
		case OP_CMOV_IGT_UN:
		case OP_CMOV_ILE_UN:
		case OP_CMOV_ILT_UN:
		case OP_CMOV_LEQ:
		case OP_CMOV_LGE:
		case OP_CMOV_LGT:
		case OP_CMOV_LLE:
		case OP_CMOV_LLT:
		case OP_CMOV_LNE_UN:
		case OP_CMOV_LGE_UN:
		case OP_CMOV_LGT_UN:
		case OP_CMOV_LLE_UN:
		case OP_CMOV_LLT_UN:
			g_assert (ins->dreg == ins->sreg1);
			/* This needs to operate on 64 bit values */
			amd64_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
			break;
		case OP_LNOT:
			amd64_not_reg (code, ins->sreg1);
			break;
		case OP_LNEG:
			amd64_neg_reg (code, ins->sreg1);
			break;
		case OP_ICONST:
		case OP_I8CONST:
			if ((((guint64)ins->inst_c0) >> 32) == 0 && !mini_debug_options.single_imm_size)
				amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 4);
			else
				amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
			break;
		case OP_AOTCONST:
			mono_add_patch_info (cfg, offset, (MonoJumpInfoType)(gsize)ins->inst_i1, ins->inst_p0);
			amd64_mov_reg_membase (code, ins->dreg, AMD64_RIP, 0, sizeof (gpointer));
			break;
		case OP_JUMP_TABLE:
			mono_add_patch_info (cfg, offset, (MonoJumpInfoType)(gsize)ins->inst_i1, ins->inst_p0);
			amd64_mov_reg_imm_size (code, ins->dreg, 0, 8);
			break;
		case OP_MOVE:
		case OP_LMOVE:
			if (ins->dreg != ins->sreg1)
				amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (target_mgreg_t));
			break;
		case OP_AMD64_SET_XMMREG_R4: {
			if (cfg->r4fp) {
				if (ins->dreg != ins->sreg1)
					amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg1);
			} else {
				amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
			}
			break;
		}
		case OP_AMD64_SET_XMMREG_R8: {
			if (ins->dreg != ins->sreg1)
				amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		}
		case OP_TAILCALL_PARAMETER:
			// This opcode helps compute sizes, i.e.
			// of the subsequent OP_TAILCALL, but contributes no code.
			g_assert (ins->next);
			break;
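		/*
		 * Tail calls: the target address is first moved into RAX,
		 * callee saved registers and the stack are restored to their
		 * state at method entry, outgoing stack arguments are copied
		 * over the incoming ones, and control transfers with a jmp so
		 * the callee reuses this frame.
		 */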
		case OP_TAILCALL:
		case OP_TAILCALL_REG:
		case OP_TAILCALL_MEMBASE: {
			call = (MonoCallInst*)ins;
			int i, save_area_offset;
			gboolean tailcall_membase = (ins->opcode == OP_TAILCALL_MEMBASE);
			gboolean tailcall_reg = (ins->opcode == OP_TAILCALL_REG);

			g_assert (!cfg->method->save_lmf);

			max_len += AMD64_NREG * 4;
			max_len += call->stack_usage / sizeof (target_mgreg_t) * ins_get_size (OP_TAILCALL_PARAMETER);
			code = realloc_code (cfg, max_len);

			// FIXME hardcoding RAX here is not ideal.

			if (tailcall_reg) {
				int const reg = ins->sreg1;
				g_assert (reg > -1);
				if (reg != AMD64_RAX)
					amd64_mov_reg_reg (code, AMD64_RAX, reg, 8);
			} else if (tailcall_membase) {
				int const reg = ins->sreg1;
				g_assert (reg > -1);
				amd64_mov_reg_membase (code, AMD64_RAX, reg, ins->inst_offset, 8);
			} else {
				if (cfg->compile_aot) {
					mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, call->method);
					amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RIP, 0, 8);
				} else {
					// FIXME Patch data instead of code.
					guint32 pad_size = (guint32)((code + 2 - cfg->native_code) % 8);
					if (pad_size)
						amd64_padding (code, 8 - pad_size);
					mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, call->method);
					amd64_set_reg_template (code, AMD64_RAX);
				}
			}

			/* Restore callee saved registers */
			save_area_offset = cfg->arch.reg_save_area_offset;
			for (i = 0; i < AMD64_NREG; ++i)
				if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & ((regmask_t)1 << i))) {
					amd64_mov_reg_membase (code, i, cfg->frame_reg, save_area_offset, 8);
					save_area_offset += 8;
				}

			if (cfg->arch.omit_fp) {
				if (cfg->arch.stack_alloc_size)
					amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, cfg->arch.stack_alloc_size);
				// FIXME:
				if (call->stack_usage)
					NOT_IMPLEMENTED;
			} else {
				amd64_push_reg (code, AMD64_RAX);
				/* Copy arguments on the stack to our argument area */
				// FIXME use rep mov for constant code size, before nonvolatiles
				// restored, first saving rsi, rdi into volatiles
				for (i = 0; i < call->stack_usage; i += sizeof (target_mgreg_t)) {
					amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, i + 8, sizeof (target_mgreg_t));
					amd64_mov_membase_reg (code, AMD64_RBP, ARGS_OFFSET + i, AMD64_RAX, sizeof (target_mgreg_t));
				}
				amd64_pop_reg (code, AMD64_RAX);
				amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, 0);
				amd64_pop_reg (code, AMD64_RBP);
				mono_emit_unwind_op_same_value (cfg, code, AMD64_RBP);
			}

#ifdef TARGET_WIN32
			// Redundant REX byte indicates a tailcall to the native unwinder. It means nothing to the processor.
			// https://github.com/dotnet/coreclr/blob/966dabb5bb3c4bf1ea885e1e8dc6528e8c64dc4f/src/unwinder/amd64/unwinder_amd64.cpp#L1394
			// FIXME This should be jmp rip+32 for AOT direct to same assembly.
			// FIXME This should be jmp [rip+32] for AOT direct to not-same assembly (through data).
			// FIXME This should be jmp [rip+32] for JIT direct -- patch data instead of code.
			// This is only close to ideal for tailcall_membase, and even then it should
			// have a more dynamic register allocation.
			x86_imm_emit8 (code, 0x48);
			amd64_jump_reg (code, AMD64_RAX);
#else
			// NT does not have varargs rax use, and NT ABI does not have red zone.
			// Use red-zone mov/jmp instead of push/ret to preserve call/ret speculation stack.
			// FIXME Just like NT the direct cases are not ideal.
			amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RAX, 8);
			code = amd64_handle_varargs_call (cfg, code, call, FALSE);
			amd64_jump_membase (code, AMD64_RSP, -8);
#endif
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			break;
		}
		case OP_CHECK_THIS:
			/* ensure ins->sreg1 is not NULL */
			amd64_alu_membase_imm_size (code, X86_CMP, ins->sreg1, 0, 0, 4);
			break;
		case OP_ARGLIST: {
			amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, cfg->sig_cookie);
			amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, sizeof (gpointer));
			break;
		}
		case OP_FCALL:
		case OP_RCALL:
		case OP_LCALL:
		case OP_VCALL:
		case OP_VCALL2:
		case OP_VOIDCALL:
		case OP_CALL:
			call = (MonoCallInst*)ins;

			code = amd64_handle_varargs_call (cfg, code, call, FALSE);
			code = emit_call (cfg, call, code, MONO_JIT_ICALL_ZeroIsReserved);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_REG:
		case OP_RCALL_REG:
		case OP_LCALL_REG:
		case OP_VCALL_REG:
		case OP_VCALL2_REG:
		case OP_VOIDCALL_REG:
		case OP_CALL_REG:
			call = (MonoCallInst*)ins;

			if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
				amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
				ins->sreg1 = AMD64_R11;
			}

			code = amd64_handle_varargs_call (cfg, code, call, TRUE);
			amd64_call_reg (code, ins->sreg1);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_MEMBASE:
		case OP_RCALL_MEMBASE:
		case OP_LCALL_MEMBASE:
		case OP_VCALL_MEMBASE:
		case OP_VCALL2_MEMBASE:
		case OP_VOIDCALL_MEMBASE:
		case OP_CALL_MEMBASE:
			call = (MonoCallInst*)ins;

			amd64_call_membase (code, ins->sreg1, ins->inst_offset);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			code = emit_move_return_value (cfg, ins, code);
			break;
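		/*
		 * OP_DYN_CALL invokes a method whose signature is only known at
		 * run time: mono_arch_get_dyn_call_args () packs the arguments
		 * into a DynCallArgs buffer (passed in r11), and the code below
		 * unpacks it into the fixed argument registers, the SSE
		 * registers and the stack before calling through r10.
		 */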
		case OP_DYN_CALL: {
			int i, limit_reg, index_reg, src_reg, dst_reg;
			MonoInst *var = cfg->dyn_call_var;
			guint8 *label;
			guint8 *buf [16];

			g_assert (var->opcode == OP_REGOFFSET);

			/* r11 = args buffer filled by mono_arch_get_dyn_call_args () */
			amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
			/* r10 = ftn */
			amd64_mov_reg_reg (code, AMD64_R10, ins->sreg2, 8);

			/* Save args buffer */
			amd64_mov_membase_reg (code, var->inst_basereg, var->inst_offset, AMD64_R11, 8);

			/* Set fp arg regs */
			amd64_mov_reg_membase (code, AMD64_RAX, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, has_fp), sizeof (target_mgreg_t));
			amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX);
			label = code;
			amd64_branch8 (code, X86_CC_Z, -1, 1);
			for (i = 0; i < FLOAT_PARAM_REGS; ++i)
				amd64_sse_movsd_reg_membase (code, i, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, fregs) + (i * sizeof (double)));
			amd64_patch (label, code);

			/* Allocate param area */
			/* This doesn't need to be freed since OP_DYN_CALL is never called in a loop */
			amd64_mov_reg_membase (code, AMD64_RAX, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, nstack_args), 8);
			amd64_shift_reg_imm (code, X86_SHL, AMD64_RAX, 3);
			amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, AMD64_RAX);

			/* Set stack args */
			/* rax/rcx/rdx/r8/r9 is scratch */
			limit_reg = AMD64_RAX;
			index_reg = AMD64_RCX;
			src_reg = AMD64_R8;
			dst_reg = AMD64_R9;

			amd64_mov_reg_membase (code, limit_reg, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, nstack_args), 8);
			amd64_mov_reg_imm (code, index_reg, 0);
			amd64_lea_membase (code, src_reg, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, regs) + ((PARAM_REGS) * sizeof (target_mgreg_t)));
			amd64_mov_reg_reg (code, dst_reg, AMD64_RSP, 8);
			buf [0] = code;
			x86_jump8 (code, 0);
			buf [1] = code;
			amd64_mov_reg_membase (code, AMD64_RDX, src_reg, 0, 8);
			amd64_mov_membase_reg (code, dst_reg, 0, AMD64_RDX, 8);
			amd64_alu_reg_imm (code, X86_ADD, index_reg, 1);
			amd64_alu_reg_imm (code, X86_ADD, src_reg, 8);
			amd64_alu_reg_imm (code, X86_ADD, dst_reg, 8);
			amd64_patch (buf [0], code);
			amd64_alu_reg_reg (code, X86_CMP, index_reg, limit_reg);
			buf [2] = code;
			x86_branch8 (code, X86_CC_LT, 0, FALSE);
			amd64_patch (buf [2], buf [1]);

			/* Set argument registers */
			for (i = 0; i < PARAM_REGS; ++i)
				amd64_mov_reg_membase (code, param_regs [i], AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, regs) + (i * sizeof (target_mgreg_t)), sizeof (target_mgreg_t));

			/* Make the call */
			amd64_call_reg (code, AMD64_R10);

			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;

			/* Save result */
			amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
			amd64_mov_membase_reg (code, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, res), AMD64_RAX, 8);
			amd64_sse_movsd_membase_reg (code, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, fregs), AMD64_XMM0);
			amd64_sse_movsd_membase_reg (code, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, fregs) + sizeof (double), AMD64_XMM1);
			break;
		}
		case OP_AMD64_SAVE_SP_TO_LMF: {
			MonoInst *lmf_var = cfg->lmf_var;
			amd64_mov_membase_reg (code, lmf_var->inst_basereg, lmf_var->inst_offset + MONO_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
			break;
		}
		case OP_X86_PUSH:
			g_assert_not_reached ();
			amd64_push_reg (code, ins->sreg1);
			break;
		case OP_X86_PUSH_IMM:
			g_assert_not_reached ();
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_push_imm (code, ins->inst_imm);
			break;
		case OP_X86_PUSH_MEMBASE:
			g_assert_not_reached ();
			amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_X86_PUSH_OBJ: {
			int size = ALIGN_TO (ins->inst_imm, 8);

			g_assert_not_reached ();

			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
			amd64_push_reg (code, AMD64_RDI);
			amd64_push_reg (code, AMD64_RSI);
			amd64_push_reg (code, AMD64_RCX);
			if (ins->inst_offset)
				amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
			else
				amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
			amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, (3 * 8));
			amd64_mov_reg_imm (code, AMD64_RCX, (size >> 3));
			amd64_cld (code);
			amd64_prefix (code, X86_REP_PREFIX);
			amd64_movsd (code);
			amd64_pop_reg (code, AMD64_RCX);
			amd64_pop_reg (code, AMD64_RSI);
			amd64_pop_reg (code, AMD64_RDI);
			break;
		}
		case OP_GENERIC_CLASS_INIT: {
			guint8 *jump;

			g_assert (ins->sreg1 == MONO_AMD64_ARG_REG1);

			amd64_test_membase_imm_size (code, ins->sreg1, MONO_STRUCT_OFFSET (MonoVTable, initialized), 1, 1);
			jump = code;
			amd64_branch8 (code, X86_CC_NZ, -1, 1);

			code = emit_call (cfg, NULL, code, MONO_JIT_ICALL_mono_generic_class_init);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;

			x86_patch (jump, code);
			break;
		}
		case OP_X86_LEA:
			amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
			break;
		case OP_X86_LEA_MEMBASE:
			amd64_lea4_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
			break;
		case OP_AMD64_LEA_MEMBASE:
			amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
			break;
		case OP_X86_XCHG:
			amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
			break;
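		/*
		 * Localloc: the requested size is rounded up to
		 * MONO_ARCH_FRAME_ALIGNMENT with the usual
		 * (size + align - 1) & ~(align - 1) trick so RSP stays
		 * aligned, and any outgoing param area is re-reserved below
		 * the new allocation.
		 */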
		case OP_LOCALLOC:
			/* keep alignment */
			amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
			amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
			code = mono_emit_stack_alloc (cfg, code, ins);
			amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
			if (cfg->param_area)
				amd64_alu_reg_imm (code, X86_ADD, ins->dreg, cfg->param_area);
			break;
		case OP_LOCALLOC_IMM: {
			guint32 size = ins->inst_imm;
			size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);

			if (ins->flags & MONO_INST_INIT) {
				if (size < 64) {
					int i;

					amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
					amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);

					for (i = 0; i < size; i += 8)
						amd64_mov_membase_reg (code, AMD64_RSP, i, ins->dreg, 8);
					amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
				} else {
					amd64_mov_reg_imm (code, ins->dreg, size);
					ins->sreg1 = ins->dreg;

					code = mono_emit_stack_alloc (cfg, code, ins);
					amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
				}
			} else {
				amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
				amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
			}
			if (cfg->param_area)
				amd64_alu_reg_imm (code, X86_ADD, ins->dreg, cfg->param_area);
			break;
		}
		case OP_THROW: {
			amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
			code = emit_call (cfg, NULL, code, MONO_JIT_ICALL_mono_arch_throw_exception);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			break;
		}
		case OP_RETHROW: {
			amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
			code = emit_call (cfg, NULL, code, MONO_JIT_ICALL_mono_arch_rethrow_exception);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			break;
		}
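		/*
		 * OP_CALL_HANDLER invokes a finally/filter handler with a call
		 * instruction. The call pushes 8 bytes, so RSP is adjusted by
		 * an extra 8 around it to keep the required 16-byte stack
		 * alignment inside the handler.
		 */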
		case OP_CALL_HANDLER:
			/* Align stack */
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
			amd64_call_imm (code, 0);
			/*
			 * ins->inst_eh_blocks and bb->clause_holes are part of same GList.
			 * Holes from bb->clause_holes will be added separately for the entire
			 * basic block. Add only the rest of them.
			 */
			for (GList *tmp = ins->inst_eh_blocks; tmp != bb->clause_holes; tmp = tmp->prev)
				mono_cfg_add_try_hole (cfg, ((MonoLeaveClause *) tmp->data)->clause, code, bb);
			/* Restore stack alignment */
			amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
			break;
		case OP_START_HANDLER: {
			/* Even though we're saving RSP, use sizeof */
			/* gpointer because spvar is of type IntPtr */
			/* see: mono_create_spvar_for_region */
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			amd64_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, AMD64_RSP, sizeof (gpointer));

			if ((MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FINALLY) ||
				 MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FILTER) ||
				 MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FAULT)) &&
				cfg->param_area) {
				amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ALIGN_TO (cfg->param_area, MONO_ARCH_FRAME_ALIGNMENT));
			}
			break;
		}
		case OP_ENDFINALLY: {
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, sizeof (gpointer));
			amd64_ret (code);
			break;
		}
		case OP_ENDFILTER: {
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, sizeof (gpointer));
			/* The local allocator will put the result into RAX */
			amd64_ret (code);
			break;
		}
		case OP_GET_EX_OBJ:
			if (ins->dreg != AMD64_RAX)
				amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, sizeof (target_mgreg_t));
			break;
		case OP_LABEL:
			ins->inst_c0 = code - cfg->native_code;
			break;
		case OP_BR:
			//g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
			//if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
			//break;
			if (ins->inst_target_bb->native_offset) {
				amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
			} else {
				mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
				if (optimize_branch_pred &&
				    x86_is_imm8 (ins->inst_target_bb->max_offset - offset))
					x86_jump8 (code, 0);
				else
					x86_jump32 (code, 0);
			}
			break;
		case OP_BR_REG:
			amd64_jump_reg (code, ins->sreg1);
			break;
		case OP_CEQ:
		case OP_LCEQ:
		case OP_ICEQ:
		case OP_CLT:
		case OP_LCLT:
		case OP_ICLT:
		case OP_CGT:
		case OP_LCGT:
		case OP_ICGT:
		case OP_CLT_UN:
		case OP_LCLT_UN:
		case OP_ICLT_UN:
		case OP_CGT_UN:
		case OP_LCGT_UN:
		case OP_ICGT_UN:
			amd64_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
			amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_COND_EXC_EQ:
		case OP_COND_EXC_NE_UN:
		case OP_COND_EXC_LT:
		case OP_COND_EXC_LT_UN:
		case OP_COND_EXC_GT:
		case OP_COND_EXC_GT_UN:
		case OP_COND_EXC_GE:
		case OP_COND_EXC_GE_UN:
		case OP_COND_EXC_LE:
		case OP_COND_EXC_LE_UN:
		case OP_COND_EXC_IEQ:
		case OP_COND_EXC_INE_UN:
		case OP_COND_EXC_ILT:
		case OP_COND_EXC_ILT_UN:
		case OP_COND_EXC_IGT:
		case OP_COND_EXC_IGT_UN:
		case OP_COND_EXC_IGE:
		case OP_COND_EXC_IGE_UN:
		case OP_COND_EXC_ILE:
		case OP_COND_EXC_ILE_UN:
			EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], (const char *)ins->inst_p1);
			break;
		case OP_COND_EXC_OV:
		case OP_COND_EXC_NO:
		case OP_COND_EXC_C:
		case OP_COND_EXC_NC:
			EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ],
				(ins->opcode < OP_COND_EXC_NE_UN), (const char *)ins->inst_p1);
			break;
		case OP_COND_EXC_IOV:
		case OP_COND_EXC_INO:
		case OP_COND_EXC_IC:
		case OP_COND_EXC_INC:
			EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ],
				(ins->opcode < OP_COND_EXC_INE_UN), (const char *)ins->inst_p1);
			break;
		/* floating point opcodes */
		case OP_R8CONST: {
			double d = *(double *)ins->inst_p0;

			if ((d == 0.0) && (mono_signbit (d) == 0)) {
				amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
			} else {
				mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
				amd64_sse_movsd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
			}
			break;
		}
		case OP_R4CONST: {
			float f = *(float *)ins->inst_p0;

			if ((f == 0.0) && (mono_signbit (f) == 0)) {
				if (cfg->r4fp)
					amd64_sse_xorps_reg_reg (code, ins->dreg, ins->dreg);
				else
					amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
			} else {
				mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
				amd64_sse_movss_reg_membase (code, ins->dreg, AMD64_RIP, 0);
				if (!cfg->r4fp)
					amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
			}
			break;
		}
		case OP_STORER8_MEMBASE_REG:
			amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
			break;
		case OP_LOADR8_MEMBASE:
			amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_STORER4_MEMBASE_REG:
			if (cfg->r4fp) {
				amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
			} else {
				/* This requires a double->single conversion */
				amd64_sse_cvtsd2ss_reg_reg (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1);
				amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, MONO_ARCH_FP_SCRATCH_REG);
			}
			break;
		case OP_LOADR4_MEMBASE:
			if (cfg->r4fp) {
				amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
			} else {
				amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
				amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
			}
			break;
		case OP_ICONV_TO_R4:
			if (cfg->r4fp) {
				amd64_sse_cvtsi2ss_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
			} else {
				amd64_sse_cvtsi2ss_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
				amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
			}
			break;
		case OP_ICONV_TO_R8:
			amd64_sse_cvtsi2sd_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
			break;
		case OP_LCONV_TO_R4:
			if (cfg->r4fp) {
				amd64_sse_cvtsi2ss_reg_reg (code, ins->dreg, ins->sreg1);
			} else {
				amd64_sse_cvtsi2ss_reg_reg (code, ins->dreg, ins->sreg1);
				amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
			}
			break;
		case OP_LCONV_TO_R8:
			amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_FCONV_TO_R4:
			if (cfg->r4fp) {
				amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
			} else {
				amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
				amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
			}
			break;
		case OP_FCONV_TO_I1:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, TRUE);
			break;
		case OP_FCONV_TO_U1:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, FALSE);
			break;
		case OP_FCONV_TO_I2:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, TRUE);
			break;
		case OP_FCONV_TO_U2:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, FALSE);
			break;
		case OP_FCONV_TO_U4:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE);
			break;
		case OP_FCONV_TO_I4:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, TRUE);
			break;
		case OP_FCONV_TO_I8:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, TRUE);
			break;
		case OP_RCONV_TO_I1:
			amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
			amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
			break;
		case OP_RCONV_TO_U1:
			amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
			amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_RCONV_TO_I2:
			amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
			amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
			break;
		case OP_RCONV_TO_U2:
			amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
			amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
			break;
		case OP_RCONV_TO_I4:
			amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
			break;
		case OP_RCONV_TO_U4:
			// Use 8 as register size to get Nan/Inf conversion result truncated to 0
			amd64_sse_cvtss2si_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_RCONV_TO_I8:
			amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 8);
			break;
		case OP_RCONV_TO_R8:
			amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_RCONV_TO_R4:
			if (ins->dreg != ins->sreg1)
				amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg1);
			break;
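		/*
		 * Unsigned 64-bit to double: values with the sign bit clear
		 * convert directly with cvtsi2sd. For values >= 2^63 the value
		 * is halved (shifting right by one while OR-ing the low bit
		 * back in to keep the rounding correct), converted, and then
		 * doubled. This matches the sequence gcc emits.
		 */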
		case OP_LCONV_TO_R_UN: {
			guint8 *br [2];

			/* Based on gcc code */
			amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
			br [0] = code; x86_branch8 (code, X86_CC_S, 0, TRUE);

			/* Positive case */
			amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
			br [1] = code; x86_jump8 (code, 0);
			amd64_patch (br [0], code);

			/* Negative case */
			/* Save to the red zone */
			amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RAX, 8);
			amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8);
			amd64_mov_reg_reg (code, AMD64_RCX, ins->sreg1, 8);
			amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
			amd64_alu_reg_imm (code, X86_AND, AMD64_RCX, 1);
			amd64_shift_reg_imm (code, X86_SHR, AMD64_RAX, 1);
			amd64_alu_reg_reg (code, X86_OR, AMD64_RAX, AMD64_RCX);
			amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, AMD64_RAX);
			amd64_sse_addsd_reg_reg (code, ins->dreg, ins->dreg);
			/* Restore */
			amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8);
			amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, -8, 8);
			amd64_patch (br [1], code);
			break;
		}
6007 case OP_LCONV_TO_OVF_U4
:
6008 amd64_alu_reg_imm (code
, X86_CMP
, ins
->sreg1
, 0);
6009 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_LT
, TRUE
, "OverflowException");
6010 amd64_mov_reg_reg (code
, ins
->dreg
, ins
->sreg1
, 8);
6012 case OP_LCONV_TO_OVF_I4_UN
:
6013 amd64_alu_reg_imm (code
, X86_CMP
, ins
->sreg1
, 0x7fffffff);
6014 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_GT
, FALSE
, "OverflowException");
6015 amd64_mov_reg_reg (code
, ins
->dreg
, ins
->sreg1
, 8);
6018 if (ins
->dreg
!= ins
->sreg1
)
6019 amd64_sse_movsd_reg_reg (code
, ins
->dreg
, ins
->sreg1
);
6022 if (ins
->dreg
!= ins
->sreg1
)
6023 amd64_sse_movss_reg_reg (code
, ins
->dreg
, ins
->sreg1
);
6025 case OP_MOVE_F_TO_I4
:
6027 amd64_movd_reg_xreg_size (code
, ins
->dreg
, ins
->sreg1
, 8);
6029 amd64_sse_cvtsd2ss_reg_reg (code
, MONO_ARCH_FP_SCRATCH_REG
, ins
->sreg1
);
6030 amd64_movd_reg_xreg_size (code
, ins
->dreg
, MONO_ARCH_FP_SCRATCH_REG
, 8);
6033 case OP_MOVE_I4_TO_F
:
6034 amd64_movd_xreg_reg_size (code
, ins
->dreg
, ins
->sreg1
, 8);
6036 amd64_sse_cvtss2sd_reg_reg (code
, ins
->dreg
, ins
->dreg
);
6038 case OP_MOVE_F_TO_I8
:
6039 amd64_movd_reg_xreg_size (code
, ins
->dreg
, ins
->sreg1
, 8);
6041 case OP_MOVE_I8_TO_F
:
6042 amd64_movd_xreg_reg_size (code
, ins
->dreg
, ins
->sreg1
, 8);
6045 amd64_sse_addsd_reg_reg (code
, ins
->dreg
, ins
->sreg2
);
6048 amd64_sse_subsd_reg_reg (code
, ins
->dreg
, ins
->sreg2
);
6051 amd64_sse_mulsd_reg_reg (code
, ins
->dreg
, ins
->sreg2
);
6054 amd64_sse_divsd_reg_reg (code
, ins
->dreg
, ins
->sreg2
);
6057 static double r8_0
= -0.0;
6059 g_assert (ins
->sreg1
== ins
->dreg
);
6061 mono_add_patch_info (cfg
, offset
, MONO_PATCH_INFO_R8
, &r8_0
);
6062 amd64_sse_xorpd_reg_membase (code
, ins
->dreg
, AMD64_RIP
, 0);
6066 static guint64 d
= 0x7fffffffffffffffUL
;
6068 g_assert (ins
->sreg1
== ins
->dreg
);
6070 mono_add_patch_info (cfg
, offset
, MONO_PATCH_INFO_R8
, &d
);
6071 amd64_sse_andpd_reg_membase (code
, ins
->dreg
, AMD64_RIP
, 0);
6075 EMIT_SSE2_FPFUNC (code
, fsqrt
, ins
->dreg
, ins
->sreg1
);
6079 amd64_sse_addss_reg_reg (code
, ins
->dreg
, ins
->sreg2
);
6082 amd64_sse_subss_reg_reg (code
, ins
->dreg
, ins
->sreg2
);
6085 amd64_sse_mulss_reg_reg (code
, ins
->dreg
, ins
->sreg2
);
6088 amd64_sse_divss_reg_reg (code
, ins
->dreg
, ins
->sreg2
);
6091 static float r4_0
= -0.0;
6093 g_assert (ins
->sreg1
== ins
->dreg
);
6095 mono_add_patch_info (cfg
, offset
, MONO_PATCH_INFO_R4
, &r4_0
);
6096 amd64_sse_movss_reg_membase (code
, MONO_ARCH_FP_SCRATCH_REG
, AMD64_RIP
, 0);
6097 amd64_sse_xorps_reg_reg (code
, ins
->dreg
, MONO_ARCH_FP_SCRATCH_REG
);
		case OP_IMIN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
			amd64_cmov_reg_size (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2, 4);
			break;
		case OP_IMIN_UN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
			amd64_cmov_reg_size (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2, 4);
			break;
		case OP_IMAX:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
			amd64_cmov_reg_size (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2, 4);
			break;
		case OP_IMAX_UN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
			amd64_cmov_reg_size (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2, 4);
			break;
		case OP_LMIN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			amd64_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
			break;
		case OP_LMIN_UN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			amd64_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
			break;
		case OP_LMAX:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			amd64_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
			break;
		case OP_LMAX_UN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			amd64_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
			break;
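		/*
		 * Note for readers: the min/max opcodes above lower to a compare plus a
		 * conditional move, so no branch is emitted. An illustrative C equivalent
		 * of the emitted sequence (a sketch only; the real operands are vregs):
		 *
		 *   int32_t imin (int32_t a, int32_t b) {
		 *       return a > b ? b : a;   // cmp a, b; cmovg a, b
		 *   }
		 *
		 * The _UN variants pass FALSE for the signed flag, which makes the cmov
		 * test the unsigned above/below conditions instead.
		 */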
		case OP_FCOMPARE:
			/*
			 * FIXME: Get rid of this.
			 * The two arguments are swapped because the fbranch instructions
			 * depend on this for the non-sse case to work.
			 */
			amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
			break;
		case OP_RCOMPARE:
			/*
			 * FIXME: Get rid of this.
			 * The two arguments are swapped because the fbranch instructions
			 * depend on this for the non-sse case to work.
			 */
			amd64_sse_comiss_reg_reg (code, ins->sreg2, ins->sreg1);
			break;
		case OP_FCEQ:
		case OP_FCNEQ: {
			/* zeroing the register at the start results in
			 * shorter and faster code (we can also remove the widening op)
			 */
			guchar *unordered_check;

			amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
			amd64_sse_comisd_reg_reg (code, ins->sreg1, ins->sreg2);
			unordered_check = code;
			x86_branch8 (code, X86_CC_P, 0, FALSE);

			if (ins->opcode == OP_FCEQ) {
				amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
				amd64_patch (unordered_check, code);
			} else {
				guchar *jump_to_end;

				amd64_set_reg (code, X86_CC_NE, ins->dreg, FALSE);
				jump_to_end = code;
				x86_jump8 (code, 0);
				amd64_patch (unordered_check, code);
				amd64_inc_reg (code, ins->dreg);
				amd64_patch (jump_to_end, code);
			}
			break;
		}
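		/*
		 * Background for the parity checks in the OP_FCxx/OP_RCxx cases: comisd
		 * sets ZF/PF/CF from the compare - equal gives ZF=1, less gives CF=1, and
		 * unordered (either operand NaN) sets ZF=PF=CF=1. Testing PF first is what
		 * keeps OP_FCEQ from reporting NaN == NaN. A sketch of the semantics:
		 *
		 *   int fceq (double a, double b) {
		 *       if (isnan (a) || isnan (b))   // PF set -> skip the setcc
		 *           return 0;
		 *       return a == b;
		 *   }
		 */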
		case OP_FCLT:
		case OP_FCLT_UN: {
			/* zeroing the register at the start results in
			 * shorter and faster code (we can also remove the widening op)
			 */
			amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
			amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
			if (ins->opcode == OP_FCLT_UN) {
				guchar *unordered_check = code;
				guchar *jump_to_end;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
				jump_to_end = code;
				x86_jump8 (code, 0);
				amd64_patch (unordered_check, code);
				amd64_inc_reg (code, ins->dreg);
				amd64_patch (jump_to_end, code);
			} else {
				amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
			}
			break;
		}
		case OP_FCLE: {
			guchar *unordered_check;
			amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
			amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
			unordered_check = code;
			x86_branch8 (code, X86_CC_P, 0, FALSE);
			amd64_set_reg (code, X86_CC_NB, ins->dreg, FALSE);
			amd64_patch (unordered_check, code);
			break;
		}
		case OP_FCGT:
		case OP_FCGT_UN: {
			/* zeroing the register at the start results in
			 * shorter and faster code (we can also remove the widening op)
			 */
			guchar *unordered_check;

			amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
			amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
			if (ins->opcode == OP_FCGT) {
				unordered_check = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
				amd64_patch (unordered_check, code);
			} else {
				amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
			}
			break;
		}
		case OP_FCGE: {
			guchar *unordered_check;
			amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
			amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
			unordered_check = code;
			x86_branch8 (code, X86_CC_P, 0, FALSE);
			amd64_set_reg (code, X86_CC_NA, ins->dreg, FALSE);
			amd64_patch (unordered_check, code);
			break;
		}
		case OP_RCEQ:
		case OP_RCGT:
		case OP_RCLT:
		case OP_RCLT_UN:
		case OP_RCGT_UN: {
			int x86_cond;

			amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
			amd64_sse_comiss_reg_reg (code, ins->sreg2, ins->sreg1);

			switch (ins->opcode) {
			case OP_RCEQ:
				x86_cond = X86_CC_EQ;
				break;
			case OP_RCGT:
				x86_cond = X86_CC_LT;
				break;
			case OP_RCLT:
				x86_cond = X86_CC_GT;
				break;
			case OP_RCLT_UN:
				x86_cond = X86_CC_GT;
				break;
			case OP_RCGT_UN:
				x86_cond = X86_CC_LT;
				break;
			default:
				g_assert_not_reached ();
				break;
			}

			guchar *unordered_check;

			switch (ins->opcode) {
			case OP_RCEQ:
			case OP_RCGT:
				unordered_check = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
				amd64_patch (unordered_check, code);
				break;
			case OP_RCLT_UN:
			case OP_RCGT_UN: {
				guchar *jump_to_end;

				unordered_check = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
				jump_to_end = code;
				x86_jump8 (code, 0);
				amd64_patch (unordered_check, code);
				amd64_inc_reg (code, ins->dreg);
				amd64_patch (jump_to_end, code);
				break;
			}
			case OP_RCLT:
				amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
				break;
			default:
				g_assert_not_reached ();
				break;
			}
			break;
		}
		case OP_FCLT_MEMBASE:
		case OP_FCGT_MEMBASE:
		case OP_FCLT_UN_MEMBASE:
		case OP_FCGT_UN_MEMBASE:
		case OP_FCEQ_MEMBASE: {
			guchar *unordered_check, *jump_to_end;
			int x86_cond;

			amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
			amd64_sse_comisd_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);

			switch (ins->opcode) {
			case OP_FCEQ_MEMBASE:
				x86_cond = X86_CC_EQ;
				break;
			case OP_FCLT_MEMBASE:
			case OP_FCLT_UN_MEMBASE:
				x86_cond = X86_CC_LT;
				break;
			case OP_FCGT_MEMBASE:
			case OP_FCGT_UN_MEMBASE:
				x86_cond = X86_CC_GT;
				break;
			default:
				g_assert_not_reached ();
			}

			unordered_check = code;
			x86_branch8 (code, X86_CC_P, 0, FALSE);
			amd64_set_reg (code, x86_cond, ins->dreg, FALSE);

			switch (ins->opcode) {
			case OP_FCEQ_MEMBASE:
			case OP_FCLT_MEMBASE:
			case OP_FCGT_MEMBASE:
				amd64_patch (unordered_check, code);
				break;
			case OP_FCLT_UN_MEMBASE:
			case OP_FCGT_UN_MEMBASE:
				jump_to_end = code;
				x86_jump8 (code, 0);
				amd64_patch (unordered_check, code);
				amd64_inc_reg (code, ins->dreg);
				amd64_patch (jump_to_end, code);
				break;
			default:
				break;
			}
			break;
		}
		case OP_FBEQ: {
			guchar *jump = code;
			x86_branch8 (code, X86_CC_P, 0, TRUE);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			amd64_patch (jump, code);
			break;
		}
		case OP_FBNE_UN:
			/* Branch if C013 != 100 */
			/* branch if !ZF or (PF|CF) */
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
			EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
			EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
			break;
		case OP_FBLT:
			EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
			break;
		case OP_FBLT_UN:
			EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
			EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
			break;
		case OP_FBGT:
		case OP_FBGT_UN:
			if (ins->opcode == OP_FBGT) {
				guchar *br1;

				/* skip branch if C1=1 */
				br1 = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				/* branch if (C0 | C3) = 1 */
				EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
				amd64_patch (br1, code);
				break;
			} else {
				EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
			}
			break;
		case OP_FBGE: {
			/* Branch if C013 == 100 or 001 */
			guchar *br1;

			/* skip branch if C1=1 */
			br1 = code;
			x86_branch8 (code, X86_CC_P, 0, FALSE);
			/* branch if (C0 | C3) = 1 */
			EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
			amd64_patch (br1, code);
			break;
		}
		case OP_FBGE_UN:
			/* Branch if C013 == 000 */
			EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
			break;
		case OP_FBLE: {
			/* Branch if C013=000 or 100 */
			guchar *br1;

			/* skip branch if C1=1 */
			br1 = code;
			x86_branch8 (code, X86_CC_P, 0, FALSE);
			/* branch if C0=0 */
			EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
			amd64_patch (br1, code);
			break;
		}
		case OP_FBLE_UN:
			/* Branch if C013 != 001 */
			EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
			EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
			break;
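		/*
		 * The C0/C1/C3 comments in the branch cases above date from the x87
		 * lowering; the compares are now done with comisd/comiss (with swapped
		 * operands, see OP_FCOMPARE), whose result lands in the integer flags:
		 * ZF=1 means equal, CF=1 means sreg2 < sreg1 (i.e. sreg1 > sreg2), and
		 * ZF=PF=CF=1 means unordered (NaN). That is why the ordered branches
		 * first test X86_CC_P to route the unordered case explicitly.
		 */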
		case OP_CKFINITE:
			/* Transfer value to the fp stack */
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16);
			amd64_movsd_membase_reg (code, AMD64_RSP, 0, ins->sreg1);
			amd64_fld_membase (code, AMD64_RSP, 0, TRUE);

			amd64_push_reg (code, AMD64_RAX);
			amd64_fxam (code);
			amd64_fnstsw (code);
			amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
			amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
			amd64_pop_reg (code, AMD64_RAX);
			amd64_fstp (code, 0);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "OverflowException");
			amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
			break;
		case OP_TLS_GET:
			code = mono_amd64_emit_tls_get (code, ins->dreg, ins->inst_offset);
			break;
		case OP_TLS_SET:
			code = mono_amd64_emit_tls_set (code, ins->sreg1, ins->inst_offset);
			break;
		case OP_MEMORY_BARRIER: {
			if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
				x86_mfence (code);
			break;
		}
		case OP_ATOMIC_ADD_I4:
		case OP_ATOMIC_ADD_I8: {
			int dreg = ins->dreg;
			guint32 size = (ins->opcode == OP_ATOMIC_ADD_I4) ? 4 : 8;

			if ((dreg == ins->sreg2) || (dreg == ins->inst_basereg))
				dreg = AMD64_R11;

			amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
			amd64_prefix (code, X86_LOCK_PREFIX);
			amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
			/* dreg contains the old value, add with sreg2 value */
			amd64_alu_reg_reg_size (code, X86_ADD, dreg, ins->sreg2, size);

			if (ins->dreg != dreg)
				amd64_mov_reg_reg (code, ins->dreg, dreg, size);

			break;
		}
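		/*
		 * `lock xadd [mem], reg` atomically exchanges reg with the memory operand
		 * while adding, so afterwards the register holds the old value; adding
		 * sreg2 once more yields the new value that OP_ATOMIC_ADD_* must produce.
		 * A C11 sketch of the same contract (illustrative only):
		 *
		 *   #include <stdatomic.h>
		 *   long atomic_add (volatile atomic_long *mem, long add) {
		 *       long old = atomic_fetch_add (mem, add);  // lock xadd
		 *       return old + add;                        // the extra X86_ADD above
		 *   }
		 */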
		case OP_ATOMIC_EXCHANGE_I4:
		case OP_ATOMIC_EXCHANGE_I8: {
			guint32 size = ins->opcode == OP_ATOMIC_EXCHANGE_I4 ? 4 : 8;

			/* LOCK prefix is implied. */
			amd64_mov_reg_reg (code, GP_SCRATCH_REG, ins->sreg2, size);
			amd64_xchg_membase_reg_size (code, ins->sreg1, ins->inst_offset, GP_SCRATCH_REG, size);
			amd64_mov_reg_reg (code, ins->dreg, GP_SCRATCH_REG, size);
			break;
		}
		case OP_ATOMIC_CAS_I4:
		case OP_ATOMIC_CAS_I8: {
			guint32 size;

			if (ins->opcode == OP_ATOMIC_CAS_I8)
				size = 8;
			else
				size = 4;

			/*
			 * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for
			 * an explanation of how this works.
			 */
			g_assert (ins->sreg3 == AMD64_RAX);
			g_assert (ins->sreg1 != AMD64_RAX);
			g_assert (ins->sreg1 != ins->sreg2);

			amd64_prefix (code, X86_LOCK_PREFIX);
			amd64_cmpxchg_membase_reg_size (code, ins->sreg1, ins->inst_offset, ins->sreg2, size);

			if (ins->dreg != AMD64_RAX)
				amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, size);

			break;
		}
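		/*
		 * `lock cmpxchg [mem], reg` implicitly compares against RAX and leaves the
		 * value found in memory in RAX, which is why sreg3 is pinned to RAX and the
		 * result is copied out of RAX afterwards. C11 sketch of the semantics
		 * (illustrative only):
		 *
		 *   #include <stdatomic.h>
		 *   long cas (volatile atomic_long *mem, long newval, long comparand) {
		 *       long observed = comparand;
		 *       atomic_compare_exchange_strong (mem, &observed, newval);
		 *       return observed;  // old value, whether or not the swap happened
		 *   }
		 */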
		case OP_ATOMIC_LOAD_I1: {
			amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
			break;
		}
		case OP_ATOMIC_LOAD_U1: {
			amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
			break;
		}
		case OP_ATOMIC_LOAD_I2: {
			amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
			break;
		}
		case OP_ATOMIC_LOAD_U2: {
			amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
			break;
		}
		case OP_ATOMIC_LOAD_I4: {
			amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
			break;
		}
		case OP_ATOMIC_LOAD_U4:
		case OP_ATOMIC_LOAD_I8:
		case OP_ATOMIC_LOAD_U8: {
			amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, ins->opcode == OP_ATOMIC_LOAD_U4 ? 4 : 8);
			break;
		}
		case OP_ATOMIC_LOAD_R4: {
			if (cfg->r4fp) {
				amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
			} else {
				amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
				amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
			}
			break;
		}
		case OP_ATOMIC_LOAD_R8: {
			amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
			break;
		}
		case OP_ATOMIC_STORE_I1:
		case OP_ATOMIC_STORE_U1:
		case OP_ATOMIC_STORE_I2:
		case OP_ATOMIC_STORE_U2:
		case OP_ATOMIC_STORE_I4:
		case OP_ATOMIC_STORE_U4:
		case OP_ATOMIC_STORE_I8:
		case OP_ATOMIC_STORE_U8: {
			int size;

			switch (ins->opcode) {
			case OP_ATOMIC_STORE_I1:
			case OP_ATOMIC_STORE_U1:
				size = 1;
				break;
			case OP_ATOMIC_STORE_I2:
			case OP_ATOMIC_STORE_U2:
				size = 2;
				break;
			case OP_ATOMIC_STORE_I4:
			case OP_ATOMIC_STORE_U4:
				size = 4;
				break;
			case OP_ATOMIC_STORE_I8:
			case OP_ATOMIC_STORE_U8:
				size = 8;
				break;
			}

			amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, size);

			if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
				x86_mfence (code);
			break;
		}
		case OP_ATOMIC_STORE_R4: {
			if (cfg->r4fp) {
				amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
			} else {
				amd64_sse_cvtsd2ss_reg_reg (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1);
				amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, MONO_ARCH_FP_SCRATCH_REG);
			}

			if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
				x86_mfence (code);
			break;
		}
		case OP_ATOMIC_STORE_R8: {
			amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);

			if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
				x86_mfence (code);
			break;
		}
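		/*
		 * On amd64, ordinary aligned loads already have acquire semantics and
		 * ordinary stores have release semantics, so the atomic load/store opcodes
		 * above compile to plain moves; the only reordering the hardware performs
		 * is store-then-load, so only MONO_MEMORY_BARRIER_SEQ stores pay for an
		 * mfence. In C11 terms (illustrative):
		 *
		 *   atomic_store_explicit (p, v, memory_order_release);  // plain mov
		 *   atomic_store_explicit (p, v, memory_order_seq_cst);  // mov + mfence
		 */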
		case OP_CARD_TABLE_WBARRIER: {
			int ptr = ins->sreg1;
			int value = ins->sreg2;
			guchar *br = NULL;
			int nursery_shift, card_table_shift;
			gpointer card_table_mask;
			size_t nursery_size;

			gpointer card_table = mono_gc_get_card_table (&card_table_shift, &card_table_mask);
			guint64 nursery_start = (guint64)mono_gc_get_nursery (&nursery_shift, &nursery_size);
			guint64 shifted_nursery_start = nursery_start >> nursery_shift;

			/*If either point to the stack we can simply avoid the WB. This happens due to
			 * optimizations revealing a stack store that was not visible when op_cardtable was emitted.
			 */
			if (ins->sreg1 == AMD64_RSP || ins->sreg2 == AMD64_RSP)
				continue;

			/*
			 * We need one register we can clobber, we choose EDX and make sreg1
			 * fixed EAX to work around limitations in the local register allocator.
			 * sreg2 might get allocated to EDX, but that is not a problem since
			 * we use it before clobbering EDX.
			 */
			g_assert (ins->sreg1 == AMD64_RAX);

			/*
			 * This is the code we produce:
			 *
			 *   edx = value
			 *   edx >>= nursery_shift
			 *   cmp edx, (nursery_start >> nursery_shift)
			 *   jne done
			 *   edx = ptr
			 *   edx >>= card_table_shift
			 *   edx += cardtable
			 * done:
			 *   edx[0] = 1
			 */

			if (mono_gc_card_table_nursery_check ()) {
				if (value != AMD64_RDX)
					amd64_mov_reg_reg (code, AMD64_RDX, value, 8);
				amd64_shift_reg_imm (code, X86_SHR, AMD64_RDX, nursery_shift);
				if (shifted_nursery_start >> 31) {
					/*
					 * The value we need to compare against is 64 bits, so we need
					 * another spare register. We use RBX, which we save and
					 * restore.
					 */
					amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RBX, 8);
					amd64_mov_reg_imm (code, AMD64_RBX, shifted_nursery_start);
					amd64_alu_reg_reg (code, X86_CMP, AMD64_RDX, AMD64_RBX);
					amd64_mov_reg_membase (code, AMD64_RBX, AMD64_RSP, -8, 8);
				} else {
					amd64_alu_reg_imm (code, X86_CMP, AMD64_RDX, shifted_nursery_start);
				}
				br = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
			}
			amd64_mov_reg_reg (code, AMD64_RDX, ptr, 8);
			amd64_shift_reg_imm (code, X86_SHR, AMD64_RDX, card_table_shift);
			if (card_table_mask)
				amd64_alu_reg_imm (code, X86_AND, AMD64_RDX, (guint32)(guint64)card_table_mask);

			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GC_CARD_TABLE_ADDR, card_table);
			amd64_alu_reg_membase (code, X86_ADD, AMD64_RDX, AMD64_RIP, 0);

			amd64_mov_membase_imm (code, AMD64_RDX, 0, 1, 1);

			if (mono_gc_card_table_nursery_check ())
				x86_patch (br, code);
			break;
		}
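		/*
		 * The card table marks one byte per 2^card_table_shift bytes of heap, so
		 * the emitted sequence is just pointer arithmetic plus a byte store. A
		 * hedged C sketch of the computation (names are illustrative):
		 *
		 *   void wbarrier (guint8 *card_table, gpointer ptr, int shift, gsize mask) {
		 *       guint8 *card = card_table + (((gsize)ptr >> shift) & mask);
		 *       *card = 1;   // the final mov byte [rdx], 1
		 *   }
		 *
		 * The nursery check in front skips all of this when the stored value cannot
		 * be a nursery object, since only pointers into the nursery need remembering.
		 */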
#ifdef MONO_ARCH_SIMD_INTRINSICS
		/* TODO: Some of these IR opcodes are marked as no clobber when they indeed do. */
		case OP_ADDPS:
			amd64_sse_addps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_DIVPS:
			amd64_sse_divps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_MULPS:
			amd64_sse_mulps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_SUBPS:
			amd64_sse_subps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_MAXPS:
			amd64_sse_maxps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_MINPS:
			amd64_sse_minps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_COMPPS:
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
			amd64_sse_cmpps_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
			break;
		case OP_ANDPS:
			amd64_sse_andps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_ANDNPS:
			amd64_sse_andnps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_ORPS:
			amd64_sse_orps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_XORPS:
			amd64_sse_xorps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_SQRTPS:
			amd64_sse_sqrtps_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_RSQRTPS:
			amd64_sse_rsqrtps_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_RCPPS:
			amd64_sse_rcpps_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_ADDSUBPS:
			amd64_sse_addsubps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_HADDPS:
			amd64_sse_haddps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_HSUBPS:
			amd64_sse_hsubps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_DUPPS_HIGH:
			amd64_sse_movshdup_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_DUPPS_LOW:
			amd64_sse_movsldup_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_PSHUFLEW_HIGH:
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
			amd64_sse_pshufhw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
			break;
		case OP_PSHUFLEW_LOW:
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
			amd64_sse_pshuflw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
			break;
		case OP_PSHUFLED:
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
			amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
			break;
		case OP_SHUFPS:
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
			amd64_sse_shufps_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
			break;
		case OP_SHUFPD:
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0x3);
			amd64_sse_shufpd_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
			break;
		case OP_ADDPD:
			amd64_sse_addpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_DIVPD:
			amd64_sse_divpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_MULPD:
			amd64_sse_mulpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_SUBPD:
			amd64_sse_subpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_MAXPD:
			amd64_sse_maxpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_MINPD:
			amd64_sse_minpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_COMPPD:
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
			amd64_sse_cmppd_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
			break;
		case OP_ANDPD:
			amd64_sse_andpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_ANDNPD:
			amd64_sse_andnpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_ORPD:
			amd64_sse_orpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_XORPD:
			amd64_sse_xorpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_SQRTPD:
			amd64_sse_sqrtpd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_ADDSUBPD:
			amd64_sse_addsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_HADDPD:
			amd64_sse_haddpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_HSUBPD:
			amd64_sse_hsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_DUPPD:
			amd64_sse_movddup_reg_reg (code, ins->dreg, ins->sreg1);
			break;

		case OP_EXTRACT_MASK:
			amd64_sse_pmovmskb_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_PAND:
			amd64_sse_pand_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PANDN:
			amd64_sse_pandn_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_POR:
			amd64_sse_por_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PXOR:
			amd64_sse_pxor_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PADDB:
			amd64_sse_paddb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PADDW:
			amd64_sse_paddw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PADDD:
			amd64_sse_paddd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PADDQ:
			amd64_sse_paddq_reg_reg (code, ins->sreg1, ins->sreg2);
			break;

		case OP_PSUBB:
			amd64_sse_psubb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PSUBW:
			amd64_sse_psubw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PSUBD:
			amd64_sse_psubd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PSUBQ:
			amd64_sse_psubq_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMAXB_UN:
			amd64_sse_pmaxub_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMAXW_UN:
			amd64_sse_pmaxuw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMAXD_UN:
			amd64_sse_pmaxud_reg_reg (code, ins->sreg1, ins->sreg2);
			break;

		case OP_PMAXB:
			amd64_sse_pmaxsb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMAXW:
			amd64_sse_pmaxsw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMAXD:
			amd64_sse_pmaxsd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;

		case OP_PAVGB_UN:
			amd64_sse_pavgb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PAVGW_UN:
			amd64_sse_pavgw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;

		case OP_PMINB_UN:
			amd64_sse_pminub_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMINW_UN:
			amd64_sse_pminuw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMIND_UN:
			amd64_sse_pminud_reg_reg (code, ins->sreg1, ins->sreg2);
			break;

		case OP_PMINB:
			amd64_sse_pminsb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMINW:
			amd64_sse_pminsw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMIND:
			amd64_sse_pminsd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PCMPEQB:
			amd64_sse_pcmpeqb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PCMPEQW:
			amd64_sse_pcmpeqw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PCMPEQD:
			amd64_sse_pcmpeqd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PCMPEQQ:
			amd64_sse_pcmpeqq_reg_reg (code, ins->sreg1, ins->sreg2);
			break;

		case OP_PCMPGTB:
			amd64_sse_pcmpgtb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PCMPGTW:
			amd64_sse_pcmpgtw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PCMPGTD:
			amd64_sse_pcmpgtd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PCMPGTQ:
			amd64_sse_pcmpgtq_reg_reg (code, ins->sreg1, ins->sreg2);
			break;

		case OP_PSUM_ABS_DIFF:
			amd64_sse_psadbw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_UNPACK_LOWB:
			amd64_sse_punpcklbw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_UNPACK_LOWW:
			amd64_sse_punpcklwd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_UNPACK_LOWD:
			amd64_sse_punpckldq_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_UNPACK_LOWQ:
			amd64_sse_punpcklqdq_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_UNPACK_LOWPS:
			amd64_sse_unpcklps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_UNPACK_LOWPD:
			amd64_sse_unpcklpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;

		case OP_UNPACK_HIGHB:
			amd64_sse_punpckhbw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_UNPACK_HIGHW:
			amd64_sse_punpckhwd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_UNPACK_HIGHD:
			amd64_sse_punpckhdq_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_UNPACK_HIGHQ:
			amd64_sse_punpckhqdq_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_UNPACK_HIGHPS:
			amd64_sse_unpckhps_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_UNPACK_HIGHPD:
			amd64_sse_unpckhpd_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PACKW:
			amd64_sse_packsswb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PACKD:
			amd64_sse_packssdw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PACKW_UN:
			amd64_sse_packuswb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PACKD_UN:
			amd64_sse_packusdw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PADDB_SAT_UN:
			amd64_sse_paddusb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PSUBB_SAT_UN:
			amd64_sse_psubusb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PADDW_SAT_UN:
			amd64_sse_paddusw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PSUBW_SAT_UN:
			amd64_sse_psubusw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;

		case OP_PADDB_SAT:
			amd64_sse_paddsb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PSUBB_SAT:
			amd64_sse_psubsb_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PADDW_SAT:
			amd64_sse_paddsw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PSUBW_SAT:
			amd64_sse_psubsw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;

		case OP_PMULW:
			amd64_sse_pmullw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMULD:
			amd64_sse_pmulld_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMULQ:
			amd64_sse_pmuludq_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMULW_HIGH_UN:
			amd64_sse_pmulhuw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PMULW_HIGH:
			amd64_sse_pmulhw_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_PSHRW:
			amd64_sse_psrlw_reg_imm (code, ins->dreg, ins->inst_imm);
			break;
		case OP_PSHRW_REG:
			amd64_sse_psrlw_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_PSARW:
			amd64_sse_psraw_reg_imm (code, ins->dreg, ins->inst_imm);
			break;
		case OP_PSARW_REG:
			amd64_sse_psraw_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_PSHLW:
			amd64_sse_psllw_reg_imm (code, ins->dreg, ins->inst_imm);
			break;
		case OP_PSHLW_REG:
			amd64_sse_psllw_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_PSHRD:
			amd64_sse_psrld_reg_imm (code, ins->dreg, ins->inst_imm);
			break;
		case OP_PSHRD_REG:
			amd64_sse_psrld_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_PSARD:
			amd64_sse_psrad_reg_imm (code, ins->dreg, ins->inst_imm);
			break;
		case OP_PSARD_REG:
			amd64_sse_psrad_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_PSHLD:
			amd64_sse_pslld_reg_imm (code, ins->dreg, ins->inst_imm);
			break;
		case OP_PSHLD_REG:
			amd64_sse_pslld_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_PSHRQ:
			amd64_sse_psrlq_reg_imm (code, ins->dreg, ins->inst_imm);
			break;
		case OP_PSHRQ_REG:
			amd64_sse_psrlq_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		/*TODO: This is a part of the sse spec but not added
		case OP_PSARQ:
			amd64_sse_psraq_reg_imm (code, ins->dreg, ins->inst_imm);
			break;
		case OP_PSARQ_REG:
			amd64_sse_psraq_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		*/
		case OP_PSHLQ:
			amd64_sse_psllq_reg_imm (code, ins->dreg, ins->inst_imm);
			break;
		case OP_PSHLQ_REG:
			amd64_sse_psllq_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_CVTDQ2PD:
			amd64_sse_cvtdq2pd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_CVTDQ2PS:
			amd64_sse_cvtdq2ps_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_CVTPD2DQ:
			amd64_sse_cvtpd2dq_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_CVTPD2PS:
			amd64_sse_cvtpd2ps_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_CVTPS2DQ:
			amd64_sse_cvtps2dq_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_CVTPS2PD:
			amd64_sse_cvtps2pd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_CVTTPD2DQ:
			amd64_sse_cvttpd2dq_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_CVTTPS2DQ:
			amd64_sse_cvttps2dq_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_ICONV_TO_X:
			amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
			break;
		case OP_EXTRACT_I4:
			amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
			break;
		case OP_EXTRACT_I8:
			if (ins->inst_c0) {
				amd64_movhlps_reg_reg (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1);
				amd64_movd_reg_xreg_size (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG, 8);
			} else {
				amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8);
			}
			break;
		case OP_EXTRACT_I1:
		case OP_EXTRACT_U1:
			amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
			if (ins->inst_c0)
				amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
			amd64_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
			break;
		case OP_EXTRACT_I2:
		case OP_EXTRACT_U2:
			/*amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
			if (ins->inst_c0)
				amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, 16, 4);*/
			amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
			amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE, 4);
			break;
		case OP_EXTRACT_R8:
			if (ins->inst_c0)
				amd64_movhlps_reg_reg (code, ins->dreg, ins->sreg1);
			else
				amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_INSERT_I2:
			amd64_sse_pinsrw_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
			break;
		case OP_EXTRACTX_U2:
			amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
			break;
		case OP_INSERTX_U1_SLOW:
			/* sreg1 is the extracted ireg (scratch)
			 * sreg2 is the to be inserted ireg (scratch)
			 * dreg is the xreg to receive the value
			 */

			/*clear the bits from the extracted word*/
			amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
			/*shift the value to insert if needed*/
			if (ins->inst_c0 & 1)
				amd64_shift_reg_imm_size (code, X86_SHL, ins->sreg2, 8, 4);
			/*join them together*/
			amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
			amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
			break;
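		/*
		 * SSE2 has no byte-granularity insert (pinsrb is SSE4.1), so the byte is
		 * spliced into its containing 16-bit lane with integer ops and written back
		 * with pinsrw. Scalar sketch of the masking above (illustrative):
		 *
		 *   guint16 insert_byte (guint16 word, guint8 b, int odd) {
		 *       word &= odd ? 0x00FF : 0xFF00;   // clear the byte being replaced
		 *       return word | (odd ? (guint16)(b << 8) : b);
		 *   }
		 */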
		case OP_INSERTX_I4_SLOW:
			amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
			amd64_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
			amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
			break;
		case OP_INSERTX_I8_SLOW:
			amd64_movd_xreg_reg_size(code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg2, 8);
			if (ins->inst_c0)
				amd64_movlhps_reg_reg (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG);
			else
				amd64_sse_movsd_reg_reg (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG);
			break;
		case OP_INSERTX_R4_SLOW:
			switch (ins->inst_c0) {
			case 0:
				if (cfg->r4fp)
					amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
				else
					amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
				break;
			case 1:
				amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
				if (cfg->r4fp)
					amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
				else
					amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
				amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
				break;
			case 2:
				amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
				if (cfg->r4fp)
					amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
				else
					amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
				amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
				break;
			case 3:
				amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
				if (cfg->r4fp)
					amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
				else
					amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
				amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
				break;
			}
			break;
		case OP_INSERTX_R8_SLOW:
			if (ins->inst_c0)
				amd64_movlhps_reg_reg (code, ins->dreg, ins->sreg2);
			else
				amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_STOREX_MEMBASE_REG:
		case OP_STOREX_MEMBASE:
			amd64_sse_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
			break;
		case OP_LOADX_MEMBASE:
			amd64_sse_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
			break;
		case OP_LOADX_ALIGNED_MEMBASE:
			amd64_sse_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
			break;
		case OP_STOREX_ALIGNED_MEMBASE_REG:
			amd64_sse_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
			break;
		case OP_STOREX_NTA_MEMBASE_REG:
			amd64_sse_movntps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
			break;
		case OP_PREFETCH_MEMBASE:
			amd64_sse_prefetch_reg_membase (code, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
			break;

		case OP_XMOVE:
			/*FIXME the peephole pass should have killed this*/
			if (ins->dreg != ins->sreg1)
				amd64_sse_movaps_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_XZERO:
			amd64_sse_pxor_reg_reg (code, ins->dreg, ins->dreg);
			break;
		case OP_XONES:
			amd64_sse_pcmpeqb_reg_reg (code, ins->dreg, ins->dreg);
			break;
		case OP_ICONV_TO_R4_RAW:
			amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
			if (!cfg->r4fp)
				amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
			break;
		case OP_FCONV_TO_R8_X:
			amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
			break;

		case OP_XCONV_R8_TO_I4:
			amd64_sse_cvttsd2si_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
			switch (ins->backend.source_opcode) {
			case OP_FCONV_TO_I1:
				amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
				break;
			case OP_FCONV_TO_U1:
				amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
				break;
			case OP_FCONV_TO_I2:
				amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
				break;
			case OP_FCONV_TO_U2:
				amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
				break;
			}
			break;
		case OP_EXPAND_I2:
			amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 0);
			amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 1);
			amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
			break;
		case OP_EXPAND_I4:
			amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
			amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
			break;
		case OP_EXPAND_I8:
			amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8);
			amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
			break;
		case OP_EXPAND_R4:
			if (cfg->r4fp) {
				amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
			} else {
				amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
				amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->dreg);
			}
			amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
			break;
		case OP_EXPAND_R8:
			amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
			amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
			break;
		case OP_SSE41_ROUNDPD:
			amd64_sse_roundpd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
			break;
#endif

		case OP_LZCNT32:
			amd64_sse_lzcnt_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
			break;
		case OP_LZCNT64:
			amd64_sse_lzcnt_reg_reg_size (code, ins->dreg, ins->sreg1, 8);
			break;
		case OP_POPCNT32:
			amd64_sse_popcnt_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
			break;
		case OP_POPCNT64:
			amd64_sse_popcnt_reg_reg_size (code, ins->dreg, ins->sreg1, 8);
			break;
		case OP_LIVERANGE_START: {
			if (cfg->verbose_level > 1)
				printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
			MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
			break;
		}
		case OP_LIVERANGE_END: {
			if (cfg->verbose_level > 1)
				printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
			MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
			break;
		}
		case OP_GC_SAFE_POINT: {
			guint8 *br [1];

			amd64_test_membase_imm_size (code, ins->sreg1, 0, 1, 4);
			br [0] = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
			code = emit_call (cfg, NULL, code, MONO_JIT_ICALL_mono_threads_state_poll);
			amd64_patch (br [0], code);
			break;
		}
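		/*
		 * The safe point is a single test of a per-thread polling byte whose
		 * address is in sreg1; the runtime call is only taken on the slow path.
		 * Roughly (illustrative):
		 *
		 *   if (*(volatile guint8 *)poll_addr)   // test byte [sreg1], 1
		 *       mono_threads_state_poll ();      // reached a GC handshake point
		 */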
		case OP_GC_LIVENESS_DEF:
		case OP_GC_LIVENESS_USE:
		case OP_GC_PARAM_SLOT_LIVENESS_DEF:
			ins->backend.pc_offset = code - cfg->native_code;
			break;
		case OP_GC_SPILL_SLOT_LIVENESS_DEF:
			ins->backend.pc_offset = code - cfg->native_code;
			bb->spill_slot_defs = g_slist_prepend_mempool (cfg->mempool, bb->spill_slot_defs, ins);
			break;
		case OP_GET_LAST_ERROR:
			code = emit_get_last_error(code, ins->dreg);
			break;
		case OP_FILL_PROF_CALL_CTX:
			for (int i = 0; i < AMD64_NREG; i++)
				if (AMD64_IS_CALLEE_SAVED_REG (i) || i == AMD64_RSP)
					amd64_mov_membase_reg (code, ins->sreg1, MONO_STRUCT_OFFSET (MonoContext, gregs) + i * sizeof (target_mgreg_t), i, sizeof (target_mgreg_t));
			break;
		default:
			g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
			g_assert_not_reached ();
		}

		g_assertf ((code - cfg->native_code - offset) <= max_len,
			   "wrong maximal instruction length of instruction %s (expected %d, got %d)",
			   mono_inst_name (ins->opcode), max_len, (int)(code - cfg->native_code - offset));
	}

	set_code_cursor (cfg, code);
}

#endif /* DISABLE_JIT */
#if defined(TARGET_WIN32) || defined(HOST_WIN32)
void __chkstk (void);
void ___chkstk_ms (void);
#endif

void
mono_arch_register_lowlevel_calls (void)
{
	/* The signature doesn't matter */
	mono_register_jit_icall (mono_amd64_throw_exception, mono_icall_sig_void, TRUE);

#if defined(TARGET_WIN32) || defined(HOST_WIN32)
#if _MSC_VER
	mono_register_jit_icall_info (&mono_get_jit_icall_info ()->mono_chkstk_win64, __chkstk, "mono_chkstk_win64", NULL, TRUE, "__chkstk");
#else
	mono_register_jit_icall_info (&mono_get_jit_icall_info ()->mono_chkstk_win64, ___chkstk_ms, "mono_chkstk_win64", NULL, TRUE, "___chkstk_ms");
#endif
#endif
}
void
mono_arch_patch_code_new (MonoCompile *cfg, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gpointer target)
{
	unsigned char *ip = ji->ip.i + code;

	/*
	 * Debug code to help track down problems where the target of a near call is
	 * not valid.
	 */
	if (amd64_is_near_call (ip)) {
		gint64 disp = (guint8*)target - (guint8*)ip;

		if (!amd64_is_imm32 (disp)) {
			printf ("TYPE: %d\n", ji->type);
			switch (ji->type) {
			case MONO_PATCH_INFO_JIT_ICALL_ID:
				printf ("V: %s\n", mono_find_jit_icall_info (ji->data.jit_icall_id)->name);
				break;
			case MONO_PATCH_INFO_METHOD_JUMP:
			case MONO_PATCH_INFO_METHOD:
				printf ("V: %s\n", ji->data.method->name);
				break;
			default:
				break;
			}
		}
	}

	amd64_patch (ip, (gpointer)target);
}
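/*
 * A near call on amd64 is `e8 rel32`, so its target must lie within a signed
 * 32-bit displacement of the call site; the debug block above only fires when a
 * patch produced an out-of-range target. Sketch of the range test performed by
 * amd64_is_imm32 (illustrative):
 *
 *   gint64 disp = (guint8 *)target - (guint8 *)ip;
 *   gboolean reachable = disp >= G_MININT32 && disp <= G_MAXINT32;
 */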
static int
get_max_epilog_size (MonoCompile *cfg)
{
	int max_epilog_size = 16;

	if (cfg->method->save_lmf)
		max_epilog_size += 256;

	max_epilog_size += (AMD64_NREG * 2);

	return max_epilog_size;
}
/*
 * This macro is used for testing whether the unwinder works correctly at every point
 * where an async exception can happen.
 */
/* This will generate a SIGSEGV at the given point in the code */
#define async_exc_point(code) do { \
	if (mono_inject_async_exc_method && mono_method_desc_full_match (mono_inject_async_exc_method, cfg->method)) { \
		if (cfg->arch.async_point_count == mono_inject_async_exc_pos) \
			amd64_mov_reg_mem (code, AMD64_RAX, 0, 4); \
		cfg->arch.async_point_count ++; \
	} \
} while (0)
#ifdef TARGET_WIN32
static guint8 *
emit_prolog_setup_sp_win64 (MonoCompile *cfg, guint8 *code, int alloc_size, int *cfa_offset_input)
{
	int cfa_offset = *cfa_offset_input;

	/* Allocate windows stack frame using stack probing method */
	if (alloc_size) {
		if (alloc_size >= 0x1000) {
			amd64_mov_reg_imm (code, AMD64_RAX, alloc_size);
			code = emit_call (cfg, NULL, code, MONO_JIT_ICALL_mono_chkstk_win64);
		}

		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
		if (cfg->arch.omit_fp) {
			cfa_offset += alloc_size;
			mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
			async_exc_point (code);
		}

		// NOTE, in a standard win64 prolog the alloc unwind info is always emitted, but since mono
		// uses a frame pointer with negative offsets and a standard win64 prolog assumes positive offsets, we can't
		// emit sp alloc unwind metadata since the native OS unwinder will incorrectly restore sp. Excluding the alloc
		// metadata on the other hand won't give the OS that information, so it can just restore the frame pointer to sp and
		// that will retrieve the expected results.
		if (cfg->arch.omit_fp)
			mono_emit_unwind_op_sp_alloc (cfg, code, alloc_size);
	}

	*cfa_offset_input = cfa_offset;
	set_code_cursor (cfg, code);
	return code;
}
#endif /* TARGET_WIN32 */
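/*
 * Why the probing above is needed: Windows commits stack pages lazily behind a
 * guard page, so a frame larger than a page must touch each new page in order
 * (which is what __chkstk does). Adjusting rsp past the guard page in a single
 * step would fault. Sketch of the guarantee the probe provides (illustrative):
 *
 *   for (gsize probed = 0x1000; probed <= alloc_size; probed += 0x1000)
 *       *(volatile guint8 *)(rsp - probed);   // touch every new page in order
 */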
guint8 *
mono_arch_emit_prolog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoBasicBlock *bb;
	MonoMethodSignature *sig;
	MonoInst *ins;
	int alloc_size, pos, i, cfa_offset, quad, max_epilog_size, save_area_offset;
	guint8 *code;
	CallInfo *cinfo;
	MonoInst *lmf_var = cfg->lmf_var;
	gboolean args_clobbered = FALSE;

	cfg->code_size = MAX (cfg->header->code_size * 4, 1024);

	code = cfg->native_code = (unsigned char *)g_malloc (cfg->code_size);

	/* Amount of stack space allocated by register saving code */
	pos = 0;

	/* Offset between RSP and the CFA */
	cfa_offset = 0;

	/*
	 * The prolog consists of the following parts:
	 * FP present:
	 * - push rbp
	 * - mov rbp, rsp
	 * - save callee saved regs using moves
	 * - allocate frame
	 * - save rgctx if needed
	 * - save lmf if needed
	 * FP not present:
	 * - allocate frame
	 * - save rgctx if needed
	 * - save lmf if needed
	 * - save callee saved regs using moves
	 */

	// CFA = sp + 8
	cfa_offset = 8;
	mono_emit_unwind_op_def_cfa (cfg, code, AMD64_RSP, 8);
	// IP saved at CFA - 8
	mono_emit_unwind_op_offset (cfg, code, AMD64_RIP, -cfa_offset);
	async_exc_point (code);
	mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset, SLOT_NOREF);

	if (!cfg->arch.omit_fp) {
		amd64_push_reg (code, AMD64_RBP);
		cfa_offset += 8;
		mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
		mono_emit_unwind_op_offset (cfg, code, AMD64_RBP, - cfa_offset);
		async_exc_point (code);
		/* These are handled automatically by the stack marking code */
		mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset, SLOT_NOREF);

		amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof (target_mgreg_t));
		mono_emit_unwind_op_def_cfa_reg (cfg, code, AMD64_RBP);
		mono_emit_unwind_op_fp_alloc (cfg, code, AMD64_RBP, 0);
		async_exc_point (code);
	}

	/* The param area is always at offset 0 from sp */
	/* This needs to be allocated here, since it has to come after the spill area */
	if (cfg->param_area) {
		if (cfg->arch.omit_fp)
			// FIXME:
			g_assert_not_reached ();
		cfg->stack_offset += ALIGN_TO (cfg->param_area, sizeof (target_mgreg_t));
	}

	if (cfg->arch.omit_fp) {
		/*
		 * On enter, the stack is misaligned by the pushing of the return
		 * address. It is either made aligned by the pushing of %rbp, or by
		 * this.
		 */
		alloc_size = ALIGN_TO (cfg->stack_offset, 8);
		if ((alloc_size % 16) == 0) {
			alloc_size += 8;
			/* Mark the padding slot as NOREF */
			mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset - sizeof (target_mgreg_t), SLOT_NOREF);
		}
	} else {
		alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
		if (cfg->stack_offset != alloc_size) {
			/* Mark the padding slot as NOREF */
			mini_gc_set_slot_type_from_fp (cfg, -alloc_size + cfg->param_area, SLOT_NOREF);
		}
		cfg->arch.sp_fp_offset = alloc_size;
		alloc_size -= pos;
	}

	cfg->arch.stack_alloc_size = alloc_size;

	set_code_cursor (cfg, code);
	/* Allocate stack frame */
#ifdef TARGET_WIN32
	code = emit_prolog_setup_sp_win64 (cfg, code, alloc_size, &cfa_offset);
#else
	if (alloc_size) {
		/* See mono_emit_stack_alloc */
#if defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
		guint32 remaining_size = alloc_size;

		/* Use a loop for large sizes */
		if (remaining_size > 10 * 0x1000) {
			amd64_mov_reg_imm (code, X86_EAX, remaining_size / 0x1000);
			guint8 *label = code;
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
			amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RAX, 1);
			amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
			guint8 *label2 = code;
			x86_branch8 (code, X86_CC_NE, 0, FALSE);
			amd64_patch (label2, label);
			if (cfg->arch.omit_fp) {
				cfa_offset += (remaining_size / 0x1000) * 0x1000;
				mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
			}

			remaining_size = remaining_size % 0x1000;
			set_code_cursor (cfg, code);
		}

		guint32 required_code_size = ((remaining_size / 0x1000) + 1) * 11; /*11 is the max size of amd64_alu_reg_imm + amd64_test_membase_reg*/
		code = realloc_code (cfg, required_code_size);

		while (remaining_size >= 0x1000) {
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
			if (cfg->arch.omit_fp) {
				cfa_offset += 0x1000;
				mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
			}
			async_exc_point (code);

			amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
			remaining_size -= 0x1000;
		}
		if (remaining_size) {
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size);
			if (cfg->arch.omit_fp) {
				cfa_offset += remaining_size;
				mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
				async_exc_point (code);
			}
		}
#else
		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
		if (cfg->arch.omit_fp) {
			cfa_offset += alloc_size;
			mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
			async_exc_point (code);
		}
#endif
	}
#endif
	/* Stack alignment check */
#if 0
	{
		guint8 *buf;

		amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RSP, 8);
		amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0xf);
		amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
		buf = code;
		x86_branch8 (code, X86_CC_EQ, 1, FALSE);
		amd64_breakpoint (code);
		amd64_patch (buf, code);
	}
#endif
	if (mini_debug_options.init_stacks) {
		/* Fill the stack frame with a dummy value to force deterministic behavior */

		/* Save registers to the red zone */
		amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDI, 8);
		amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8);

		MONO_DISABLE_WARNING (4310) // cast truncates constant value
		amd64_mov_reg_imm (code, AMD64_RAX, 0x2a2a2a2a2a2a2a2a);
		MONO_RESTORE_WARNING

		amd64_mov_reg_imm (code, AMD64_RCX, alloc_size / 8);
		amd64_mov_reg_reg (code, AMD64_RDI, AMD64_RSP, 8);

		amd64_cld (code);
		amd64_prefix (code, X86_REP_PREFIX);
		amd64_stosl (code);

		amd64_mov_reg_membase (code, AMD64_RDI, AMD64_RSP, -8, 8);
		amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8);
	}
	if (method->save_lmf)
		code = emit_setup_lmf (cfg, code, lmf_var->inst_offset, cfa_offset);

	/* Save callee saved registers */
	if (cfg->arch.omit_fp) {
		save_area_offset = cfg->arch.reg_save_area_offset;
		/* Save caller saved registers after sp is adjusted */
		/* The registers are saved at the bottom of the frame */
		/* FIXME: Optimize this so the regs are saved at the end of the frame in increasing order */
	} else {
		/* The registers are saved just below the saved rbp */
		save_area_offset = cfg->arch.reg_save_area_offset;
	}

	for (i = 0; i < AMD64_NREG; ++i) {
		if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->arch.saved_iregs & (1 << i))) {
			amd64_mov_membase_reg (code, cfg->frame_reg, save_area_offset, i, 8);

			if (cfg->arch.omit_fp) {
				mono_emit_unwind_op_offset (cfg, code, i, - (cfa_offset - save_area_offset));
				/* These are handled automatically by the stack marking code */
				mini_gc_set_slot_type_from_cfa (cfg, - (cfa_offset - save_area_offset), SLOT_NOREF);
			} else {
				mono_emit_unwind_op_offset (cfg, code, i, - (-save_area_offset + (2 * 8)));
			}

			save_area_offset += 8;
			async_exc_point (code);
		}
	}

	/* store runtime generic context */
	if (cfg->rgctx_var) {
		g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET &&
				(cfg->rgctx_var->inst_basereg == AMD64_RBP || cfg->rgctx_var->inst_basereg == AMD64_RSP));

		amd64_mov_membase_reg (code, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, sizeof(gpointer));

		mono_add_var_location (cfg, cfg->rgctx_var, TRUE, MONO_ARCH_RGCTX_REG, 0, 0, code - cfg->native_code);
		mono_add_var_location (cfg, cfg->rgctx_var, FALSE, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, code - cfg->native_code, 0);
	}
	/* compute max_length in order to use short forward jumps */
	max_epilog_size = get_max_epilog_size (cfg);
	if (cfg->opt & MONO_OPT_BRANCH && cfg->max_block_num < MAX_BBLOCKS_FOR_BRANCH_OPTS) {
		for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
			MonoInst *ins;
			int max_length = 0;

			/* max alignment for loops */
			if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
				max_length += LOOP_ALIGNMENT;

			MONO_BB_FOR_EACH_INS (bb, ins) {
				max_length += ins_get_size (ins->opcode);
			}

			/* Take prolog and epilog instrumentation into account */
			if (bb == cfg->bb_entry || bb == cfg->bb_exit)
				max_length += max_epilog_size;

			bb->max_length = max_length;
		}
	}
	sig = mono_method_signature_internal (method);
	pos = 0;

	cinfo = cfg->arch.cinfo;

	if (sig->ret->type != MONO_TYPE_VOID) {
		/* Save volatile arguments to the stack */
		if (cfg->vret_addr && (cfg->vret_addr->opcode != OP_REGVAR))
			amd64_mov_membase_reg (code, cfg->vret_addr->inst_basereg, cfg->vret_addr->inst_offset, cinfo->ret.reg, 8);
	}
	/* Keep this in sync with emit_load_volatile_arguments */
	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = cinfo->args + i;

		ins = cfg->args [i];

		if (ins->flags & MONO_INST_IS_DEAD && !MONO_CFG_PROFILE (cfg, ENTER_CONTEXT))
			/* Unused arguments */
			continue;

		/* Save volatile arguments to the stack */
		if (ins->opcode != OP_REGVAR) {
			switch (ainfo->storage) {
			case ArgInIReg: {
				guint32 size = 8;

				/* FIXME: I1 etc */
				/*
				if (stack_offset & 0x1)
					size = 1;
				else if (stack_offset & 0x2)
					size = 2;
				else if (stack_offset & 0x4)
					size = 4;
				else
					size = 8;
				*/
				amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg, size);

				/*
				 * Save the original location of 'this',
				 * get_generic_info_from_stack_frame () needs this to properly look up
				 * the argument value during the handling of async exceptions.
				 */
				if (i == 0 && sig->hasthis) {
					mono_add_var_location (cfg, ins, TRUE, ainfo->reg, 0, 0, code - cfg->native_code);
					mono_add_var_location (cfg, ins, FALSE, ins->inst_basereg, ins->inst_offset, code - cfg->native_code, 0);
				}
				break;
			}
			case ArgInFloatSSEReg:
				amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg);
				break;
			case ArgInDoubleSSEReg:
				amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg);
				break;
			case ArgValuetypeInReg:
				for (quad = 0; quad < 2; quad ++) {
					switch (ainfo->pair_storage [quad]) {
					case ArgInIReg:
						amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (target_mgreg_t)), ainfo->pair_regs [quad], sizeof (target_mgreg_t));
						break;
					case ArgInFloatSSEReg:
						amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (target_mgreg_t)), ainfo->pair_regs [quad]);
						break;
					case ArgInDoubleSSEReg:
						amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (target_mgreg_t)), ainfo->pair_regs [quad]);
						break;
					case ArgNone:
						break;
					default:
						g_assert_not_reached ();
					}
				}
				break;
			case ArgValuetypeAddrInIReg:
				if (ainfo->pair_storage [0] == ArgInIReg)
					amd64_mov_membase_reg (code, ins->inst_left->inst_basereg, ins->inst_left->inst_offset, ainfo->pair_regs [0], sizeof (target_mgreg_t));
				break;
			case ArgValuetypeAddrOnStack:
				break;
			case ArgGSharedVtInReg:
				amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg, 8);
				break;
			default:
				break;
			}
		} else {
			/* Argument allocated to (non-volatile) register */
			switch (ainfo->storage) {
			case ArgInIReg:
				amd64_mov_reg_reg (code, ins->dreg, ainfo->reg, 8);
				break;
			case ArgOnStack:
				amd64_mov_reg_membase (code, ins->dreg, AMD64_RBP, ARGS_OFFSET + ainfo->offset, 8);
				break;
			default:
				g_assert_not_reached ();
			}

			if (i == 0 && sig->hasthis) {
				g_assert (ainfo->storage == ArgInIReg);
				mono_add_var_location (cfg, ins, TRUE, ainfo->reg, 0, 0, code - cfg->native_code);
				mono_add_var_location (cfg, ins, TRUE, ins->dreg, 0, code - cfg->native_code, 0);
			}
		}
	}

	if (cfg->method->save_lmf)
		args_clobbered = TRUE;
	/*
	 * Optimize the common case of the first bblock making a call with the same
	 * arguments as the method. This works because the arguments are still in their
	 * original argument registers.
	 * FIXME: Generalize this
	 */
	if (!args_clobbered) {
		MonoBasicBlock *first_bb = cfg->bb_entry;
		MonoInst *next;
		int filter = FILTER_IL_SEQ_POINT;

		next = mono_bb_first_inst (first_bb, filter);
		if (!next && first_bb->next_bb) {
			first_bb = first_bb->next_bb;
			next = mono_bb_first_inst (first_bb, filter);
		}

		if (first_bb->in_count > 1)
			next = NULL;

		for (i = 0; next && i < sig->param_count + sig->hasthis; ++i) {
			ArgInfo *ainfo = cinfo->args + i;
			gboolean match = FALSE;

			ins = cfg->args [i];
			if (ins->opcode != OP_REGVAR) {
				switch (ainfo->storage) {
				case ArgInIReg: {
					if (((next->opcode == OP_LOAD_MEMBASE) || (next->opcode == OP_LOADI4_MEMBASE)) && next->inst_basereg == ins->inst_basereg && next->inst_offset == ins->inst_offset) {
						if (next->dreg == ainfo->reg) {
							NULLIFY_INS (next);
							match = TRUE;
						} else {
							next->opcode = OP_MOVE;
							next->sreg1 = ainfo->reg;
							/* Only continue if the instruction doesn't change argument regs */
							if (next->dreg == ainfo->reg || next->dreg == AMD64_RAX)
								match = TRUE;
						}
					}
					break;
				}
				default:
					break;
				}
			} else {
				/* Argument allocated to (non-volatile) register */
				switch (ainfo->storage) {
				case ArgInIReg:
					if (next->opcode == OP_MOVE && next->sreg1 == ins->dreg && next->dreg == ainfo->reg) {
						NULLIFY_INS (next);
						match = TRUE;
					}
					break;
				default:
					break;
				}
			}

			if (match) {
				next = mono_inst_next (next, filter);
				//next = mono_inst_list_next (&next->node, &first_bb->ins_list);
				if (!next)
					break;
			}
		}
	}
	if (cfg->gen_sdb_seq_points) {
		MonoInst *info_var = cfg->arch.seq_point_info_var;

		/* Initialize seq_point_info_var */
		if (cfg->compile_aot) {
			/* Initialize the variable from a GOT slot */
			/* Same as OP_AOTCONST */
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_SEQ_POINT_INFO, cfg->method);
			amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, sizeof(gpointer));
			g_assert (info_var->opcode == OP_REGOFFSET);
			amd64_mov_membase_reg (code, info_var->inst_basereg, info_var->inst_offset, AMD64_R11, 8);
		}

		if (cfg->compile_aot) {
			/* Initialize ss_tramp_var */
			ins = cfg->arch.ss_tramp_var;
			g_assert (ins->opcode == OP_REGOFFSET);

			amd64_mov_reg_membase (code, AMD64_R11, info_var->inst_basereg, info_var->inst_offset, 8);
			amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, MONO_STRUCT_OFFSET (SeqPointInfo, ss_tramp_addr), 8);
			amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, AMD64_R11, 8);
		} else {
			/* Initialize ss_tramp_var */
			ins = cfg->arch.ss_tramp_var;
			g_assert (ins->opcode == OP_REGOFFSET);

			amd64_mov_reg_imm (code, AMD64_R11, (guint64)&ss_trampoline);
			amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, AMD64_R11, 8);

			/* Initialize bp_tramp_var */
			ins = cfg->arch.bp_tramp_var;
			g_assert (ins->opcode == OP_REGOFFSET);

			amd64_mov_reg_imm (code, AMD64_R11, (guint64)&bp_trampoline);
			amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, AMD64_R11, 8);
		}
	}

	set_code_cursor (cfg, code);

	return code;
}
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	int quad, i;
	guint8 *code;
	int max_epilog_size;
	CallInfo *cinfo;
	gint32 lmf_offset = cfg->lmf_var ? cfg->lmf_var->inst_offset : -1;
	gint32 save_area_offset = cfg->arch.reg_save_area_offset;

	max_epilog_size = get_max_epilog_size (cfg);

	code = realloc_code (cfg, max_epilog_size);

	cfg->has_unwind_info_for_epilog = TRUE;

	/* Mark the start of the epilog */
	mono_emit_unwind_op_mark_loc (cfg, code, 0);

	/* Save the unwind state which is needed by the out-of-line code */
	mono_emit_unwind_op_remember_state (cfg, code);

	/* the code restoring the registers must be kept in sync with OP_TAILCALL */

	if (method->save_lmf) {
		if (cfg->used_int_regs & (1 << AMD64_RBP))
			amd64_mov_reg_membase (code, AMD64_RBP, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp), 8);
		if (cfg->arch.omit_fp)
			/*
			 * emit_setup_lmf () marks RBP as saved, we have to mark it as same value here before clearing up the stack
			 * since its stack slot will become invalid.
			 */
			mono_emit_unwind_op_same_value (cfg, code, AMD64_RBP);
	}

	/* Restore callee saved regs */
	for (i = 0; i < AMD64_NREG; ++i) {
		if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->arch.saved_iregs & (1 << i))) {
			/* Restore only used_int_regs, not arch.saved_iregs */
#if defined(MONO_SUPPORT_TASKLETS)
			int restore_reg = 1;
#else
			int restore_reg = (cfg->used_int_regs & (1 << i));
#endif
			if (restore_reg) {
				amd64_mov_reg_membase (code, i, cfg->frame_reg, save_area_offset, 8);
				mono_emit_unwind_op_same_value (cfg, code, i);
				async_exc_point (code);
			}
			save_area_offset += 8;
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = cfg->arch.cinfo;
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		ArgInfo *ainfo = &cinfo->ret;
		MonoInst *inst = cfg->ret;

		for (quad = 0; quad < 2; quad ++) {
			switch (ainfo->pair_storage [quad]) {
			case ArgInIReg:
				amd64_mov_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (target_mgreg_t)), ainfo->pair_size [quad]);
				break;
			case ArgInFloatSSEReg:
				amd64_movss_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (target_mgreg_t)));
				break;
			case ArgInDoubleSSEReg:
				amd64_movsd_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (target_mgreg_t)));
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	if (cfg->arch.omit_fp) {
		if (cfg->arch.stack_alloc_size) {
			amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, cfg->arch.stack_alloc_size);
		}
	} else {
#ifdef TARGET_WIN32
		amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, 0);
		amd64_pop_reg (code, AMD64_RBP);
		mono_emit_unwind_op_same_value (cfg, code, AMD64_RBP);
#else
		amd64_leave (code);
		mono_emit_unwind_op_same_value (cfg, code, AMD64_RBP);
#endif
	}
	mono_emit_unwind_op_def_cfa (cfg, code, AMD64_RSP, 8);
	async_exc_point (code);
	amd64_ret (code);

	/* Restore the unwind state to be the same as before the epilog */
	mono_emit_unwind_op_restore_state (cfg, code);

	set_code_cursor (cfg, code);
}
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			code_size += 40;
		if (patch_info->type == MONO_PATCH_INFO_R8)
			code_size += 8 + 15; /* sizeof (double) + alignment */
		if (patch_info->type == MONO_PATCH_INFO_R4)
			code_size += 4 + 15; /* sizeof (float) + alignment */
		if (patch_info->type == MONO_PATCH_INFO_GC_CARD_TABLE_ADDR)
			code_size += 8 + 7; /* sizeof (void*) + alignment */
	}

	code = realloc_code (cfg, code_size);

	/* add code to raise exceptions */
	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			amd64_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_load_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			throw_ip = patch_info->ip.i;

			//x86_breakpoint (code);
			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				amd64_mov_reg_imm (code, AMD64_ARG_REG2, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				buf = code;
				amd64_mov_reg_imm_size (code, AMD64_ARG_REG2, 0xf0f0f0f0, 4);
				buf2 = code;

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}
				amd64_mov_reg_imm (code, AMD64_ARG_REG1, m_class_get_type_token (exc_class) - MONO_TOKEN_TYPE_DEF);

				patch_info->type = MONO_PATCH_INFO_NONE;

				code = emit_call (cfg, NULL, code, MONO_JIT_ICALL_mono_arch_throw_corlib_exception);

				amd64_mov_reg_imm (buf, AMD64_ARG_REG2, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
		set_code_cursor (cfg, code);
	}
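
	/*
	 * Note on the patching below: the loads emitted for R4/R8 constants and the
	 * card table address use RIP relative addressing, where the 32 bit
	 * displacement is relative to the end of the load instruction. The constants
	 * are placed after the method body, so the displacement written in is
	 * (address of the constant) - (address of the next instruction); the -8/-9
	 * and -7/-8 corrections account for a REX prefix lengthening the instruction
	 * by one byte.
	 */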
	/* Handle relocations with RIP relative addressing */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		gboolean remove = FALSE;
		guint8 *orig_code = code;

		switch (patch_info->type) {
		case MONO_PATCH_INFO_R8:
		case MONO_PATCH_INFO_R4: {
			guint8 *pos, *patch_pos;
			guint32 target_pos;

			/* The SSE opcodes require a 16 byte alignment */
			code = (guint8*)ALIGN_TO (code, 16);

			pos = cfg->native_code + patch_info->ip.i;
			if (IS_REX (pos [1])) {
				patch_pos = pos + 5;
				target_pos = code - pos - 9;
			}
			else {
				patch_pos = pos + 4;
				target_pos = code - pos - 8;
			}

			if (patch_info->type == MONO_PATCH_INFO_R8) {
				*(double*)code = *(double*)patch_info->data.target;
				code += sizeof (double);
			}
			else {
				*(float*)code = *(float*)patch_info->data.target;
				code += sizeof (float);
			}
			*(guint32*)(patch_pos) = target_pos;

			remove = TRUE;
			break;
		}
		case MONO_PATCH_INFO_GC_CARD_TABLE_ADDR: {
			guint8 *pos;

			if (cfg->compile_aot)
				continue;

			/* loading is faster against aligned addresses. */
			code = (guint8*)ALIGN_TO (code, 8);
			memset (orig_code, 0, code - orig_code);

			pos = cfg->native_code + patch_info->ip.i;

			/* alu_op [rex] modr/m imm32 - 7 or 8 bytes */
			if (IS_REX (pos [1]))
				*(guint32*)(pos + 4) = (guint8*)code - pos - 8;
			else
				*(guint32*)(pos + 3) = (guint8*)code - pos - 7;

			*(gpointer*)code = (gpointer)patch_info->data.target;
			code += sizeof (gpointer);

			remove = TRUE;
			break;
		}
		default:
			break;
		}

		if (remove) {
			if (patch_info == cfg->patch_info)
				cfg->patch_info = patch_info->next;
			else {
				MonoJumpInfo *tmp;

				for (tmp = cfg->patch_info; tmp->next != patch_info; tmp = tmp->next)
					;
				tmp->next = patch_info->next;
			}
		}
		set_code_cursor (cfg, code);
	}

	set_code_cursor (cfg, code);
}

#endif /* DISABLE_JIT */
void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* call/ret required (or likely other control transfer) */
}

void
mono_arch_flush_register_windows (void)
{
}

gboolean
mono_arch_is_inst_imm (int opcode, int imm_opcode, gint64 imm)
{
	return amd64_use_imm32 (imm);
}
/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* rip;
	int reg;
	gint64 value;

	mono_sigctx_to_monoctx (sigctx, &ctx);

	rip = (guint8*)ctx.gregs [AMD64_RIP];

	if (IS_REX (rip [0])) {
		reg = amd64_rex_b (rip [0]);
		rip ++;
	}
	else
		reg = 0;

	if ((rip [0] == 0xf7) && (x86_modrm_mod (rip [1]) == 0x3) && (x86_modrm_reg (rip [1]) == 0x7)) {
		/* idiv REG */
		reg += x86_modrm_rm (rip [1]);

		value = ctx.gregs [reg];

		if (value == -1)
			return TRUE;
	}

	return FALSE;
}
guint32
mono_arch_get_patch_offset (guint8 *code)
{
	return 3;
}
/**
 * \return TRUE if no sw breakpoint was present.
 *
 * Copy \p size bytes from \p code - \p offset to the buffer \p buf. If the debugger inserted software
 * breakpoints in the original code, they are removed in the copy.
 */
gboolean
mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
{
	/*
	 * If method_start is non-NULL we need to perform bound checks, since we access memory
	 * at code - offset we could go before the start of the method and end up in a different
	 * page of memory that is not mapped or read incorrect data anyway. We zero-fill the bytes
	 * instead.
	 */
	if (!method_start || code - offset >= method_start) {
		memcpy (buf, code - offset, size);
	} else {
		int diff = code - method_start;
		memset (buf, 0, size);
		memcpy (buf + offset - diff, method_start, diff + size - offset);
	}
	return TRUE;
}
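
/*
 * Example of the zero-fill path above, with hypothetical addresses: for
 * method_start = 0x1000, code = 0x1002, offset = 4 and size = 8, code - offset
 * (0x0ffe) lies before the method start, so diff = 2, buf [0] and buf [1] stay
 * zeroed, and the remaining 6 bytes are copied from method_start into buf + 2.
 */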
int
mono_arch_get_this_arg_reg (guint8 *code)
{
	return AMD64_ARG_REG1;
}

gpointer
mono_arch_get_this_arg_from_call (host_mgreg_t *regs, guint8 *code)
{
	return (gpointer)regs [mono_arch_get_this_arg_reg (code)];
}

#define MAX_ARCH_DELEGATE_PARAMS 10
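
/*
 * get_delegate_invoke_impl:
 *
 *   Generate a thunk for invoking a delegate. If HAS_TARGET is set, the 'this'
 * argument is replaced with the delegate's target object; otherwise the
 * register arguments are shifted left by one position and the thunk jumps
 * through MonoDelegate->method_ptr.
 */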
static gpointer
get_delegate_invoke_impl (MonoTrampInfo **info, gboolean has_target, guint32 param_count)
{
	guint8 *code, *start;
	GSList *unwind_ops = NULL;
	int i;

	unwind_ops = mono_arch_get_cie_program ();

	const int size = 64;

	start = code = (guint8 *)mono_global_codeman_reserve (size + MONO_TRAMPOLINE_UNWINDINFO_SIZE(0));

	if (has_target) {
		/* Replace the this argument with the target */
		amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
		amd64_mov_reg_membase (code, AMD64_ARG_REG1, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, target), 8);
		amd64_jump_membase (code, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr));
	} else {
		if (param_count == 0) {
			amd64_jump_membase (code, AMD64_ARG_REG1, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr));
		} else {
			/* We have to shift the arguments left */
			amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
			for (i = 0; i < param_count; ++i) {
#ifdef TARGET_WIN32
				if (i < 3)
					amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
				else
					amd64_mov_reg_membase (code, param_regs [i], AMD64_RSP, 0x28, 8);
#else
				amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
#endif
			}

			amd64_jump_membase (code, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr));
		}
	}

	g_assertf ((code - start) <= size, "%d %d", (int)(code - start), size);
	g_assert_checked (mono_arch_unwindinfo_validate_size (unwind_ops, MONO_TRAMPOLINE_UNWINDINFO_SIZE(0)));

	mono_arch_flush_icache (start, code - start);

	if (has_target) {
		*info = mono_tramp_info_create ("delegate_invoke_impl_has_target", start, code - start, NULL, unwind_ops);
	} else {
		char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", param_count);
		*info = mono_tramp_info_create (name, start, code - start, NULL, unwind_ops);
		g_free (name);
	}

	if (mono_jit_map_is_enabled ()) {
		char *buff;
		if (has_target)
			buff = (char*)"delegate_invoke_has_target";
		else
			buff = g_strdup_printf ("delegate_invoke_no_target_%d", param_count);
		mono_emit_jit_tramp (start, code - start, buff);
		if (!has_target)
			g_free (buff);
	}

	MONO_PROFILER_RAISE (jit_code_buffer, (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL));

	return start;
}
#define MAX_VIRTUAL_DELEGATE_OFFSET 32
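
/*
 * get_delegate_virtual_invoke_impl:
 *
 *   Generate a thunk for invoking a virtual/interface method through a
 * delegate: the 'this' argument is replaced with the delegate's target, the
 * IMT register is loaded if requested, and the thunk jumps through the slot
 * at OFFSET in the target's vtable.
 */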
static gpointer
get_delegate_virtual_invoke_impl (MonoTrampInfo **info, gboolean load_imt_reg, int offset)
{
	guint8 *code, *start;
	const int size = 20;
	char *tramp_name;
	GSList *unwind_ops;

	if (offset / (int)sizeof (target_mgreg_t) > MAX_VIRTUAL_DELEGATE_OFFSET)
		return NULL;

	start = code = (guint8 *)mono_global_codeman_reserve (size + MONO_TRAMPOLINE_UNWINDINFO_SIZE(0));

	unwind_ops = mono_arch_get_cie_program ();

	/* Replace the this argument with the target */
	amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
	amd64_mov_reg_membase (code, AMD64_ARG_REG1, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, target), 8);

	if (load_imt_reg) {
		/* Load the IMT reg */
		amd64_mov_reg_membase (code, MONO_ARCH_IMT_REG, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, method), 8);
	}

	/* Load the vtable */
	amd64_mov_reg_membase (code, AMD64_RAX, AMD64_ARG_REG1, MONO_STRUCT_OFFSET (MonoObject, vtable), 8);
	amd64_jump_membase (code, AMD64_RAX, offset);

	g_assertf ((code - start) <= size, "%d %d", (int)(code - start), size);

	MONO_PROFILER_RAISE (jit_code_buffer, (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL));

	tramp_name = mono_get_delegate_virtual_invoke_impl_name (load_imt_reg, offset);
	*info = mono_tramp_info_create (tramp_name, start, code - start, NULL, unwind_ops);
	g_free (tramp_name);

	return start;
}
/*
 * mono_arch_get_delegate_invoke_impls:
 *
 *   Return a list of MonoTrampInfo structures for the delegate invoke impl
 * trampolines.
 */
GSList*
mono_arch_get_delegate_invoke_impls (void)
{
	GSList *res = NULL;
	MonoTrampInfo *info;
	int i;

	get_delegate_invoke_impl (&info, TRUE, 0);
	res = g_slist_prepend (res, info);

	for (i = 0; i <= MAX_ARCH_DELEGATE_PARAMS; ++i) {
		get_delegate_invoke_impl (&info, FALSE, i);
		res = g_slist_prepend (res, info);
	}

	for (i = 1; i <= MONO_IMT_SIZE; ++i) {
		get_delegate_virtual_invoke_impl (&info, TRUE, - i * TARGET_SIZEOF_VOID_P);
		res = g_slist_prepend (res, info);
	}

	for (i = 0; i <= MAX_VIRTUAL_DELEGATE_OFFSET; ++i) {
		get_delegate_virtual_invoke_impl (&info, FALSE, i * TARGET_SIZEOF_VOID_P);
		res = g_slist_prepend (res, info);
		get_delegate_virtual_invoke_impl (&info, TRUE, i * TARGET_SIZEOF_VOID_P);
		res = g_slist_prepend (res, info);
	}

	return res;
}
gpointer
mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
{
	guint8 *code, *start;
	int i;

	if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
		return NULL;

	/* FIXME: Support more cases */
	if (MONO_TYPE_ISSTRUCT (mini_get_underlying_type (sig->ret)))
		return NULL;

	if (has_target) {
		static guint8* cached = NULL;

		if (cached)
			return cached;

		if (mono_ee_features.use_aot_trampolines) {
			start = (guint8 *)mono_aot_get_trampoline ("delegate_invoke_impl_has_target");
		} else {
			MonoTrampInfo *info;
			start = (guint8 *)get_delegate_invoke_impl (&info, TRUE, 0);
			mono_tramp_info_register (info, NULL);
		}

		mono_memory_barrier ();

		cached = start;
	} else {
		static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};

		for (i = 0; i < sig->param_count; ++i)
			if (!mono_is_regsize_var (sig->params [i]))
				return NULL;
		if (sig->param_count > 4)
			return NULL;

		code = cache [sig->param_count];
		if (code)
			return code;

		if (mono_ee_features.use_aot_trampolines) {
			char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", sig->param_count);
			start = (guint8 *)mono_aot_get_trampoline (name);
			g_free (name);
		} else {
			MonoTrampInfo *info;
			start = (guint8 *)get_delegate_invoke_impl (&info, FALSE, sig->param_count);
			mono_tramp_info_register (info, NULL);
		}

		mono_memory_barrier ();

		cache [sig->param_count] = start;
	}

	return start;
}
gpointer
mono_arch_get_delegate_virtual_invoke_impl (MonoMethodSignature *sig, MonoMethod *method, int offset, gboolean load_imt_reg)
{
	MonoTrampInfo *info;
	gpointer code;

	code = get_delegate_virtual_invoke_impl (&info, load_imt_reg, offset);
	if (code)
		mono_tramp_info_register (info, NULL);

	return code;
}
void
mono_arch_finish_init (void)
{
#if !defined(HOST_WIN32) && defined(MONO_XEN_OPT)
	optimize_for_xen = access ("/proc/xen", F_OK) == 0;
#endif
}
#define CMP_SIZE (6 + 1)
#define CMP_REG_REG_SIZE (4 + 1)
#define BR_SMALL_SIZE 2
#define BR_LARGE_SIZE 6
#define MOV_REG_IMM_SIZE 10
#define MOV_REG_IMM_32BIT_SIZE 6
#define JUMP_REG_SIZE (2 + 1)
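
/*
 * The constants above are conservative upper bounds for the sizes of the
 * instruction encodings emitted below (cmp with a 32 bit immediate, reg-reg
 * cmp, short/near branches, mov reg, imm and an indirect jump through a
 * register); they are used to precompute each IMT item's chunk_size before
 * any code is emitted.
 */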
static int
imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
{
	int i, distance = 0;

	for (i = start; i < target; ++i)
		distance += imt_entries [i]->chunk_size;

	return distance;
}
/*
 * LOCKING: called with the domain lock held
 */
gpointer
mono_arch_build_imt_trampoline (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
	gpointer fail_tramp)
{
	int i;
	int size = 0;
	guint8 *code, *start;
	gboolean vtable_is_32bit = ((gsize)(vtable) == (gsize)(int)(gsize)(vtable));
	GSList *unwind_ops;

	for (i = 0; i < count; ++i) {
		MonoIMTCheckItem *item = imt_entries [i];
		if (item->is_equals) {
			if (item->check_target_idx) {
				if (!item->compare_done) {
					if (amd64_use_imm32 ((gint64)item->key))
						item->chunk_size += CMP_SIZE;
					else
						item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
				}
				if (item->has_target_code) {
					item->chunk_size += MOV_REG_IMM_SIZE;
				} else {
					if (vtable_is_32bit)
						item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
					else
						item->chunk_size += MOV_REG_IMM_SIZE;
				}
				item->chunk_size += BR_SMALL_SIZE + JUMP_REG_SIZE;
			} else {
				if (fail_tramp) {
					item->chunk_size += MOV_REG_IMM_SIZE * 3 + CMP_REG_REG_SIZE +
						BR_SMALL_SIZE + JUMP_REG_SIZE * 2;
				} else {
					if (vtable_is_32bit)
						item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
					else
						item->chunk_size += MOV_REG_IMM_SIZE;
					item->chunk_size += JUMP_REG_SIZE;
					/* with assert below:
					 * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
					 */
				}
			}
		} else {
			if (amd64_use_imm32 ((gint64)item->key))
				item->chunk_size += CMP_SIZE;
			else
				item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
			item->chunk_size += BR_LARGE_SIZE;
			imt_entries [item->check_target_idx]->compare_done = TRUE;
		}
		size += item->chunk_size;
	}
	if (fail_tramp)
		code = (guint8 *)mono_method_alloc_generic_virtual_trampoline (domain, size + MONO_TRAMPOLINE_UNWINDINFO_SIZE(0));
	else
		code = (guint8 *)mono_domain_code_reserve (domain, size + MONO_TRAMPOLINE_UNWINDINFO_SIZE(0));
	start = code;

	unwind_ops = mono_arch_get_cie_program ();

	for (i = 0; i < count; ++i) {
		MonoIMTCheckItem *item = imt_entries [i];
		item->code_target = code;
		if (item->is_equals) {
			gboolean fail_case = !item->check_target_idx && fail_tramp;

			if (item->check_target_idx || fail_case) {
				if (!item->compare_done || fail_case) {
					if (amd64_use_imm32 ((gint64)item->key))
						amd64_alu_reg_imm_size (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key, sizeof(gpointer));
					else {
						amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_SCRATCH_REG, item->key, sizeof(gpointer));
						amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG);
					}
				}
				item->jmp_code = code;
				amd64_branch8 (code, X86_CC_NE, 0, FALSE);
				if (item->has_target_code) {
					amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, item->value.target_code);
					amd64_jump_reg (code, MONO_ARCH_IMT_SCRATCH_REG);
				} else {
					amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & (vtable->vtable [item->value.vtable_slot]));
					amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0);
				}

				if (fail_case) {
					amd64_patch (item->jmp_code, code);
					amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, fail_tramp);
					amd64_jump_reg (code, MONO_ARCH_IMT_SCRATCH_REG);
					item->jmp_code = NULL;
				}
			} else {
				/* enable the commented code to assert on wrong method */
#if 0
				if (amd64_is_imm32 (item->key))
					amd64_alu_reg_imm_size (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key, sizeof(gpointer));
				else {
					amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, item->key);
					amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG);
				}
				item->jmp_code = code;
				amd64_branch8 (code, X86_CC_NE, 0, FALSE);
				/* See the comment below about R10 */
				amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & (vtable->vtable [item->value.vtable_slot]));
				amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0);
				amd64_patch (item->jmp_code, code);
				amd64_breakpoint (code);
				item->jmp_code = NULL;
#else
				/* We're using R10 (MONO_ARCH_IMT_SCRATCH_REG) here because R11 (MONO_ARCH_IMT_REG)
				   needs to be preserved. R10 needs
				   to be preserved for calls which
				   require a runtime generic context,
				   but interface calls don't. */
				amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & (vtable->vtable [item->value.vtable_slot]));
				amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0);
#endif
			}
		} else {
			if (amd64_use_imm32 ((gint64)item->key))
				amd64_alu_reg_imm_size (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key, sizeof (target_mgreg_t));
			else {
				amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_SCRATCH_REG, item->key, sizeof (target_mgreg_t));
				amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG);
			}
			item->jmp_code = code;
			if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
				x86_branch8 (code, X86_CC_GE, 0, FALSE);
			else
				x86_branch32 (code, X86_CC_GE, 0, FALSE);
		}
		g_assertf (code - item->code_target <= item->chunk_size, "%X %X", (guint)(code - item->code_target), (guint)item->chunk_size);
	}
	/* patch the branches to get to the target items */
	for (i = 0; i < count; ++i) {
		MonoIMTCheckItem *item = imt_entries [i];
		if (item->jmp_code) {
			if (item->check_target_idx) {
				amd64_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
			}
		}
	}

	if (!fail_tramp)
		UnlockedAdd (&mono_stats.imt_trampolines_size, code - start);

	g_assert (code - start <= size);
	g_assert_checked (mono_arch_unwindinfo_validate_size (unwind_ops, MONO_TRAMPOLINE_UNWINDINFO_SIZE(0)));

	MONO_PROFILER_RAISE (jit_code_buffer, (start, code - start, MONO_PROFILER_CODE_BUFFER_IMT_TRAMPOLINE, NULL));

	mono_tramp_info_register (mono_tramp_info_create (NULL, start, code - start, NULL, unwind_ops), domain);

	return start;
}
MonoMethod*
mono_arch_find_imt_method (host_mgreg_t *regs, guint8 *code)
{
	return (MonoMethod*)regs [MONO_ARCH_IMT_REG];
}

MonoVTable*
mono_arch_find_static_call_vtable (host_mgreg_t *regs, guint8 *code)
{
	return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
}
GSList*
mono_arch_get_cie_program (void)
{
	GSList *l = NULL;

	mono_add_unwind_op_def_cfa (l, (guint8*)NULL, (guint8*)NULL, AMD64_RSP, 8);
	mono_add_unwind_op_offset (l, (guint8*)NULL, (guint8*)NULL, AMD64_RIP, -8);

	return l;
}
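
/*
 * mono_arch_emit_inst_for_method:
 *
 *   Lower recognized System.Math methods to arch specific opcodes (sqrt/abs,
 * cmov based min/max, SSE4.1 rounding) instead of emitting a call.
 */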
MonoInst*
mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
	MonoInst *ins = NULL;
	guint32 opcode = 0;

	if (cmethod->klass == mono_class_try_get_math_class ()) {
		if (strcmp (cmethod->name, "Sqrt") == 0) {
			opcode = OP_SQRT;
		} else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
			opcode = OP_ABS;
		}

		if (opcode && fsig->param_count == 1) {
			MONO_INST_NEW (cfg, ins, opcode);
			ins->type = STACK_R8;
			ins->dreg = mono_alloc_freg (cfg);
			ins->sreg1 = args [0]->dreg;
			MONO_ADD_INS (cfg->cbb, ins);
		}

		opcode = 0;
		if (cfg->opt & MONO_OPT_CMOV) {
			if (strcmp (cmethod->name, "Min") == 0) {
				if (fsig->params [0]->type == MONO_TYPE_I4)
					opcode = OP_IMIN;
				if (fsig->params [0]->type == MONO_TYPE_U4)
					opcode = OP_IMIN_UN;
				else if (fsig->params [0]->type == MONO_TYPE_I8)
					opcode = OP_LMIN;
				else if (fsig->params [0]->type == MONO_TYPE_U8)
					opcode = OP_LMIN_UN;
			} else if (strcmp (cmethod->name, "Max") == 0) {
				if (fsig->params [0]->type == MONO_TYPE_I4)
					opcode = OP_IMAX;
				if (fsig->params [0]->type == MONO_TYPE_U4)
					opcode = OP_IMAX_UN;
				else if (fsig->params [0]->type == MONO_TYPE_I8)
					opcode = OP_LMAX;
				else if (fsig->params [0]->type == MONO_TYPE_U8)
					opcode = OP_LMAX_UN;
			}
		}

		if (opcode && fsig->param_count == 2) {
			MONO_INST_NEW (cfg, ins, opcode);
			ins->type = fsig->params [0]->type == MONO_TYPE_I4 ? STACK_I4 : STACK_I8;
			ins->dreg = mono_alloc_ireg (cfg);
			ins->sreg1 = args [0]->dreg;
			ins->sreg2 = args [1]->dreg;
			MONO_ADD_INS (cfg->cbb, ins);
		}

#if 0
		/* OP_FREM is not IEEE compatible */
		else if (strcmp (cmethod->name, "IEEERemainder") == 0 && fsig->param_count == 2) {
			MONO_INST_NEW (cfg, ins, OP_FREM);
			ins->inst_i0 = args [0];
			ins->inst_i1 = args [1];
		}
#endif
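
		/*
		 * The block below lowers Math.Round/Floor/Ceiling to ROUNDPD. The mode
		 * values stored in inst_c0 correspond to the SSE4.1 rounding-control
		 * immediate: 0 = round to nearest (even), 1 = round down, 2 = round up.
		 */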
		if ((mini_get_cpu_features (cfg) & MONO_CPU_X86_SSE41) != 0 && fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R8) {
			int mode = -1;
			if (!strcmp (cmethod->name, "Round"))
				mode = 0;
			else if (!strcmp (cmethod->name, "Floor"))
				mode = 1;
			else if (!strcmp (cmethod->name, "Ceiling"))
				mode = 2;

			if (mode != -1) {
				int xreg = alloc_xreg (cfg);
				EMIT_NEW_UNALU (cfg, ins, OP_FCONV_TO_R8_X, xreg, args [0]->dreg);
				EMIT_NEW_UNALU (cfg, ins, OP_SSE41_ROUNDPD, xreg, xreg);
				ins->inst_c0 = mode;
				int dreg = alloc_freg (cfg);
				EMIT_NEW_UNALU (cfg, ins, OP_EXTRACT_R8, dreg, xreg);
			}
		}
	}

	return ins;
}
host_mgreg_t
mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
{
	return ctx->gregs [reg];
}

void
mono_arch_context_set_int_reg (MonoContext *ctx, int reg, host_mgreg_t val)
{
	ctx->gregs [reg] = val;
}
/*
 * mono_arch_emit_load_aotconst:
 *
 *   Emit code to load the contents of the GOT slot identified by TRAMP_TYPE and
 * TARGET from the mscorlib GOT in full-aot code.
 * On AMD64, the result is placed into R11.
 */
guint8*
mono_arch_emit_load_aotconst (guint8 *start, guint8 *code, MonoJumpInfo **ji, MonoJumpInfoType tramp_type, gconstpointer target)
{
	*ji = mono_patch_info_list_prepend (*ji, code - start, tramp_type, target);
	amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, 8);

	return code;
}
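
/*
 * The zero displacement in the RIP relative load above is a placeholder: the
 * patch info recorded against CODE - START is used to fix it up once the
 * address of the GOT slot is known.
 */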
/*
 * mono_arch_get_trampolines:
 *
 *   Return a list of MonoTrampInfo structures describing arch specific trampolines
 * for AOT.
 */
GSList *
mono_arch_get_trampolines (gboolean aot)
{
	return mono_amd64_get_exception_trampolines (aot);
}
/* Soft Debug support */
#ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED

/*
 * mono_arch_set_breakpoint:
 *
 *   Set a breakpoint at the native code corresponding to JI at NATIVE_OFFSET.
 * The location should contain code emitted by OP_SEQ_POINT.
 */
void
mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
{
	guint8 *code = ip;

	if (ji->from_aot) {
		guint32 native_offset = ip - (guint8*)ji->code_start;
		SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), (guint8*)ji->code_start);

		g_assert (info->bp_addrs [native_offset] == 0);
		info->bp_addrs [native_offset] = mini_get_breakpoint_trampoline ();
	} else {
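		/*
		 * 0x41 0xbb is the REX.B prefixed 'mov r11d, imm32' encoding emitted
		 * for OP_SEQ_POINT; rewriting the immediate from 0 to 1 is what arms
		 * the breakpoint at this sequence point.
		 */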
		/* ip points to a mov r11, 0 */
		g_assert (code [0] == 0x41);
		g_assert (code [1] == 0xbb);
		amd64_mov_reg_imm (code, AMD64_R11, 1);
	}
}

/*
 * mono_arch_clear_breakpoint:
 *
 *   Clear the breakpoint at IP.
 */
void
mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
{
	guint8 *code = ip;

	if (ji->from_aot) {
		guint32 native_offset = ip - (guint8*)ji->code_start;
		SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), (guint8*)ji->code_start);

		info->bp_addrs [native_offset] = NULL;
	} else {
		amd64_mov_reg_imm (code, AMD64_R11, 0);
	}
}
gboolean
mono_arch_is_breakpoint_event (void *info, void *sigctx)
{
	/* We use soft breakpoints on amd64 */
	return FALSE;
}

/*
 * mono_arch_skip_breakpoint:
 *
 *   Modify CTX so the ip is placed after the breakpoint instruction, so when
 * we resume, the instruction is not executed again.
 */
void
mono_arch_skip_breakpoint (MonoContext *ctx, MonoJitInfo *ji)
{
	g_assert_not_reached ();
}

/*
 * mono_arch_start_single_stepping:
 *
 *   Start single stepping.
 */
void
mono_arch_start_single_stepping (void)
{
	ss_trampoline = mini_get_single_step_trampoline ();
}

/*
 * mono_arch_stop_single_stepping:
 *
 *   Stop single stepping.
 */
void
mono_arch_stop_single_stepping (void)
{
	ss_trampoline = NULL;
}

/*
 * mono_arch_is_single_step_event:
 *
 *   Return whether the machine state in SIGCTX corresponds to a single
 * step event.
 */
gboolean
mono_arch_is_single_step_event (void *info, void *sigctx)
{
	/* We use soft breakpoints on amd64 */
	return FALSE;
}

/*
 * mono_arch_skip_single_step:
 *
 *   Modify CTX so the ip is placed after the single step trigger instruction,
 * so when we resume, the instruction is not executed again.
 */
void
mono_arch_skip_single_step (MonoContext *ctx)
{
	g_assert_not_reached ();
}
/*
 * mono_arch_get_seq_point_info:
 *
 *   Return a pointer to a data structure which is used by the sequence
 * point implementation in AOTed code.
 */
SeqPointInfo*
mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
{
	SeqPointInfo *info;
	MonoJitInfo *ji;

	// FIXME: Add a free function

	mono_domain_lock (domain);
	info = (SeqPointInfo *)g_hash_table_lookup (domain_jit_info (domain)->arch_seq_points, code);
	mono_domain_unlock (domain);

	if (!info) {
		ji = mono_jit_info_table_find (domain, code);
		g_assert (ji);

		// FIXME: Optimize the size
		info = (SeqPointInfo *)g_malloc0 (sizeof (SeqPointInfo) + (ji->code_size * sizeof (gpointer)));

		info->ss_tramp_addr = &ss_trampoline;

		mono_domain_lock (domain);
		g_hash_table_insert (domain_jit_info (domain)->arch_seq_points, code, info);
		mono_domain_unlock (domain);
	}

	return info;
}

#endif /* MONO_ARCH_SOFT_DEBUG_SUPPORTED */
gboolean
mono_arch_opcode_supported (int opcode)
{
	switch (opcode) {
	case OP_ATOMIC_ADD_I4:
	case OP_ATOMIC_ADD_I8:
	case OP_ATOMIC_EXCHANGE_I4:
	case OP_ATOMIC_EXCHANGE_I8:
	case OP_ATOMIC_CAS_I4:
	case OP_ATOMIC_CAS_I8:
	case OP_ATOMIC_LOAD_I1:
	case OP_ATOMIC_LOAD_I2:
	case OP_ATOMIC_LOAD_I4:
	case OP_ATOMIC_LOAD_I8:
	case OP_ATOMIC_LOAD_U1:
	case OP_ATOMIC_LOAD_U2:
	case OP_ATOMIC_LOAD_U4:
	case OP_ATOMIC_LOAD_U8:
	case OP_ATOMIC_LOAD_R4:
	case OP_ATOMIC_LOAD_R8:
	case OP_ATOMIC_STORE_I1:
	case OP_ATOMIC_STORE_I2:
	case OP_ATOMIC_STORE_I4:
	case OP_ATOMIC_STORE_I8:
	case OP_ATOMIC_STORE_U1:
	case OP_ATOMIC_STORE_U2:
	case OP_ATOMIC_STORE_U4:
	case OP_ATOMIC_STORE_U8:
	case OP_ATOMIC_STORE_R4:
	case OP_ATOMIC_STORE_R8:
		return TRUE;
	default:
		return FALSE;
	}
}
CallInfo*
mono_arch_get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
{
	return get_call_info (mp, sig);
}

gpointer
mono_arch_load_function (MonoJitICallId jit_icall_id)
{
	gpointer target = NULL;
	switch (jit_icall_id) {
#undef MONO_AOT_ICALL
#define MONO_AOT_ICALL(x) case MONO_JIT_ICALL_ ## x: target = (gpointer)x; break;
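	/*
	 * Each MONO_AOT_ICALL entry expands to a case which maps the JIT icall ID
	 * to the address of the corresponding C function.
	 */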
	MONO_AOT_ICALL (mono_amd64_resume_unwind)
	MONO_AOT_ICALL (mono_amd64_start_gsharedvt_call)
	MONO_AOT_ICALL (mono_amd64_throw_corlib_exception)
	MONO_AOT_ICALL (mono_amd64_throw_exception)