/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "tcg-pool.inc.c"

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif
38 static const int tcg_target_reg_alloc_order
[] = {
39 #if TCG_TARGET_REG_BITS == 64
66 static const int tcg_target_call_iarg_regs
[] = {
67 #if TCG_TARGET_REG_BITS == 64
80 /* 32 bit mode uses stack based calling convention (GCC default). */
84 static const int tcg_target_call_oarg_regs
[] = {
86 #if TCG_TARGET_REG_BITS == 32
/* Constants we accept.  */
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400
#define TCG_CT_CONST_WSZ 0x800
/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call clobbered registers on
   32-bit.  */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif
/* The host compiler should supply <cpuid.h> to enable runtime features
   detection, as we're not going to go so far as our own inline assembly.
   If not available, default values will be assumed.  */
#if defined(CONFIG_CPUID_H)
#include "qemu/cpuid.h"
#endif
/* For 64-bit, we always know that CMOV is available.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H)
static bool have_cmov;
#else
# define have_cmov 0
#endif
/* We need these symbols in tcg-target.h, and we can't properly conditionalize
   it there.  Therefore we always define the variable.  */
#ifdef CONFIG_CPUID_H
static bool have_movbe;
static bool have_bmi2;
static bool have_lzcnt;
#else
# define have_movbe 0
# define have_bmi2 0
# define have_lzcnt 0
#endif
static tcg_insn_unit *tb_ret_addr;
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch (type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        tcg_abort();
    }
}
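
/* Illustrative note (added, not in the original source): both branches patch
   a pc-relative displacement.  x86 displacements are relative to the end of
   the instruction, so the -4 (R_386_PC32) and -1 (R_386_PC8) addends passed
   to tcg_out_reloc() further below cancel the size of the displacement field
   itself when this function later subtracts code_ptr. */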
/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;
    case 'W':
        /* With TZCNT/LZCNT, we can have operand-size as an input.  */
        ct->ct |= TCG_CT_CONST_WSZ;
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_S32);
        break;
    case 'Z':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_U32);
        break;
    case 'I':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_I32);
        break;

    default:
        return NULL;
    }
    return ct_str;
}
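
/* Example (added note): the constraint strings used by tcg_target_op_def()
   below, e.g. "qi" or "re", are consumed one character at a time by this
   parser; 'q' and 'r' select register sets here, while letters such as 'e'
   and 'Z' describe acceptable immediates ('i' itself is handled by common
   TCG code). */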
/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}
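
/* Example (added note): TCG_CT_CONST_I32 accepts a value whose bitwise
   complement fits in a signed 32-bit immediate, e.g. ~0xff; tcg_out_op()
   uses this for andc by emitting an AND with the complemented constant. */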
#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x) ((x) & 7)
#else
# define LOWREGMASK(x) (x)
#endif

#define P_EXT      0x100      /* 0x0f opcode prefix */
#define P_EXT38    0x200      /* 0x0f 0x38 opcode prefix */
#define P_DATA16   0x400      /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32  0x800      /* 0x67 opcode prefix */
# define P_REXW    0x1000     /* Set REX.W = 1 */
# define P_REXB_R  0x2000     /* REG field as byte register */
# define P_REXB_RM 0x4000     /* R/M field as byte register */
# define P_GS      0x8000     /* gs segment override */
#else
# define P_ADDR32  0
# define P_REXW    0
# define P_REXB_R  0
# define P_REXB_RM 0
# define P_GS      0
#endif
#define P_SIMDF3   0x10000    /* 0xf3 opcode prefix */
#define P_SIMDF2   0x20000    /* 0xf2 opcode prefix */
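
/* Note (added): in the OPC_* values below the low byte is the primary
   opcode and the P_* bits above it request prefix bytes, which the
   tcg_out_opc() routine further below emits; e.g. OPC_MOVZBL =
   (0xb6 | P_EXT) assembles to "0f b6 /r" (movzbl). */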
#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN        (0xf2 | P_EXT38)
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSF         (0xbc | P_EXT)
#define OPC_BSR         (0xbd | P_EXT)
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_LZCNT       (0xbd | P_EXT | P_SIMDF3)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_POPCNT      (0xb8 | P_EXT | P_SIMDF3)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL       (0x85)
#define OPC_TZCNT       (0xbc | P_EXT | P_SIMDF3)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)
/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev   0
#define EXT5_DEC_Ev   1
#define EXT5_CALLN_Ev 2
#define EXT5_JMPN_Ev  4
/* Condition codes to be added to OPC_JCC_{long,short}.  */

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
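
/* Example (added note): tcg_cond_to_jcc[TCG_COND_NE] is the x86 "not equal"
   condition nibble (0x5); added to OPC_JCC_short it gives the one-byte
   opcode 0x75 (jne rel8), and added to OPC_JCC_long it gives 0f 85
   (jne rel32). */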
#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        tcg_debug_assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }
    if (opc & P_SIMDF3) {
        tcg_out8(s, 0xf3);
    } else if (opc & P_SIMDF2) {
        tcg_out8(s, 0xf2);
    }

    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }

    tcg_out8(s, opc);
}
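
/* Worked example (added): tcg_out_modrm(s, OPC_MOVL_GvEv + P_REXW, 9, 3)
   reaches this function with r = 9 (%r9) and rm = 3 (%rbx), so rex becomes
   0x40 | 0x8 (W) | 0x4 (R) = 0x4c; together with the ModRM byte 0xcb emitted
   by tcg_out_modrm() the bytes are 4c 8b cb, i.e. "movq %rbx, %r9". */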
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_SIMDF3) {
        tcg_out8(s, 0xf3);
    } else if (opc & P_SIMDF2) {
        tcg_out8(s, 0xf2);
    }
    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
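
/* Worked example (added): tcg_out_modrm(s, OPC_ARITH_GvEv + (ARITH_ADD << 3),
   TCG_REG_EDI, TCG_REG_ESI) emits 03 fe, i.e. "addl %esi, %edi"
   (mod = 11b selects register direct, reg = %edi, r/m = %esi). */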
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    int tmp;

    if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
        /* Three byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm */
        if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            tcg_abort();
        }
        tmp |= 0x40;                   /* VEX.X */
        tmp |= (r & 8 ? 0 : 0x80);     /* VEX.R */
        tmp |= (rm & 8 ? 0 : 0x20);    /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */
    } else {
        /* Two byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);      /* VEX.R */
    }
    /* VEX.pp */
    if (opc & P_DATA16) {
        tmp |= 1;                      /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                      /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                      /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;             /* VEX.vvvv */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            tcg_debug_assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
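
/* Worked example (added): with r = %eax, rm = %ebx, index = %esi, shift = 2
   and offset = 0 this produces ModRM 0x04 and SIB 0xb3, i.e. the address
   mode (%ebx,%esi,4) -- mod = 00b means no displacement and r/m = 100b
   escapes to the SIB byte. */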
/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}
/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}
static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
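
/* Note (added): the cases above are ordered by encoding size -- xor for
   zero, a 5-byte movl for values that zero-extend from 32 bits, a 7-byte
   sign-extending movq imm32, a 7-byte pc-relative lea, and finally the
   full 10-byte movabsq. */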
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    /* Given the strength of x86 memory ordering, we only need care for
       store-load ordering.  Experimentally, "lock orl $0,0(%esp)" is
       faster than "mfence", so don't bother with the sse insn.  */
    if (a0 & TCG_MO_ST_LD) {
        tcg_out8(s, 0xf0);
        tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
        tcg_out8(s, 0);
    }
}
static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    int rexw = 0;
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
        if (val != (int32_t)val) {
            return false;
        }
        rexw = P_REXW;
    }
    tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
    tcg_out32(s, val);
    return true;
}
static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}
static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}
static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}
/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
{
    int32_t val, val1;

    if (l->has_value) {
        val = tcg_pcrel_diff(s, l->u.value_ptr);
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
        s->code_ptr += 4;
    }
}
static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
958 /* XXX: we implement it at the target level to avoid having to
959 handle cross basic blocks temporaries */
960 static void tcg_out_brcond2(TCGContext
*s
, const TCGArg
*args
,
961 const int *const_args
, int small
)
963 TCGLabel
*label_next
= gen_new_label();
964 TCGLabel
*label_this
= arg_label(args
[5]);
968 tcg_out_brcond32(s
, TCG_COND_NE
, args
[0], args
[2], const_args
[2],
970 tcg_out_brcond32(s
, TCG_COND_EQ
, args
[1], args
[3], const_args
[3],
974 tcg_out_brcond32(s
, TCG_COND_NE
, args
[0], args
[2], const_args
[2],
976 tcg_out_brcond32(s
, TCG_COND_NE
, args
[1], args
[3], const_args
[3],
980 tcg_out_brcond32(s
, TCG_COND_LT
, args
[1], args
[3], const_args
[3],
982 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
983 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[0], args
[2], const_args
[2],
987 tcg_out_brcond32(s
, TCG_COND_LT
, args
[1], args
[3], const_args
[3],
989 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
990 tcg_out_brcond32(s
, TCG_COND_LEU
, args
[0], args
[2], const_args
[2],
994 tcg_out_brcond32(s
, TCG_COND_GT
, args
[1], args
[3], const_args
[3],
996 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
997 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[0], args
[2], const_args
[2],
1001 tcg_out_brcond32(s
, TCG_COND_GT
, args
[1], args
[3], const_args
[3],
1003 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
1004 tcg_out_brcond32(s
, TCG_COND_GEU
, args
[0], args
[2], const_args
[2],
1008 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[1], args
[3], const_args
[3],
1010 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
1011 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[0], args
[2], const_args
[2],
1015 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[1], args
[3], const_args
[3],
1017 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
1018 tcg_out_brcond32(s
, TCG_COND_LEU
, args
[0], args
[2], const_args
[2],
1022 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[1], args
[3], const_args
[3],
1024 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
1025 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[0], args
[2], const_args
[2],
1029 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[1], args
[3], const_args
[3],
1031 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
1032 tcg_out_brcond32(s
, TCG_COND_GEU
, args
[0], args
[2], const_args
[2],
1038 tcg_out_label(s
, label_next
, s
->code_ptr
);
1042 static void tcg_out_setcond32(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
1043 TCGArg arg1
, TCGArg arg2
, int const_arg2
)
1045 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, 0);
1046 tcg_out_modrm(s
, OPC_SETCC
| tcg_cond_to_jcc
[cond
], 0, dest
);
1047 tcg_out_ext8u(s
, dest
, dest
);
1050 #if TCG_TARGET_REG_BITS == 64
1051 static void tcg_out_setcond64(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
1052 TCGArg arg1
, TCGArg arg2
, int const_arg2
)
1054 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, P_REXW
);
1055 tcg_out_modrm(s
, OPC_SETCC
| tcg_cond_to_jcc
[cond
], 0, dest
);
1056 tcg_out_ext8u(s
, dest
, dest
);
1059 static void tcg_out_setcond2(TCGContext
*s
, const TCGArg
*args
,
1060 const int *const_args
)
1063 TCGLabel
*label_true
, *label_over
;
1065 memcpy(new_args
, args
+1, 5*sizeof(TCGArg
));
1067 if (args
[0] == args
[1] || args
[0] == args
[2]
1068 || (!const_args
[3] && args
[0] == args
[3])
1069 || (!const_args
[4] && args
[0] == args
[4])) {
1070 /* When the destination overlaps with one of the argument
1071 registers, don't do anything tricky. */
1072 label_true
= gen_new_label();
1073 label_over
= gen_new_label();
1075 new_args
[5] = label_arg(label_true
);
1076 tcg_out_brcond2(s
, new_args
, const_args
+1, 1);
1078 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 0);
1079 tcg_out_jxx(s
, JCC_JMP
, label_over
, 1);
1080 tcg_out_label(s
, label_true
, s
->code_ptr
);
1082 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 1);
1083 tcg_out_label(s
, label_over
, s
->code_ptr
);
1085 /* When the destination does not overlap one of the arguments,
1086 clear the destination first, jump if cond false, and emit an
1087 increment in the true case. This results in smaller code. */
1089 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 0);
1091 label_over
= gen_new_label();
1092 new_args
[4] = tcg_invert_cond(new_args
[4]);
1093 new_args
[5] = label_arg(label_over
);
1094 tcg_out_brcond2(s
, new_args
, const_args
+1, 1);
1096 tgen_arithi(s
, ARITH_ADD
, args
[0], 1, 0);
1097 tcg_out_label(s
, label_over
, s
->code_ptr
);
1102 static void tcg_out_cmov(TCGContext
*s
, TCGCond cond
, int rexw
,
1103 TCGReg dest
, TCGReg v1
)
1106 tcg_out_modrm(s
, OPC_CMOVCC
| tcg_cond_to_jcc
[cond
] | rexw
, dest
, v1
);
1108 TCGLabel
*over
= gen_new_label();
1109 tcg_out_jxx(s
, tcg_cond_to_jcc
[tcg_invert_cond(cond
)], over
, 1);
1110 tcg_out_mov(s
, TCG_TYPE_I32
, dest
, v1
);
1111 tcg_out_label(s
, over
, s
->code_ptr
);
1115 static void tcg_out_movcond32(TCGContext
*s
, TCGCond cond
, TCGReg dest
,
1116 TCGReg c1
, TCGArg c2
, int const_c2
,
1119 tcg_out_cmp(s
, c1
, c2
, const_c2
, 0);
1120 tcg_out_cmov(s
, cond
, 0, dest
, v1
);
1123 #if TCG_TARGET_REG_BITS == 64
1124 static void tcg_out_movcond64(TCGContext
*s
, TCGCond cond
, TCGReg dest
,
1125 TCGReg c1
, TCGArg c2
, int const_c2
,
1128 tcg_out_cmp(s
, c1
, c2
, const_c2
, P_REXW
);
1129 tcg_out_cmov(s
, cond
, P_REXW
, dest
, v1
);
1133 static void tcg_out_ctz(TCGContext
*s
, int rexw
, TCGReg dest
, TCGReg arg1
,
1134 TCGArg arg2
, bool const_a2
)
1137 tcg_out_modrm(s
, OPC_TZCNT
+ rexw
, dest
, arg1
);
1139 tcg_debug_assert(arg2
== (rexw
? 64 : 32));
1141 tcg_debug_assert(dest
!= arg2
);
1142 tcg_out_cmov(s
, TCG_COND_LTU
, rexw
, dest
, arg2
);
1145 tcg_debug_assert(dest
!= arg2
);
1146 tcg_out_modrm(s
, OPC_BSF
+ rexw
, dest
, arg1
);
1147 tcg_out_cmov(s
, TCG_COND_EQ
, rexw
, dest
, arg2
);
1151 static void tcg_out_clz(TCGContext
*s
, int rexw
, TCGReg dest
, TCGReg arg1
,
1152 TCGArg arg2
, bool const_a2
)
1155 tcg_out_modrm(s
, OPC_LZCNT
+ rexw
, dest
, arg1
);
1157 tcg_debug_assert(arg2
== (rexw
? 64 : 32));
1159 tcg_debug_assert(dest
!= arg2
);
1160 tcg_out_cmov(s
, TCG_COND_LTU
, rexw
, dest
, arg2
);
1163 tcg_debug_assert(!const_a2
);
1164 tcg_debug_assert(dest
!= arg1
);
1165 tcg_debug_assert(dest
!= arg2
);
1167 /* Recall that the output of BSR is the index not the count. */
1168 tcg_out_modrm(s
, OPC_BSR
+ rexw
, dest
, arg1
);
1169 tgen_arithi(s
, ARITH_XOR
+ rexw
, dest
, rexw
? 63 : 31, 0);
1171 /* Since we have destroyed the flags from BSR, we have to re-test. */
1172 tcg_out_cmp(s
, arg1
, 0, 1, rexw
);
1173 tcg_out_cmov(s
, TCG_COND_EQ
, rexw
, dest
, arg2
);
1177 static void tcg_out_branch(TCGContext
*s
, int call
, tcg_insn_unit
*dest
)
1179 intptr_t disp
= tcg_pcrel_diff(s
, dest
) - 5;
1181 if (disp
== (int32_t)disp
) {
1182 tcg_out_opc(s
, call
? OPC_CALL_Jz
: OPC_JMP_long
, 0, 0, 0);
1185 /* rip-relative addressing into the constant pool.
1186 This is 6 + 8 = 14 bytes, as compared to using an
1187 an immediate load 10 + 6 = 16 bytes, plus we may
1188 be able to re-use the pool constant for more calls. */
1189 tcg_out_opc(s
, OPC_GRP5
, 0, 0, 0);
1190 tcg_out8(s
, (call
? EXT5_CALLN_Ev
: EXT5_JMPN_Ev
) << 3 | 5);
1191 new_pool_label(s
, (uintptr_t)dest
, R_386_PC32
, s
->code_ptr
, -4);
1196 static inline void tcg_out_call(TCGContext
*s
, tcg_insn_unit
*dest
)
1198 tcg_out_branch(s
, 1, dest
);
1201 static void tcg_out_jmp(TCGContext
*s
, tcg_insn_unit
*dest
)
1203 tcg_out_branch(s
, 0, dest
);
1206 static void tcg_out_nopn(TCGContext
*s
, int n
)
1209 /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
1210 * "xchg %eax,%eax", forming "xchg %ax,%ax". All cores accept the
1211 * duplicate prefix, and all of the interesting recent cores can
1212 * decode and discard the duplicates in a single cycle.
1214 tcg_debug_assert(n
>= 1);
1215 for (i
= 1; i
< n
; ++i
) {
#if defined(CONFIG_SOFTMMU)
#include "tcg-ldst.inc.c"

/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
/* Perform the TLB load and compare.

   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered.  */
1271 static inline void tcg_out_tlb_load(TCGContext
*s
, TCGReg addrlo
, TCGReg addrhi
,
1272 int mem_index
, TCGMemOp opc
,
1273 tcg_insn_unit
**label_ptr
, int which
)
1275 const TCGReg r0
= TCG_REG_L0
;
1276 const TCGReg r1
= TCG_REG_L1
;
1277 TCGType ttype
= TCG_TYPE_I32
;
1278 TCGType tlbtype
= TCG_TYPE_I32
;
1279 int trexw
= 0, hrexw
= 0, tlbrexw
= 0;
1280 unsigned a_bits
= get_alignment_bits(opc
);
1281 unsigned s_bits
= opc
& MO_SIZE
;
1282 unsigned a_mask
= (1 << a_bits
) - 1;
1283 unsigned s_mask
= (1 << s_bits
) - 1;
1284 target_ulong tlb_mask
;
1286 if (TCG_TARGET_REG_BITS
== 64) {
1287 if (TARGET_LONG_BITS
== 64) {
1288 ttype
= TCG_TYPE_I64
;
1291 if (TCG_TYPE_PTR
== TCG_TYPE_I64
) {
1293 if (TARGET_PAGE_BITS
+ CPU_TLB_BITS
> 32) {
1294 tlbtype
= TCG_TYPE_I64
;
1300 tcg_out_mov(s
, tlbtype
, r0
, addrlo
);
1301 /* If the required alignment is at least as large as the access, simply
1302 copy the address and mask. For lesser alignments, check that we don't
1303 cross pages for the complete access. */
1304 if (a_bits
>= s_bits
) {
1305 tcg_out_mov(s
, ttype
, r1
, addrlo
);
1307 tcg_out_modrm_offset(s
, OPC_LEA
+ trexw
, r1
, addrlo
, s_mask
- a_mask
);
1309 tlb_mask
= (target_ulong
)TARGET_PAGE_MASK
| a_mask
;
1311 tcg_out_shifti(s
, SHIFT_SHR
+ tlbrexw
, r0
,
1312 TARGET_PAGE_BITS
- CPU_TLB_ENTRY_BITS
);
1314 tgen_arithi(s
, ARITH_AND
+ trexw
, r1
, tlb_mask
, 0);
1315 tgen_arithi(s
, ARITH_AND
+ tlbrexw
, r0
,
1316 (CPU_TLB_SIZE
- 1) << CPU_TLB_ENTRY_BITS
, 0);
1318 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ hrexw
, r0
, TCG_AREG0
, r0
, 0,
1319 offsetof(CPUArchState
, tlb_table
[mem_index
][0])
1323 tcg_out_modrm_offset(s
, OPC_CMP_GvEv
+ trexw
, r1
, r0
, 0);
1325 /* Prepare for both the fast path add of the tlb addend, and the slow
1326 path function argument setup. There are two cases worth note:
1327 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1328 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1329 copies the entire guest address for the slow path, while truncation
1330 for the 32-bit host happens with the fastpath ADDL below. */
1331 tcg_out_mov(s
, ttype
, r1
, addrlo
);
1334 tcg_out_opc(s
, OPC_JCC_long
+ JCC_JNE
, 0, 0, 0);
1335 label_ptr
[0] = s
->code_ptr
;
1338 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1339 /* cmp 4(r0), addrhi */
1340 tcg_out_modrm_offset(s
, OPC_CMP_GvEv
, addrhi
, r0
, 4);
1343 tcg_out_opc(s
, OPC_JCC_long
+ JCC_JNE
, 0, 0, 0);
1344 label_ptr
[1] = s
->code_ptr
;
1350 /* add addend(r0), r1 */
1351 tcg_out_modrm_offset(s
, OPC_ADD_GvEv
+ hrexw
, r1
, r0
,
1352 offsetof(CPUTLBEntry
, addend
) - which
);
1356 * Record the context of a call to the out of line helper code for the slow path
1357 * for a load or store, so that we can later generate the correct helper code
1359 static void add_qemu_ldst_label(TCGContext
*s
, bool is_ld
, TCGMemOpIdx oi
,
1360 TCGReg datalo
, TCGReg datahi
,
1361 TCGReg addrlo
, TCGReg addrhi
,
1362 tcg_insn_unit
*raddr
,
1363 tcg_insn_unit
**label_ptr
)
1365 TCGLabelQemuLdst
*label
= new_ldst_label(s
);
1367 label
->is_ld
= is_ld
;
1369 label
->datalo_reg
= datalo
;
1370 label
->datahi_reg
= datahi
;
1371 label
->addrlo_reg
= addrlo
;
1372 label
->addrhi_reg
= addrhi
;
1373 label
->raddr
= raddr
;
1374 label
->label_ptr
[0] = label_ptr
[0];
1375 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1376 label
->label_ptr
[1] = label_ptr
[1];
1381 * Generate code for the slow path for a load at the end of block
1383 static void tcg_out_qemu_ld_slow_path(TCGContext
*s
, TCGLabelQemuLdst
*l
)
1385 TCGMemOpIdx oi
= l
->oi
;
1386 TCGMemOp opc
= get_memop(oi
);
1388 tcg_insn_unit
**label_ptr
= &l
->label_ptr
[0];
1390 /* resolve label address */
1391 tcg_patch32(label_ptr
[0], s
->code_ptr
- label_ptr
[0] - 4);
1392 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1393 tcg_patch32(label_ptr
[1], s
->code_ptr
- label_ptr
[1] - 4);
1396 if (TCG_TARGET_REG_BITS
== 32) {
1399 tcg_out_st(s
, TCG_TYPE_PTR
, TCG_AREG0
, TCG_REG_ESP
, ofs
);
1402 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrlo_reg
, TCG_REG_ESP
, ofs
);
1405 if (TARGET_LONG_BITS
== 64) {
1406 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrhi_reg
, TCG_REG_ESP
, ofs
);
1410 tcg_out_sti(s
, TCG_TYPE_I32
, oi
, TCG_REG_ESP
, ofs
);
1413 tcg_out_sti(s
, TCG_TYPE_PTR
, (uintptr_t)l
->raddr
, TCG_REG_ESP
, ofs
);
1415 tcg_out_mov(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[0], TCG_AREG0
);
1416 /* The second argument is already loaded with addrlo. */
1417 tcg_out_movi(s
, TCG_TYPE_I32
, tcg_target_call_iarg_regs
[2], oi
);
1418 tcg_out_movi(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[3],
1419 (uintptr_t)l
->raddr
);
1422 tcg_out_call(s
, qemu_ld_helpers
[opc
& (MO_BSWAP
| MO_SIZE
)]);
1424 data_reg
= l
->datalo_reg
;
1425 switch (opc
& MO_SSIZE
) {
1427 tcg_out_ext8s(s
, data_reg
, TCG_REG_EAX
, P_REXW
);
1430 tcg_out_ext16s(s
, data_reg
, TCG_REG_EAX
, P_REXW
);
1432 #if TCG_TARGET_REG_BITS == 64
1434 tcg_out_ext32s(s
, data_reg
, TCG_REG_EAX
);
1439 /* Note that the helpers have zero-extended to tcg_target_long. */
1441 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg
, TCG_REG_EAX
);
1444 if (TCG_TARGET_REG_BITS
== 64) {
1445 tcg_out_mov(s
, TCG_TYPE_I64
, data_reg
, TCG_REG_RAX
);
1446 } else if (data_reg
== TCG_REG_EDX
) {
1447 /* xchg %edx, %eax */
1448 tcg_out_opc(s
, OPC_XCHG_ax_r32
+ TCG_REG_EDX
, 0, 0, 0);
1449 tcg_out_mov(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_EAX
);
1451 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg
, TCG_REG_EAX
);
1452 tcg_out_mov(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_EDX
);
1459 /* Jump to the code corresponding to next IR of qemu_st */
1460 tcg_out_jmp(s
, l
->raddr
);
1464 * Generate code for the slow path for a store at the end of block
1466 static void tcg_out_qemu_st_slow_path(TCGContext
*s
, TCGLabelQemuLdst
*l
)
1468 TCGMemOpIdx oi
= l
->oi
;
1469 TCGMemOp opc
= get_memop(oi
);
1470 TCGMemOp s_bits
= opc
& MO_SIZE
;
1471 tcg_insn_unit
**label_ptr
= &l
->label_ptr
[0];
1474 /* resolve label address */
1475 tcg_patch32(label_ptr
[0], s
->code_ptr
- label_ptr
[0] - 4);
1476 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1477 tcg_patch32(label_ptr
[1], s
->code_ptr
- label_ptr
[1] - 4);
1480 if (TCG_TARGET_REG_BITS
== 32) {
1483 tcg_out_st(s
, TCG_TYPE_PTR
, TCG_AREG0
, TCG_REG_ESP
, ofs
);
1486 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrlo_reg
, TCG_REG_ESP
, ofs
);
1489 if (TARGET_LONG_BITS
== 64) {
1490 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrhi_reg
, TCG_REG_ESP
, ofs
);
1494 tcg_out_st(s
, TCG_TYPE_I32
, l
->datalo_reg
, TCG_REG_ESP
, ofs
);
1497 if (s_bits
== MO_64
) {
1498 tcg_out_st(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_ESP
, ofs
);
1502 tcg_out_sti(s
, TCG_TYPE_I32
, oi
, TCG_REG_ESP
, ofs
);
1505 retaddr
= TCG_REG_EAX
;
1506 tcg_out_movi(s
, TCG_TYPE_PTR
, retaddr
, (uintptr_t)l
->raddr
);
1507 tcg_out_st(s
, TCG_TYPE_PTR
, retaddr
, TCG_REG_ESP
, ofs
);
1509 tcg_out_mov(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[0], TCG_AREG0
);
1510 /* The second argument is already loaded with addrlo. */
1511 tcg_out_mov(s
, (s_bits
== MO_64
? TCG_TYPE_I64
: TCG_TYPE_I32
),
1512 tcg_target_call_iarg_regs
[2], l
->datalo_reg
);
1513 tcg_out_movi(s
, TCG_TYPE_I32
, tcg_target_call_iarg_regs
[3], oi
);
1515 if (ARRAY_SIZE(tcg_target_call_iarg_regs
) > 4) {
1516 retaddr
= tcg_target_call_iarg_regs
[4];
1517 tcg_out_movi(s
, TCG_TYPE_PTR
, retaddr
, (uintptr_t)l
->raddr
);
1519 retaddr
= TCG_REG_RAX
;
1520 tcg_out_movi(s
, TCG_TYPE_PTR
, retaddr
, (uintptr_t)l
->raddr
);
1521 tcg_out_st(s
, TCG_TYPE_PTR
, retaddr
, TCG_REG_ESP
,
1522 TCG_TARGET_CALL_STACK_OFFSET
);
1526 /* "Tail call" to the helper, with the return address back inline. */
1527 tcg_out_push(s
, retaddr
);
1528 tcg_out_jmp(s
, qemu_st_helpers
[opc
& (MO_BSWAP
| MO_SIZE
)]);
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */
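
/* Note (added): for user-only emulation on x86-64/Linux the guest_base is
   installed as the %gs segment base via arch_prctl(ARCH_SET_GS, ...), so
   guest addresses can be dereferenced directly by tagging memory accesses
   with P_GS (a gs segment override prefix) instead of adding guest_base
   explicitly. */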
1548 static void tcg_out_qemu_ld_direct(TCGContext
*s
, TCGReg datalo
, TCGReg datahi
,
1549 TCGReg base
, int index
, intptr_t ofs
,
1550 int seg
, TCGMemOp memop
)
1552 const TCGMemOp real_bswap
= memop
& MO_BSWAP
;
1553 TCGMemOp bswap
= real_bswap
;
1554 int movop
= OPC_MOVL_GvEv
;
1556 if (have_movbe
&& real_bswap
) {
1558 movop
= OPC_MOVBE_GyMy
;
1561 switch (memop
& MO_SSIZE
) {
1563 tcg_out_modrm_sib_offset(s
, OPC_MOVZBL
+ seg
, datalo
,
1564 base
, index
, 0, ofs
);
1567 tcg_out_modrm_sib_offset(s
, OPC_MOVSBL
+ P_REXW
+ seg
, datalo
,
1568 base
, index
, 0, ofs
);
1571 tcg_out_modrm_sib_offset(s
, OPC_MOVZWL
+ seg
, datalo
,
1572 base
, index
, 0, ofs
);
1574 tcg_out_rolw_8(s
, datalo
);
1580 tcg_out_modrm_sib_offset(s
, OPC_MOVBE_GyMy
+ P_DATA16
+ seg
,
1581 datalo
, base
, index
, 0, ofs
);
1583 tcg_out_modrm_sib_offset(s
, OPC_MOVZWL
+ seg
, datalo
,
1584 base
, index
, 0, ofs
);
1585 tcg_out_rolw_8(s
, datalo
);
1587 tcg_out_modrm(s
, OPC_MOVSWL
+ P_REXW
, datalo
, datalo
);
1589 tcg_out_modrm_sib_offset(s
, OPC_MOVSWL
+ P_REXW
+ seg
,
1590 datalo
, base
, index
, 0, ofs
);
1594 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
, base
, index
, 0, ofs
);
1596 tcg_out_bswap32(s
, datalo
);
1599 #if TCG_TARGET_REG_BITS == 64
1602 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
,
1603 base
, index
, 0, ofs
);
1605 tcg_out_bswap32(s
, datalo
);
1607 tcg_out_ext32s(s
, datalo
, datalo
);
1609 tcg_out_modrm_sib_offset(s
, OPC_MOVSLQ
+ seg
, datalo
,
1610 base
, index
, 0, ofs
);
1615 if (TCG_TARGET_REG_BITS
== 64) {
1616 tcg_out_modrm_sib_offset(s
, movop
+ P_REXW
+ seg
, datalo
,
1617 base
, index
, 0, ofs
);
1619 tcg_out_bswap64(s
, datalo
);
1627 if (base
!= datalo
) {
1628 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
,
1629 base
, index
, 0, ofs
);
1630 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datahi
,
1631 base
, index
, 0, ofs
+ 4);
1633 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datahi
,
1634 base
, index
, 0, ofs
+ 4);
1635 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
,
1636 base
, index
, 0, ofs
);
1639 tcg_out_bswap32(s
, datalo
);
1640 tcg_out_bswap32(s
, datahi
);
1649 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1650 EAX. It will be useful once fixed registers globals are less
1652 static void tcg_out_qemu_ld(TCGContext
*s
, const TCGArg
*args
, bool is64
)
1654 TCGReg datalo
, datahi
, addrlo
;
1655 TCGReg addrhi
__attribute__((unused
));
1658 #if defined(CONFIG_SOFTMMU)
1660 tcg_insn_unit
*label_ptr
[2];
1664 datahi
= (TCG_TARGET_REG_BITS
== 32 && is64
? *args
++ : 0);
1666 addrhi
= (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
? *args
++ : 0);
1668 opc
= get_memop(oi
);
1670 #if defined(CONFIG_SOFTMMU)
1671 mem_index
= get_mmuidx(oi
);
1673 tcg_out_tlb_load(s
, addrlo
, addrhi
, mem_index
, opc
,
1674 label_ptr
, offsetof(CPUTLBEntry
, addr_read
));
1677 tcg_out_qemu_ld_direct(s
, datalo
, datahi
, TCG_REG_L1
, -1, 0, 0, opc
);
1679 /* Record the current context of a load into ldst label */
1680 add_qemu_ldst_label(s
, true, oi
, datalo
, datahi
, addrlo
, addrhi
,
1681 s
->code_ptr
, label_ptr
);
1684 int32_t offset
= guest_base
;
1685 TCGReg base
= addrlo
;
1689 /* For a 32-bit guest, the high 32 bits may contain garbage.
1690 We can do this with the ADDR32 prefix if we're not using
1691 a guest base, or when using segmentation. Otherwise we
1692 need to zero-extend manually. */
1693 if (guest_base
== 0 || guest_base_flags
) {
1694 seg
= guest_base_flags
;
1696 if (TCG_TARGET_REG_BITS
> TARGET_LONG_BITS
) {
1699 } else if (TCG_TARGET_REG_BITS
== 64) {
1700 if (TARGET_LONG_BITS
== 32) {
1701 tcg_out_ext32u(s
, TCG_REG_L0
, base
);
1704 if (offset
!= guest_base
) {
1705 tcg_out_movi(s
, TCG_TYPE_I64
, TCG_REG_L1
, guest_base
);
1711 tcg_out_qemu_ld_direct(s
, datalo
, datahi
,
1712 base
, index
, offset
, seg
, opc
);
1717 static void tcg_out_qemu_st_direct(TCGContext
*s
, TCGReg datalo
, TCGReg datahi
,
1718 TCGReg base
, intptr_t ofs
, int seg
,
1721 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1722 we could perform the bswap twice to restore the original value
1723 instead of moving to the scratch. But as it is, the L constraint
1724 means that TCG_REG_L0 is definitely free here. */
1725 const TCGReg scratch
= TCG_REG_L0
;
1726 const TCGMemOp real_bswap
= memop
& MO_BSWAP
;
1727 TCGMemOp bswap
= real_bswap
;
1728 int movop
= OPC_MOVL_EvGv
;
1730 if (have_movbe
&& real_bswap
) {
1732 movop
= OPC_MOVBE_MyGy
;
1735 switch (memop
& MO_SIZE
) {
1737 /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1738 Use the scratch register if necessary. */
1739 if (TCG_TARGET_REG_BITS
== 32 && datalo
>= 4) {
1740 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1743 tcg_out_modrm_offset(s
, OPC_MOVB_EvGv
+ P_REXB_R
+ seg
,
1748 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1749 tcg_out_rolw_8(s
, scratch
);
1752 tcg_out_modrm_offset(s
, movop
+ P_DATA16
+ seg
, datalo
, base
, ofs
);
1756 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1757 tcg_out_bswap32(s
, scratch
);
1760 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1763 if (TCG_TARGET_REG_BITS
== 64) {
1765 tcg_out_mov(s
, TCG_TYPE_I64
, scratch
, datalo
);
1766 tcg_out_bswap64(s
, scratch
);
1769 tcg_out_modrm_offset(s
, movop
+ P_REXW
+ seg
, datalo
, base
, ofs
);
1771 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datahi
);
1772 tcg_out_bswap32(s
, scratch
);
1773 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
+ seg
, scratch
, base
, ofs
);
1774 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1775 tcg_out_bswap32(s
, scratch
);
1776 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
+ seg
, scratch
, base
, ofs
+4);
1783 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1784 tcg_out_modrm_offset(s
, movop
+ seg
, datahi
, base
, ofs
+4);
1792 static void tcg_out_qemu_st(TCGContext
*s
, const TCGArg
*args
, bool is64
)
1794 TCGReg datalo
, datahi
, addrlo
;
1795 TCGReg addrhi
__attribute__((unused
));
1798 #if defined(CONFIG_SOFTMMU)
1800 tcg_insn_unit
*label_ptr
[2];
1804 datahi
= (TCG_TARGET_REG_BITS
== 32 && is64
? *args
++ : 0);
1806 addrhi
= (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
? *args
++ : 0);
1808 opc
= get_memop(oi
);
1810 #if defined(CONFIG_SOFTMMU)
1811 mem_index
= get_mmuidx(oi
);
1813 tcg_out_tlb_load(s
, addrlo
, addrhi
, mem_index
, opc
,
1814 label_ptr
, offsetof(CPUTLBEntry
, addr_write
));
1817 tcg_out_qemu_st_direct(s
, datalo
, datahi
, TCG_REG_L1
, 0, 0, opc
);
1819 /* Record the current context of a store into ldst label */
1820 add_qemu_ldst_label(s
, false, oi
, datalo
, datahi
, addrlo
, addrhi
,
1821 s
->code_ptr
, label_ptr
);
1824 int32_t offset
= guest_base
;
1825 TCGReg base
= addrlo
;
1828 /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */
1829 if (guest_base
== 0 || guest_base_flags
) {
1830 seg
= guest_base_flags
;
1832 if (TCG_TARGET_REG_BITS
> TARGET_LONG_BITS
) {
1835 } else if (TCG_TARGET_REG_BITS
== 64) {
1836 /* ??? Note that we can't use the same SIB addressing scheme
1837 as for loads, since we require L0 free for bswap. */
1838 if (offset
!= guest_base
) {
1839 if (TARGET_LONG_BITS
== 32) {
1840 tcg_out_ext32u(s
, TCG_REG_L0
, base
);
1843 tcg_out_movi(s
, TCG_TYPE_I64
, TCG_REG_L1
, guest_base
);
1844 tgen_arithr(s
, ARITH_ADD
+ P_REXW
, TCG_REG_L1
, base
);
1847 } else if (TARGET_LONG_BITS
== 32) {
1848 tcg_out_ext32u(s
, TCG_REG_L1
, base
);
1853 tcg_out_qemu_st_direct(s
, datalo
, datahi
, base
, offset
, seg
, opc
);
1858 static inline void tcg_out_op(TCGContext
*s
, TCGOpcode opc
,
1859 const TCGArg
*args
, const int *const_args
)
1862 int c
, const_a2
, vexop
, rexw
= 0;
1864 #if TCG_TARGET_REG_BITS == 64
1865 # define OP_32_64(x) \
1866 case glue(glue(INDEX_op_, x), _i64): \
1867 rexw = P_REXW; /* FALLTHRU */ \
1868 case glue(glue(INDEX_op_, x), _i32)
1870 # define OP_32_64(x) \
1871 case glue(glue(INDEX_op_, x), _i32)
1874 /* Hoist the loads of the most common arguments. */
1878 const_a2
= const_args
[2];
1881 case INDEX_op_exit_tb
:
1882 /* Reuse the zeroing that exists for goto_ptr. */
1884 tcg_out_jmp(s
, s
->code_gen_epilogue
);
1886 tcg_out_movi(s
, TCG_TYPE_PTR
, TCG_REG_EAX
, a0
);
1887 tcg_out_jmp(s
, tb_ret_addr
);
1890 case INDEX_op_goto_tb
:
1891 if (s
->tb_jmp_insn_offset
) {
1892 /* direct jump method */
1894 /* jump displacement must be aligned for atomic patching;
1895 * see if we need to add extra nops before jump
1897 gap
= tcg_pcrel_diff(s
, QEMU_ALIGN_PTR_UP(s
->code_ptr
+ 1, 4));
1899 tcg_out_nopn(s
, gap
- 1);
1901 tcg_out8(s
, OPC_JMP_long
); /* jmp im */
1902 s
->tb_jmp_insn_offset
[a0
] = tcg_current_code_size(s
);
1905 /* indirect jump method */
1906 tcg_out_modrm_offset(s
, OPC_GRP5
, EXT5_JMPN_Ev
, -1,
1907 (intptr_t)(s
->tb_jmp_target_addr
+ a0
));
1909 s
->tb_jmp_reset_offset
[a0
] = tcg_current_code_size(s
);
1911 case INDEX_op_goto_ptr
:
1912 /* jmp to the given host address (could be epilogue) */
1913 tcg_out_modrm(s
, OPC_GRP5
, EXT5_JMPN_Ev
, a0
);
1916 tcg_out_jxx(s
, JCC_JMP
, arg_label(a0
), 0);
1919 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1920 tcg_out_modrm_offset(s
, OPC_MOVZBL
, a0
, a1
, a2
);
1923 tcg_out_modrm_offset(s
, OPC_MOVSBL
+ rexw
, a0
, a1
, a2
);
1926 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1927 tcg_out_modrm_offset(s
, OPC_MOVZWL
, a0
, a1
, a2
);
1930 tcg_out_modrm_offset(s
, OPC_MOVSWL
+ rexw
, a0
, a1
, a2
);
1932 #if TCG_TARGET_REG_BITS == 64
1933 case INDEX_op_ld32u_i64
:
1935 case INDEX_op_ld_i32
:
1936 tcg_out_ld(s
, TCG_TYPE_I32
, a0
, a1
, a2
);
1940 if (const_args
[0]) {
1941 tcg_out_modrm_offset(s
, OPC_MOVB_EvIz
, 0, a1
, a2
);
1944 tcg_out_modrm_offset(s
, OPC_MOVB_EvGv
| P_REXB_R
, a0
, a1
, a2
);
1948 if (const_args
[0]) {
1949 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
| P_DATA16
, 0, a1
, a2
);
1952 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
| P_DATA16
, a0
, a1
, a2
);
1955 #if TCG_TARGET_REG_BITS == 64
1956 case INDEX_op_st32_i64
:
1958 case INDEX_op_st_i32
:
1959 if (const_args
[0]) {
1960 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
, 0, a1
, a2
);
1963 tcg_out_st(s
, TCG_TYPE_I32
, a0
, a1
, a2
);
1968 /* For 3-operand addition, use LEA. */
1973 } else if (a0
== a2
) {
1974 /* Watch out for dest = src + dest, since we've removed
1975 the matching constraint on the add. */
1976 tgen_arithr(s
, ARITH_ADD
+ rexw
, a0
, a1
);
1980 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ rexw
, a0
, a1
, a2
, 0, c3
);
1999 tgen_arithi(s
, c
+ rexw
, a0
, a2
, 0);
2001 tgen_arithr(s
, c
+ rexw
, a0
, a2
);
2007 tcg_out_mov(s
, rexw
? TCG_TYPE_I64
: TCG_TYPE_I32
, a0
, a1
);
2008 tgen_arithi(s
, ARITH_AND
+ rexw
, a0
, ~a2
, 0);
2010 tcg_out_vex_modrm(s
, OPC_ANDN
+ rexw
, a0
, a2
, a1
);
2018 if (val
== (int8_t)val
) {
2019 tcg_out_modrm(s
, OPC_IMUL_GvEvIb
+ rexw
, a0
, a0
);
2022 tcg_out_modrm(s
, OPC_IMUL_GvEvIz
+ rexw
, a0
, a0
);
2026 tcg_out_modrm(s
, OPC_IMUL_GvEv
+ rexw
, a0
, a2
);
2031 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_IDIV
, args
[4]);
2034 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_DIV
, args
[4]);
2038 /* For small constant 3-operand shift, use LEA. */
2039 if (const_a2
&& a0
!= a1
&& (a2
- 1) < 3) {
2041 /* shl $1,a1,a0 -> lea (a1,a1),a0 */
2042 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ rexw
, a0
, a1
, a1
, 0, 0);
2044 /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */
2045 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ rexw
, a0
, -1, a1
, a2
, 0);
2051 goto gen_shift_maybe_vex
;
2055 goto gen_shift_maybe_vex
;
2059 goto gen_shift_maybe_vex
;
2066 gen_shift_maybe_vex
:
2069 tcg_out_vex_modrm(s
, vexop
+ rexw
, a0
, a2
, a1
);
2072 tcg_out_mov(s
, rexw
? TCG_TYPE_I64
: TCG_TYPE_I32
, a0
, a1
);
2077 tcg_out_shifti(s
, c
+ rexw
, a0
, a2
);
2079 tcg_out_modrm(s
, OPC_SHIFT_cl
+ rexw
, c
, a0
);
2084 tcg_out_ctz(s
, rexw
, args
[0], args
[1], args
[2], const_args
[2]);
2087 tcg_out_clz(s
, rexw
, args
[0], args
[1], args
[2], const_args
[2]);
2090 tcg_out_modrm(s
, OPC_POPCNT
+ rexw
, a0
, a1
);
2093 case INDEX_op_brcond_i32
:
2094 tcg_out_brcond32(s
, a2
, a0
, a1
, const_args
[1], arg_label(args
[3]), 0);
2096 case INDEX_op_setcond_i32
:
2097 tcg_out_setcond32(s
, args
[3], a0
, a1
, a2
, const_a2
);
2099 case INDEX_op_movcond_i32
:
2100 tcg_out_movcond32(s
, args
[5], a0
, a1
, a2
, const_a2
, args
[3]);
2104 tcg_out_rolw_8(s
, a0
);
2107 tcg_out_bswap32(s
, a0
);
2111 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_NEG
, a0
);
2114 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_NOT
, a0
);
2118 tcg_out_ext8s(s
, a0
, a1
, rexw
);
2121 tcg_out_ext16s(s
, a0
, a1
, rexw
);
2124 tcg_out_ext8u(s
, a0
, a1
);
2127 tcg_out_ext16u(s
, a0
, a1
);
2130 case INDEX_op_qemu_ld_i32
:
2131 tcg_out_qemu_ld(s
, args
, 0);
2133 case INDEX_op_qemu_ld_i64
:
2134 tcg_out_qemu_ld(s
, args
, 1);
2136 case INDEX_op_qemu_st_i32
:
2137 tcg_out_qemu_st(s
, args
, 0);
2139 case INDEX_op_qemu_st_i64
:
2140 tcg_out_qemu_st(s
, args
, 1);
2144 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_MUL
, args
[3]);
2147 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_IMUL
, args
[3]);
2150 if (const_args
[4]) {
2151 tgen_arithi(s
, ARITH_ADD
+ rexw
, a0
, args
[4], 1);
2153 tgen_arithr(s
, ARITH_ADD
+ rexw
, a0
, args
[4]);
2155 if (const_args
[5]) {
2156 tgen_arithi(s
, ARITH_ADC
+ rexw
, a1
, args
[5], 1);
2158 tgen_arithr(s
, ARITH_ADC
+ rexw
, a1
, args
[5]);
2162 if (const_args
[4]) {
2163 tgen_arithi(s
, ARITH_SUB
+ rexw
, a0
, args
[4], 1);
2165 tgen_arithr(s
, ARITH_SUB
+ rexw
, a0
, args
[4]);
2167 if (const_args
[5]) {
2168 tgen_arithi(s
, ARITH_SBB
+ rexw
, a1
, args
[5], 1);
2170 tgen_arithr(s
, ARITH_SBB
+ rexw
, a1
, args
[5]);
2174 #if TCG_TARGET_REG_BITS == 32
2175 case INDEX_op_brcond2_i32
:
2176 tcg_out_brcond2(s
, args
, const_args
, 0);
2178 case INDEX_op_setcond2_i32
:
2179 tcg_out_setcond2(s
, args
, const_args
);
2181 #else /* TCG_TARGET_REG_BITS == 64 */
2182 case INDEX_op_ld32s_i64
:
2183 tcg_out_modrm_offset(s
, OPC_MOVSLQ
, a0
, a1
, a2
);
2185 case INDEX_op_ld_i64
:
2186 tcg_out_ld(s
, TCG_TYPE_I64
, a0
, a1
, a2
);
2188 case INDEX_op_st_i64
:
2189 if (const_args
[0]) {
2190 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
| P_REXW
, 0, a1
, a2
);
2193 tcg_out_st(s
, TCG_TYPE_I64
, a0
, a1
, a2
);
2197 case INDEX_op_brcond_i64
:
2198 tcg_out_brcond64(s
, a2
, a0
, a1
, const_args
[1], arg_label(args
[3]), 0);
2200 case INDEX_op_setcond_i64
:
2201 tcg_out_setcond64(s
, args
[3], a0
, a1
, a2
, const_a2
);
2203 case INDEX_op_movcond_i64
:
2204 tcg_out_movcond64(s
, args
[5], a0
, a1
, a2
, const_a2
, args
[3]);
2207 case INDEX_op_bswap64_i64
:
2208 tcg_out_bswap64(s
, a0
);
2210 case INDEX_op_extu_i32_i64
:
2211 case INDEX_op_ext32u_i64
:
2212 tcg_out_ext32u(s
, a0
, a1
);
2214 case INDEX_op_ext_i32_i64
:
2215 case INDEX_op_ext32s_i64
:
2216 tcg_out_ext32s(s
, a0
, a1
);
2221 if (args
[3] == 0 && args
[4] == 8) {
2222 /* load bits 0..7 */
2223 tcg_out_modrm(s
, OPC_MOVB_EvGv
| P_REXB_R
| P_REXB_RM
, a2
, a0
);
2224 } else if (args
[3] == 8 && args
[4] == 8) {
2225 /* load bits 8..15 */
2226 tcg_out_modrm(s
, OPC_MOVB_EvGv
, a2
, a0
+ 4);
2227 } else if (args
[3] == 0 && args
[4] == 16) {
2228 /* load bits 0..15 */
2229 tcg_out_modrm(s
, OPC_MOVL_EvGv
| P_DATA16
, a2
, a0
);
2235 case INDEX_op_extract_i64
:
2236 if (a2
+ args
[3] == 32) {
2237 /* This is a 32-bit zero-extending right shift. */
2238 tcg_out_mov(s
, TCG_TYPE_I32
, a0
, a1
);
2239 tcg_out_shifti(s
, SHIFT_SHR
, a0
, a2
);
2243 case INDEX_op_extract_i32
:
2244 /* On the off-chance that we can use the high-byte registers.
2245 Otherwise we emit the same ext16 + shift pattern that we
2246 would have gotten from the normal tcg-op.c expansion. */
2247 tcg_debug_assert(a2
== 8 && args
[3] == 8);
2248 if (a1
< 4 && a0
< 8) {
2249 tcg_out_modrm(s
, OPC_MOVZBL
, a0
, a1
+ 4);
2251 tcg_out_ext16u(s
, a0
, a1
);
2252 tcg_out_shifti(s
, SHIFT_SHR
, a0
, 8);
2256 case INDEX_op_sextract_i32
:
2257 /* We don't implement sextract_i64, as we cannot sign-extend to
2258 64-bits without using the REX prefix that explicitly excludes
2259 access to the high-byte registers. */
2260 tcg_debug_assert(a2
== 8 && args
[3] == 8);
2261 if (a1
< 4 && a0
< 8) {
2262 tcg_out_modrm(s
, OPC_MOVSBL
, a0
, a1
+ 4);
2264 tcg_out_ext16s(s
, a0
, a1
, 0);
2265 tcg_out_shifti(s
, SHIFT_SAR
, a0
, 8);
2272 case INDEX_op_mov_i32
: /* Always emitted via tcg_out_mov. */
2273 case INDEX_op_mov_i64
:
2274 case INDEX_op_movi_i32
: /* Always emitted via tcg_out_movi. */
2275 case INDEX_op_movi_i64
:
2276 case INDEX_op_call
: /* Always emitted via tcg_out_call. */

static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
    static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
    static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
    static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
    static const TCGTargetOpDef r_q = { .args_ct_str = { "r", "q" } };
    static const TCGTargetOpDef r_re = { .args_ct_str = { "r", "re" } };
    static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } };
    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
    static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
    static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
    static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
    static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
    static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
    static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
    static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
    static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
    static const TCGTargetOpDef r_r_L_L
        = { .args_ct_str = { "r", "r", "L", "L" } };
    static const TCGTargetOpDef L_L_L_L
        = { .args_ct_str = { "L", "L", "L", "L" } };

    switch (op) {
    case INDEX_op_goto_ptr:
        return &r;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
        return &r_r;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        return &qi_r;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        return &ri_r;
    case INDEX_op_st_i64:
        return &re_r;

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
        return &r_r_re;
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        return &r_0_re;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
        {
            static const TCGTargetOpDef and
                = { .args_ct_str = { "r", "0", "reZ" } };
            return &and;
        }
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
        {
            static const TCGTargetOpDef andc
                = { .args_ct_str = { "r", "r", "rI" } };
            return &andc;
        }

    case INDEX_op_shl_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i32:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i32:
    case INDEX_op_sar_i64:
        return have_bmi2 ? &r_r_ri : &r_0_ci;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i32:
    case INDEX_op_rotr_i64:
        return &r_0_ci;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return &r_re;

    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
        return &r_0;

    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        return &r_q;
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_ctpop_i32:
    case INDEX_op_ctpop_i64:
        return &r_r;

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        {
            static const TCGTargetOpDef dep
                = { .args_ct_str = { "Q", "0", "Q" } };
            return &dep;
        }
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        {
            static const TCGTargetOpDef setc
                = { .args_ct_str = { "q", "r", "re" } };
            return &setc;
        }
    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        {
            static const TCGTargetOpDef movc
                = { .args_ct_str = { "r", "r", "re", "r", "0" } };
            return &movc;
        }
    case INDEX_op_div2_i32:
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i32:
    case INDEX_op_divu2_i64:
        {
            static const TCGTargetOpDef div2
                = { .args_ct_str = { "a", "d", "0", "1", "r" } };
            return &div2;
        }
    case INDEX_op_mulu2_i32:
    case INDEX_op_mulu2_i64:
    case INDEX_op_muls2_i32:
    case INDEX_op_muls2_i64:
        {
            static const TCGTargetOpDef mul2
                = { .args_ct_str = { "a", "d", "a", "r" } };
            return &mul2;
        }
    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        {
            static const TCGTargetOpDef arith2
                = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } };
            return &arith2;
        }

    case INDEX_op_ctz_i32:
    case INDEX_op_ctz_i64:
        {
            static const TCGTargetOpDef ctz[2] = {
                { .args_ct_str = { "&r", "r", "r" } },
                { .args_ct_str = { "&r", "r", "rW" } },
            };
            return &ctz[have_bmi1];
        }
    case INDEX_op_clz_i32:
    case INDEX_op_clz_i64:
        {
            static const TCGTargetOpDef clz[2] = {
                { .args_ct_str = { "&r", "r", "r" } },
                { .args_ct_str = { "&r", "r", "rW" } },
            };
            return &clz[have_lzcnt];
        }

    case INDEX_op_qemu_ld_i32:
        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
    case INDEX_op_qemu_st_i32:
        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
    case INDEX_op_qemu_ld_i64:
        return (TCG_TARGET_REG_BITS == 64 ? &r_L
                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
                : &r_r_L_L);
    case INDEX_op_qemu_st_i64:
        return (TCG_TARGET_REG_BITS == 64 ? &L_L
                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
                : &L_L_L_L);

    case INDEX_op_brcond2_i32:
        {
            static const TCGTargetOpDef b2
                = { .args_ct_str = { "r", "r", "ri", "ri" } };
            return &b2;
        }
    case INDEX_op_setcond2_i32:
        {
            static const TCGTargetOpDef s2
                = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
            return &s2;
        }

    default:
        break;
    }
    return NULL;
}

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
                         + stack_addend);
#else
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    s->code_gen_epilogue = s->code_ptr;
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to guest_base.  */
    if (guest_base) {
        setup_guest_base_seg();
    }
#endif
}
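
/*
 * A sketch of the resulting frame layout (stack grows downward; offsets are
 * from the stack pointer after the prologue has run):
 *
 *   high addresses:  return address and saved callee-save registers
 *                    (PUSH_SIZE bytes), then any alignment padding
 *   middle:          TCG temporaries, CPU_TEMP_BUF_NLONGS * sizeof(long)
 *                    bytes, registered via tcg_set_frame() above
 *   low addresses:   TCG_STATIC_CALL_ARGS_SIZE bytes at the stack pointer,
 *                    reserved for arguments to helper calls
 */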

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    memset(p, 0x90, count);
}

static void tcg_target_init(TCGContext *s)
{
#ifdef CONFIG_CPUID_H
    unsigned a, b, c, d;
    int max = __get_cpuid_max(0, 0);

    if (max >= 1) {
        __cpuid(1, a, b, c, d);
#ifndef have_cmov
        /* For 32-bit, 99% certainty that we're running on hardware that
           supports cmov, but we still need to check.  In case cmov is not
           available, we'll use a small forward branch.  */
        have_cmov = (d & bit_CMOV) != 0;
#endif

        /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
           need to probe for it.  */
        have_movbe = (c & bit_MOVBE) != 0;
        have_popcnt = (c & bit_POPCNT) != 0;
    }

    if (max >= 7) {
        /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs.  */
        __cpuid_count(7, 0, a, b, c, d);
        have_bmi1 = (b & bit_BMI) != 0;
        have_bmi2 = (b & bit_BMI2) != 0;
    }

    max = __get_cpuid_max(0x80000000, 0);
    if (max >= 0x80000001) {
        __cpuid(0x80000001, a, b, c, d);
        /* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs.  */
        have_lzcnt = (c & bit_LZCNT) != 0;
    }
#endif /* CONFIG_CPUID_H */
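
    /*
     * For reference, the probes above rely on the <cpuid.h> helpers: leaf 1
     * reports CMOV/MOVBE/POPCNT, leaf 7 sub-leaf 0 reports BMI1/BMI2, and
     * extended leaf 0x80000001 reports LZCNT.  A standalone check for a
     * single feature bit would look roughly like:
     *
     *     unsigned a, b, c, d;
     *     bool use_bmi2 = false;
     *     if (__get_cpuid_max(0, 0) >= 7) {
     *         __cpuid_count(7, 0, a, b, c, d);
     *         use_bmi2 = (b & bit_BMI2) != 0;
     *     }
     */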

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

#if !defined(__ELF__)
    /* Host machine without ELF.  */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = 16,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ...  */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        FRAME_SIZE >> 7
    },
    .fde_reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
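
/*
 * Each .fde_reg_ofs pair above is a DW_CFA_offset: the first byte is 0x80
 * plus the DWARF register number, the second is the save slot as a uleb128
 * factored by data_align.  With data_align = -8, "0x86, 2" means DWARF
 * register 6 (%rbp) saved at CFA-16, matching the push order in
 * tcg_target_callee_save_regs.
 */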
#else
#define ELF_HOST_MACHINE EM_386
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 8,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ...  */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        FRAME_SIZE >> 7
    },
    .fde_reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif