2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "tcg-be-ldst.h"
27 #ifdef CONFIG_DEBUG_TCG
28 static const char * const tcg_target_reg_names
[TCG_TARGET_NB_REGS
] = {
29 #if TCG_TARGET_REG_BITS == 64
30 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
31 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
33 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
38 static const int tcg_target_reg_alloc_order
[] = {
39 #if TCG_TARGET_REG_BITS == 64
66 static const int tcg_target_call_iarg_regs
[] = {
67 #if TCG_TARGET_REG_BITS == 64
80 /* 32 bit mode uses stack based calling convention (GCC default). */
84 static const int tcg_target_call_oarg_regs
[] = {
86 #if TCG_TARGET_REG_BITS == 32
91 /* Constants we accept. */
92 #define TCG_CT_CONST_S32 0x100
93 #define TCG_CT_CONST_U32 0x200
94 #define TCG_CT_CONST_I32 0x400
96 /* Registers used with L constraint, which are the first argument
97 registers on x86_64, and two random call clobbered registers on
99 #if TCG_TARGET_REG_BITS == 64
100 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
101 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
103 # define TCG_REG_L0 TCG_REG_EAX
104 # define TCG_REG_L1 TCG_REG_EDX
107 /* The host compiler should supply <cpuid.h> to enable runtime features
108 detection, as we're not going to go so far as our own inline assembly.
109 If not available, default values will be assumed. */
110 #if defined(CONFIG_CPUID_H)
114 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
116 #if TCG_TARGET_REG_BITS == 64
118 #elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
119 static bool have_cmov
;
124 /* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
125 going to attempt to determine at runtime whether movbe is available. */
126 #if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
127 static bool have_movbe
;
129 # define have_movbe 0
132 /* We need this symbol in tcg-target.h, and we can't properly conditionalize
133 it there. Therefore we always define the variable. */
136 #if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
137 static bool have_bmi2
;
142 static tcg_insn_unit
*tb_ret_addr
;
144 static void patch_reloc(tcg_insn_unit
*code_ptr
, int type
,
145 intptr_t value
, intptr_t addend
)
150 value
-= (uintptr_t)code_ptr
;
151 if (value
!= (int32_t)value
) {
154 tcg_patch32(code_ptr
, value
);
157 value
-= (uintptr_t)code_ptr
;
158 if (value
!= (int8_t)value
) {
161 tcg_patch8(code_ptr
, value
);
168 /* parse target specific constraints */
169 static int target_parse_constraint(TCGArgConstraint
*ct
, const char **pct_str
)
176 ct
->ct
|= TCG_CT_REG
;
177 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EAX
);
180 ct
->ct
|= TCG_CT_REG
;
181 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EBX
);
185 ct
->ct
|= TCG_CT_REG
;
186 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_ECX
);
189 ct
->ct
|= TCG_CT_REG
;
190 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EDX
);
193 ct
->ct
|= TCG_CT_REG
;
194 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_ESI
);
197 ct
->ct
|= TCG_CT_REG
;
198 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EDI
);
201 ct
->ct
|= TCG_CT_REG
;
202 if (TCG_TARGET_REG_BITS
== 64) {
203 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
205 tcg_regset_set32(ct
->u
.regs
, 0, 0xf);
209 ct
->ct
|= TCG_CT_REG
;
210 tcg_regset_set32(ct
->u
.regs
, 0, 0xf);
214 ct
->ct
|= TCG_CT_REG
;
215 if (TCG_TARGET_REG_BITS
== 64) {
216 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
218 tcg_regset_set32(ct
->u
.regs
, 0, 0xff);
222 /* With SHRX et al, we need not use ECX as shift count register. */
229 /* qemu_ld/st address constraint */
231 ct
->ct
|= TCG_CT_REG
;
232 if (TCG_TARGET_REG_BITS
== 64) {
233 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
235 tcg_regset_set32(ct
->u
.regs
, 0, 0xff);
237 tcg_regset_reset_reg(ct
->u
.regs
, TCG_REG_L0
);
238 tcg_regset_reset_reg(ct
->u
.regs
, TCG_REG_L1
);
242 ct
->ct
|= TCG_CT_CONST_S32
;
245 ct
->ct
|= TCG_CT_CONST_U32
;
248 ct
->ct
|= TCG_CT_CONST_I32
;
259 /* test if a constant matches the constraint */
260 static inline int tcg_target_const_match(tcg_target_long val
, TCGType type
,
261 const TCGArgConstraint
*arg_ct
)
264 if (ct
& TCG_CT_CONST
) {
267 if ((ct
& TCG_CT_CONST_S32
) && val
== (int32_t)val
) {
270 if ((ct
& TCG_CT_CONST_U32
) && val
== (uint32_t)val
) {
273 if ((ct
& TCG_CT_CONST_I32
) && ~val
== (int32_t)~val
) {
279 #if TCG_TARGET_REG_BITS == 64
280 # define LOWREGMASK(x) ((x) & 7)
282 # define LOWREGMASK(x) (x)
285 #define P_EXT 0x100 /* 0x0f opcode prefix */
286 #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
287 #define P_DATA16 0x400 /* 0x66 opcode prefix */
288 #if TCG_TARGET_REG_BITS == 64
289 # define P_ADDR32 0x800 /* 0x67 opcode prefix */
290 # define P_REXW 0x1000 /* Set REX.W = 1 */
291 # define P_REXB_R 0x2000 /* REG field as byte register */
292 # define P_REXB_RM 0x4000 /* R/M field as byte register */
293 # define P_GS 0x8000 /* gs segment override */
301 #define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
302 #define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
304 #define OPC_ARITH_EvIz (0x81)
305 #define OPC_ARITH_EvIb (0x83)
306 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
307 #define OPC_ANDN (0xf2 | P_EXT38)
308 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
309 #define OPC_BSWAP (0xc8 | P_EXT)
310 #define OPC_CALL_Jz (0xe8)
311 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
312 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
313 #define OPC_DEC_r32 (0x48)
314 #define OPC_IMUL_GvEv (0xaf | P_EXT)
315 #define OPC_IMUL_GvEvIb (0x6b)
316 #define OPC_IMUL_GvEvIz (0x69)
317 #define OPC_INC_r32 (0x40)
318 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
319 #define OPC_JCC_short (0x70) /* ... plus condition code */
320 #define OPC_JMP_long (0xe9)
321 #define OPC_JMP_short (0xeb)
322 #define OPC_LEA (0x8d)
323 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
324 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
325 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
326 #define OPC_MOVB_EvIz (0xc6)
327 #define OPC_MOVL_EvIz (0xc7)
328 #define OPC_MOVL_Iv (0xb8)
329 #define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
330 #define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
331 #define OPC_MOVSBL (0xbe | P_EXT)
332 #define OPC_MOVSWL (0xbf | P_EXT)
333 #define OPC_MOVSLQ (0x63 | P_REXW)
334 #define OPC_MOVZBL (0xb6 | P_EXT)
335 #define OPC_MOVZWL (0xb7 | P_EXT)
336 #define OPC_POP_r32 (0x58)
337 #define OPC_PUSH_r32 (0x50)
338 #define OPC_PUSH_Iv (0x68)
339 #define OPC_PUSH_Ib (0x6a)
340 #define OPC_RET (0xc3)
341 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
342 #define OPC_SHIFT_1 (0xd1)
343 #define OPC_SHIFT_Ib (0xc1)
344 #define OPC_SHIFT_cl (0xd3)
345 #define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
346 #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
347 #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
348 #define OPC_TESTL (0x85)
349 #define OPC_XCHG_ax_r32 (0x90)
351 #define OPC_GRP3_Ev (0xf7)
352 #define OPC_GRP5 (0xff)
354 /* Group 1 opcode extensions for 0x80-0x83.
355 These are also used as modifiers for OPC_ARITH. */
365 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
372 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
380 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
381 #define EXT5_INC_Ev 0
382 #define EXT5_DEC_Ev 1
383 #define EXT5_CALLN_Ev 2
384 #define EXT5_JMPN_Ev 4
386 /* Condition codes to be added to OPC_JCC_{long,short}. */
/* Map a TCG comparison condition to the corresponding x86 condition
   code.  The value is added/ORed into OPC_JCC_{long,short}, OPC_SETCC
   or OPC_CMOVCC.  Signed comparisons use JL/JGE/JLE/JG; unsigned ones
   use JB/JAE/JBE/JA.  */
static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
418 #if TCG_TARGET_REG_BITS == 64
419 static void tcg_out_opc(TCGContext
*s
, int opc
, int r
, int rm
, int x
)
426 if (opc
& P_DATA16
) {
427 /* We should never be asking for both 16 and 64-bit operation. */
428 tcg_debug_assert((opc
& P_REXW
) == 0);
431 if (opc
& P_ADDR32
) {
436 rex
|= (opc
& P_REXW
) ? 0x8 : 0x0; /* REX.W */
437 rex
|= (r
& 8) >> 1; /* REX.R */
438 rex
|= (x
& 8) >> 2; /* REX.X */
439 rex
|= (rm
& 8) >> 3; /* REX.B */
441 /* P_REXB_{R,RM} indicates that the given register is the low byte.
442 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
443 as otherwise the encoding indicates %[abcd]h. Note that the values
444 that are ORed in merely indicate that the REX byte must be present;
445 those bits get discarded in output. */
446 rex
|= opc
& (r
>= 4 ? P_REXB_R
: 0);
447 rex
|= opc
& (rm
>= 4 ? P_REXB_RM
: 0);
450 tcg_out8(s
, (uint8_t)(rex
| 0x40));
453 if (opc
& (P_EXT
| P_EXT38
)) {
463 static void tcg_out_opc(TCGContext
*s
, int opc
)
465 if (opc
& P_DATA16
) {
468 if (opc
& (P_EXT
| P_EXT38
)) {
476 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
477 the 32-bit compilation paths. This method works with all versions of gcc,
478 whereas relying on optimization may not be able to exclude them. */
479 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
/* Output opcode OPC followed by a register-direct ModRM byte:
   mod = 11, reg = R, r/m = RM.  High register bits (bit 3) are
   handled by tcg_out_opc via REX.R / REX.B.  */
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
488 static void tcg_out_vex_modrm(TCGContext
*s
, int opc
, int r
, int v
, int rm
)
492 if ((opc
& (P_REXW
| P_EXT
| P_EXT38
)) || (rm
& 8)) {
493 /* Three byte VEX prefix. */
499 } else if (opc
& P_EXT
) {
504 tmp
|= 0x40; /* VEX.X */
505 tmp
|= (r
& 8 ? 0 : 0x80); /* VEX.R */
506 tmp
|= (rm
& 8 ? 0 : 0x20); /* VEX.B */
509 tmp
= (opc
& P_REXW
? 0x80 : 0); /* VEX.W */
511 /* Two byte VEX prefix. */
514 tmp
= (r
& 8 ? 0 : 0x80); /* VEX.R */
517 if (opc
& P_DATA16
) {
519 } else if (opc
& P_SIMDF3
) {
521 } else if (opc
& P_SIMDF2
) {
524 tmp
|= (~v
& 15) << 3; /* VEX.vvvv */
527 tcg_out8(s
, 0xc0 | (LOWREGMASK(r
) << 3) | LOWREGMASK(rm
));
530 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
531 We handle either RM and INDEX missing with a negative value. In 64-bit
532 mode for absolute addresses, ~RM is the size of the immediate operand
533 that will follow the instruction. */
535 static void tcg_out_modrm_sib_offset(TCGContext
*s
, int opc
, int r
, int rm
,
536 int index
, int shift
, intptr_t offset
)
540 if (index
< 0 && rm
< 0) {
541 if (TCG_TARGET_REG_BITS
== 64) {
542 /* Try for a rip-relative addressing mode. This has replaced
543 the 32-bit-mode absolute addressing encoding. */
544 intptr_t pc
= (intptr_t)s
->code_ptr
+ 5 + ~rm
;
545 intptr_t disp
= offset
- pc
;
546 if (disp
== (int32_t)disp
) {
547 tcg_out_opc(s
, opc
, r
, 0, 0);
548 tcg_out8(s
, (LOWREGMASK(r
) << 3) | 5);
553 /* Try for an absolute address encoding. This requires the
554 use of the MODRM+SIB encoding and is therefore larger than
555 rip-relative addressing. */
556 if (offset
== (int32_t)offset
) {
557 tcg_out_opc(s
, opc
, r
, 0, 0);
558 tcg_out8(s
, (LOWREGMASK(r
) << 3) | 4);
559 tcg_out8(s
, (4 << 3) | 5);
560 tcg_out32(s
, offset
);
564 /* ??? The memory isn't directly addressable. */
567 /* Absolute address. */
568 tcg_out_opc(s
, opc
, r
, 0, 0);
569 tcg_out8(s
, (r
<< 3) | 5);
570 tcg_out32(s
, offset
);
575 /* Find the length of the immediate addend. Note that the encoding
576 that would be used for (%ebp) indicates absolute addressing. */
578 mod
= 0, len
= 4, rm
= 5;
579 } else if (offset
== 0 && LOWREGMASK(rm
) != TCG_REG_EBP
) {
581 } else if (offset
== (int8_t)offset
) {
587 /* Use a single byte MODRM format if possible. Note that the encoding
588 that would be used for %esp is the escape to the two byte form. */
589 if (index
< 0 && LOWREGMASK(rm
) != TCG_REG_ESP
) {
590 /* Single byte MODRM format. */
591 tcg_out_opc(s
, opc
, r
, rm
, 0);
592 tcg_out8(s
, mod
| (LOWREGMASK(r
) << 3) | LOWREGMASK(rm
));
594 /* Two byte MODRM+SIB format. */
596 /* Note that the encoding that would place %esp into the index
597 field indicates no index register. In 64-bit mode, the REX.X
598 bit counts, so %r12 can be used as the index. */
602 tcg_debug_assert(index
!= TCG_REG_ESP
);
605 tcg_out_opc(s
, opc
, r
, rm
, index
);
606 tcg_out8(s
, mod
| (LOWREGMASK(r
) << 3) | 4);
607 tcg_out8(s
, (shift
<< 6) | (LOWREGMASK(index
) << 3) | LOWREGMASK(rm
));
612 } else if (len
== 4) {
613 tcg_out32(s
, offset
);
/* A simplification of tcg_out_modrm_sib_offset with no index or shift.
   Passing -1 for the index signals "no index register".  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}
624 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
625 static inline void tgen_arithr(TCGContext
*s
, int subop
, int dest
, int src
)
627 /* Propagate an opcode prefix, such as P_REXW. */
628 int ext
= subop
& ~0x7;
631 tcg_out_modrm(s
, OPC_ARITH_GvEv
+ (subop
<< 3) + ext
, dest
, src
);
634 static inline void tcg_out_mov(TCGContext
*s
, TCGType type
,
635 TCGReg ret
, TCGReg arg
)
638 int opc
= OPC_MOVL_GvEv
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
639 tcg_out_modrm(s
, opc
, ret
, arg
);
643 static void tcg_out_movi(TCGContext
*s
, TCGType type
,
644 TCGReg ret
, tcg_target_long arg
)
646 tcg_target_long diff
;
649 tgen_arithr(s
, ARITH_XOR
, ret
, ret
);
652 if (arg
== (uint32_t)arg
|| type
== TCG_TYPE_I32
) {
653 tcg_out_opc(s
, OPC_MOVL_Iv
+ LOWREGMASK(ret
), 0, ret
, 0);
657 if (arg
== (int32_t)arg
) {
658 tcg_out_modrm(s
, OPC_MOVL_EvIz
+ P_REXW
, 0, ret
);
663 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
664 diff
= arg
- ((uintptr_t)s
->code_ptr
+ 7);
665 if (diff
== (int32_t)diff
) {
666 tcg_out_opc(s
, OPC_LEA
| P_REXW
, ret
, 0, 0);
667 tcg_out8(s
, (LOWREGMASK(ret
) << 3) | 5);
672 tcg_out_opc(s
, OPC_MOVL_Iv
+ P_REXW
+ LOWREGMASK(ret
), 0, ret
, 0);
676 static inline void tcg_out_pushi(TCGContext
*s
, tcg_target_long val
)
678 if (val
== (int8_t)val
) {
679 tcg_out_opc(s
, OPC_PUSH_Ib
, 0, 0, 0);
681 } else if (val
== (int32_t)val
) {
682 tcg_out_opc(s
, OPC_PUSH_Iv
, 0, 0, 0);
689 static inline void tcg_out_mb(TCGContext
*s
, TCGArg a0
)
691 /* Given the strength of x86 memory ordering, we only need care for
692 store-load ordering. Experimentally, "lock orl $0,0(%esp)" is
693 faster than "mfence", so don't bother with the sse insn. */
694 if (a0
& TCG_MO_ST_LD
) {
696 tcg_out_modrm_offset(s
, OPC_ARITH_EvIb
, ARITH_OR
, TCG_REG_ESP
, 0);
/* Emit "push %reg".  The low 3 register bits are encoded in the opcode
   itself; bit 3, if set, is supplied by tcg_out_opc as REX.B.  */
static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}
/* Emit "pop %reg".  Register encoding mirrors tcg_out_push.  */
static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}
/* Load RET from ARG2(ARG1): movl, or movq when TYPE is TCG_TYPE_I64
   (selected via the P_REXW prefix flag).  */
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}
/* Store ARG to ARG2(ARG1): movl, or movq when TYPE is TCG_TYPE_I64
   (selected via the P_REXW prefix flag).  */
static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}
725 static bool tcg_out_sti(TCGContext
*s
, TCGType type
, TCGArg val
,
726 TCGReg base
, intptr_t ofs
)
729 if (TCG_TARGET_REG_BITS
== 64 && type
== TCG_TYPE_I64
) {
730 if (val
!= (int32_t)val
) {
735 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
| rexw
, 0, base
, ofs
);
740 static void tcg_out_shifti(TCGContext
*s
, int subopc
, int reg
, int count
)
742 /* Propagate an opcode prefix, such as P_DATA16. */
743 int ext
= subopc
& ~0x7;
747 tcg_out_modrm(s
, OPC_SHIFT_1
+ ext
, subopc
, reg
);
749 tcg_out_modrm(s
, OPC_SHIFT_Ib
+ ext
, subopc
, reg
);
/* Emit "bswap %reg" to byte-swap a 32-bit register.  */
static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}
/* Rotate the low 16 bits of REG by 8 (rolw $8, %reg), swapping its two
   bytes.  P_DATA16 selects the 16-bit operand size.  */
static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}
/* Zero-extend the low byte of SRC into DEST (movzbl).  */
static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* In 32-bit mode only %al..%bl have a byte encoding; in 64-bit mode
       the REX prefix (P_REXB_RM) makes every register's low byte
       addressable.  */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}
/* Sign-extend the low byte of SRC into DEST (movsbl, or movsbq when
   REXW is P_REXW).  */
static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* Same byte-register restriction as tcg_out_ext8u.  */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}
/* Zero-extend the low 16 bits of SRC into DEST (movzwl).  */
static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}
/* Sign-extend the low 16 bits of SRC into DEST (movswl, or movswq when
   REXW is P_REXW).  */
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}
/* Zero-extend the low 32 bits of SRC into DEST.  */
static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}
/* Sign-extend the low 32 bits of SRC into DEST (movslq).  */
static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}
/* Emit "bswap %reg" with REX.W to byte-swap a full 64-bit register.  */
static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}
806 static void tgen_arithi(TCGContext
*s
, int c
, int r0
,
807 tcg_target_long val
, int cf
)
811 if (TCG_TARGET_REG_BITS
== 64) {
816 /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
817 partial flags update stalls on Pentium4 and are not recommended
818 by current Intel optimization manuals. */
819 if (!cf
&& (c
== ARITH_ADD
|| c
== ARITH_SUB
) && (val
== 1 || val
== -1)) {
820 int is_inc
= (c
== ARITH_ADD
) ^ (val
< 0);
821 if (TCG_TARGET_REG_BITS
== 64) {
822 /* The single-byte increment encodings are re-tasked as the
823 REX prefixes. Use the MODRM encoding. */
824 tcg_out_modrm(s
, OPC_GRP5
+ rexw
,
825 (is_inc
? EXT5_INC_Ev
: EXT5_DEC_Ev
), r0
);
827 tcg_out8(s
, (is_inc
? OPC_INC_r32
: OPC_DEC_r32
) + r0
);
832 if (c
== ARITH_AND
) {
833 if (TCG_TARGET_REG_BITS
== 64) {
834 if (val
== 0xffffffffu
) {
835 tcg_out_ext32u(s
, r0
, r0
);
838 if (val
== (uint32_t)val
) {
839 /* AND with no high bits set can use a 32-bit operation. */
843 if (val
== 0xffu
&& (r0
< 4 || TCG_TARGET_REG_BITS
== 64)) {
844 tcg_out_ext8u(s
, r0
, r0
);
847 if (val
== 0xffffu
) {
848 tcg_out_ext16u(s
, r0
, r0
);
853 if (val
== (int8_t)val
) {
854 tcg_out_modrm(s
, OPC_ARITH_EvIb
+ rexw
, c
, r0
);
858 if (rexw
== 0 || val
== (int32_t)val
) {
859 tcg_out_modrm(s
, OPC_ARITH_EvIz
+ rexw
, c
, r0
);
867 static void tcg_out_addi(TCGContext
*s
, int reg
, tcg_target_long val
)
870 tgen_arithi(s
, ARITH_ADD
+ P_REXW
, reg
, val
, 0);
874 /* Use SMALL != 0 to force a short forward branch. */
875 static void tcg_out_jxx(TCGContext
*s
, int opc
, TCGLabel
*l
, int small
)
880 val
= tcg_pcrel_diff(s
, l
->u
.value_ptr
);
882 if ((int8_t)val1
== val1
) {
884 tcg_out8(s
, OPC_JMP_short
);
886 tcg_out8(s
, OPC_JCC_short
+ opc
);
894 tcg_out8(s
, OPC_JMP_long
);
895 tcg_out32(s
, val
- 5);
897 tcg_out_opc(s
, OPC_JCC_long
+ opc
, 0, 0, 0);
898 tcg_out32(s
, val
- 6);
903 tcg_out8(s
, OPC_JMP_short
);
905 tcg_out8(s
, OPC_JCC_short
+ opc
);
907 tcg_out_reloc(s
, s
->code_ptr
, R_386_PC8
, l
, -1);
911 tcg_out8(s
, OPC_JMP_long
);
913 tcg_out_opc(s
, OPC_JCC_long
+ opc
, 0, 0, 0);
915 tcg_out_reloc(s
, s
->code_ptr
, R_386_PC32
, l
, -4);
920 static void tcg_out_cmp(TCGContext
*s
, TCGArg arg1
, TCGArg arg2
,
921 int const_arg2
, int rexw
)
926 tcg_out_modrm(s
, OPC_TESTL
+ rexw
, arg1
, arg1
);
928 tgen_arithi(s
, ARITH_CMP
+ rexw
, arg1
, arg2
, 0);
931 tgen_arithr(s
, ARITH_CMP
+ rexw
, arg1
, arg2
);
/* Compare ARG1 against ARG2 (an immediate when CONST_ARG2 is set) as
   32-bit values and branch to LABEL on condition COND.  SMALL forces a
   short (8-bit displacement) branch.  */
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
943 #if TCG_TARGET_REG_BITS == 64
/* As tcg_out_brcond32, but comparing full 64-bit values (P_REXW).  */
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
952 /* XXX: we implement it at the target level to avoid having to
953 handle cross basic blocks temporaries */
954 static void tcg_out_brcond2(TCGContext
*s
, const TCGArg
*args
,
955 const int *const_args
, int small
)
957 TCGLabel
*label_next
= gen_new_label();
958 TCGLabel
*label_this
= arg_label(args
[5]);
962 tcg_out_brcond32(s
, TCG_COND_NE
, args
[0], args
[2], const_args
[2],
964 tcg_out_brcond32(s
, TCG_COND_EQ
, args
[1], args
[3], const_args
[3],
968 tcg_out_brcond32(s
, TCG_COND_NE
, args
[0], args
[2], const_args
[2],
970 tcg_out_brcond32(s
, TCG_COND_NE
, args
[1], args
[3], const_args
[3],
974 tcg_out_brcond32(s
, TCG_COND_LT
, args
[1], args
[3], const_args
[3],
976 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
977 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[0], args
[2], const_args
[2],
981 tcg_out_brcond32(s
, TCG_COND_LT
, args
[1], args
[3], const_args
[3],
983 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
984 tcg_out_brcond32(s
, TCG_COND_LEU
, args
[0], args
[2], const_args
[2],
988 tcg_out_brcond32(s
, TCG_COND_GT
, args
[1], args
[3], const_args
[3],
990 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
991 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[0], args
[2], const_args
[2],
995 tcg_out_brcond32(s
, TCG_COND_GT
, args
[1], args
[3], const_args
[3],
997 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
998 tcg_out_brcond32(s
, TCG_COND_GEU
, args
[0], args
[2], const_args
[2],
1002 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[1], args
[3], const_args
[3],
1004 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
1005 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[0], args
[2], const_args
[2],
1009 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[1], args
[3], const_args
[3],
1011 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
1012 tcg_out_brcond32(s
, TCG_COND_LEU
, args
[0], args
[2], const_args
[2],
1016 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[1], args
[3], const_args
[3],
1018 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
1019 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[0], args
[2], const_args
[2],
1023 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[1], args
[3], const_args
[3],
1025 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
1026 tcg_out_brcond32(s
, TCG_COND_GEU
, args
[0], args
[2], const_args
[2],
1032 tcg_out_label(s
, label_next
, s
->code_ptr
);
/* Set DEST to 0 or 1 according to the 32-bit comparison
   ARG1 <cond> ARG2 (ARG2 an immediate when CONST_ARG2 is set).  */
static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    /* setcc writes only the low byte; zero-extend to the full register.  */
    tcg_out_ext8u(s, dest, dest);
}
1044 #if TCG_TARGET_REG_BITS == 64
/* As tcg_out_setcond32, but comparing full 64-bit values (P_REXW).  */
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    /* setcc writes only the low byte; zero-extend to the full register.  */
    tcg_out_ext8u(s, dest, dest);
}
1053 static void tcg_out_setcond2(TCGContext
*s
, const TCGArg
*args
,
1054 const int *const_args
)
1057 TCGLabel
*label_true
, *label_over
;
1059 memcpy(new_args
, args
+1, 5*sizeof(TCGArg
));
1061 if (args
[0] == args
[1] || args
[0] == args
[2]
1062 || (!const_args
[3] && args
[0] == args
[3])
1063 || (!const_args
[4] && args
[0] == args
[4])) {
1064 /* When the destination overlaps with one of the argument
1065 registers, don't do anything tricky. */
1066 label_true
= gen_new_label();
1067 label_over
= gen_new_label();
1069 new_args
[5] = label_arg(label_true
);
1070 tcg_out_brcond2(s
, new_args
, const_args
+1, 1);
1072 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 0);
1073 tcg_out_jxx(s
, JCC_JMP
, label_over
, 1);
1074 tcg_out_label(s
, label_true
, s
->code_ptr
);
1076 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 1);
1077 tcg_out_label(s
, label_over
, s
->code_ptr
);
1079 /* When the destination does not overlap one of the arguments,
1080 clear the destination first, jump if cond false, and emit an
1081 increment in the true case. This results in smaller code. */
1083 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 0);
1085 label_over
= gen_new_label();
1086 new_args
[4] = tcg_invert_cond(new_args
[4]);
1087 new_args
[5] = label_arg(label_over
);
1088 tcg_out_brcond2(s
, new_args
, const_args
+1, 1);
1090 tgen_arithi(s
, ARITH_ADD
, args
[0], 1, 0);
1091 tcg_out_label(s
, label_over
, s
->code_ptr
);
1096 static void tcg_out_movcond32(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
1097 TCGArg c1
, TCGArg c2
, int const_c2
,
1100 tcg_out_cmp(s
, c1
, c2
, const_c2
, 0);
1102 tcg_out_modrm(s
, OPC_CMOVCC
| tcg_cond_to_jcc
[cond
], dest
, v1
);
1104 TCGLabel
*over
= gen_new_label();
1105 tcg_out_jxx(s
, tcg_cond_to_jcc
[tcg_invert_cond(cond
)], over
, 1);
1106 tcg_out_mov(s
, TCG_TYPE_I32
, dest
, v1
);
1107 tcg_out_label(s
, over
, s
->code_ptr
);
1111 #if TCG_TARGET_REG_BITS == 64
1112 static void tcg_out_movcond64(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
1113 TCGArg c1
, TCGArg c2
, int const_c2
,
1116 tcg_out_cmp(s
, c1
, c2
, const_c2
, P_REXW
);
1117 tcg_out_modrm(s
, OPC_CMOVCC
| tcg_cond_to_jcc
[cond
] | P_REXW
, dest
, v1
);
1121 static void tcg_out_branch(TCGContext
*s
, int call
, tcg_insn_unit
*dest
)
1123 intptr_t disp
= tcg_pcrel_diff(s
, dest
) - 5;
1125 if (disp
== (int32_t)disp
) {
1126 tcg_out_opc(s
, call
? OPC_CALL_Jz
: OPC_JMP_long
, 0, 0, 0);
1129 tcg_out_movi(s
, TCG_TYPE_PTR
, TCG_REG_R10
, (uintptr_t)dest
);
1130 tcg_out_modrm(s
, OPC_GRP5
,
1131 call
? EXT5_CALLN_Ev
: EXT5_JMPN_Ev
, TCG_REG_R10
);
/* Emit a call to DEST; tcg_out_branch uses a direct call when DEST is
   within 32-bit displacement range, else an indirect call via %r10.  */
static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 1, dest);
}
/* Emit an unconditional jump to DEST (direct or indirect, as for
   tcg_out_call).  */
static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 0, dest);
}
1145 static void tcg_out_nopn(TCGContext
*s
, int n
)
1148 /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
1149 * "xchg %eax,%eax", forming "xchg %ax,%ax". All cores accept the
1150 * duplicate prefix, and all of the interesting recent cores can
1151 * decode and discard the duplicates in a single cycle.
1153 tcg_debug_assert(n
>= 1);
1154 for (i
= 1; i
< n
; ++i
) {
1160 #if defined(CONFIG_SOFTMMU)
1161 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1162 * int mmu_idx, uintptr_t ra)
1164 static void * const qemu_ld_helpers
[16] = {
1165 [MO_UB
] = helper_ret_ldub_mmu
,
1166 [MO_LEUW
] = helper_le_lduw_mmu
,
1167 [MO_LEUL
] = helper_le_ldul_mmu
,
1168 [MO_LEQ
] = helper_le_ldq_mmu
,
1169 [MO_BEUW
] = helper_be_lduw_mmu
,
1170 [MO_BEUL
] = helper_be_ldul_mmu
,
1171 [MO_BEQ
] = helper_be_ldq_mmu
,
1174 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1175 * uintxx_t val, int mmu_idx, uintptr_t ra)
1177 static void * const qemu_st_helpers
[16] = {
1178 [MO_UB
] = helper_ret_stb_mmu
,
1179 [MO_LEUW
] = helper_le_stw_mmu
,
1180 [MO_LEUL
] = helper_le_stl_mmu
,
1181 [MO_LEQ
] = helper_le_stq_mmu
,
1182 [MO_BEUW
] = helper_be_stw_mmu
,
1183 [MO_BEUL
] = helper_be_stl_mmu
,
1184 [MO_BEQ
] = helper_be_stq_mmu
,
1187 /* Perform the TLB load and compare.
1190 ADDRLO and ADDRHI contain the low and high part of the address.
1192 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1194 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1195 This should be offsetof addr_read or addr_write.
1198 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1199 positions of the displacements of forward jumps to the TLB miss case.
1201 Second argument register is loaded with the low part of the address.
1202 In the TLB hit case, it has been adjusted as indicated by the TLB
1203 and so is a host address. In the TLB miss case, it continues to
1204 hold a guest address.
1206 First argument register is clobbered. */
1208 static inline void tcg_out_tlb_load(TCGContext
*s
, TCGReg addrlo
, TCGReg addrhi
,
1209 int mem_index
, TCGMemOp opc
,
1210 tcg_insn_unit
**label_ptr
, int which
)
1212 const TCGReg r0
= TCG_REG_L0
;
1213 const TCGReg r1
= TCG_REG_L1
;
1214 TCGType ttype
= TCG_TYPE_I32
;
1215 TCGType tlbtype
= TCG_TYPE_I32
;
1216 int trexw
= 0, hrexw
= 0, tlbrexw
= 0;
1217 unsigned a_bits
= get_alignment_bits(opc
);
1218 unsigned s_bits
= opc
& MO_SIZE
;
1219 unsigned a_mask
= (1 << a_bits
) - 1;
1220 unsigned s_mask
= (1 << s_bits
) - 1;
1221 target_ulong tlb_mask
;
1223 if (TCG_TARGET_REG_BITS
== 64) {
1224 if (TARGET_LONG_BITS
== 64) {
1225 ttype
= TCG_TYPE_I64
;
1228 if (TCG_TYPE_PTR
== TCG_TYPE_I64
) {
1230 if (TARGET_PAGE_BITS
+ CPU_TLB_BITS
> 32) {
1231 tlbtype
= TCG_TYPE_I64
;
1237 tcg_out_mov(s
, tlbtype
, r0
, addrlo
);
1238 /* If the required alignment is at least as large as the access, simply
1239 copy the address and mask. For lesser alignments, check that we don't
1240 cross pages for the complete access. */
1241 if (a_bits
>= s_bits
) {
1242 tcg_out_mov(s
, ttype
, r1
, addrlo
);
1244 tcg_out_modrm_offset(s
, OPC_LEA
+ trexw
, r1
, addrlo
, s_mask
- a_mask
);
1246 tlb_mask
= (target_ulong
)TARGET_PAGE_MASK
| a_mask
;
1248 tcg_out_shifti(s
, SHIFT_SHR
+ tlbrexw
, r0
,
1249 TARGET_PAGE_BITS
- CPU_TLB_ENTRY_BITS
);
1251 tgen_arithi(s
, ARITH_AND
+ trexw
, r1
, tlb_mask
, 0);
1252 tgen_arithi(s
, ARITH_AND
+ tlbrexw
, r0
,
1253 (CPU_TLB_SIZE
- 1) << CPU_TLB_ENTRY_BITS
, 0);
1255 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ hrexw
, r0
, TCG_AREG0
, r0
, 0,
1256 offsetof(CPUArchState
, tlb_table
[mem_index
][0])
1260 tcg_out_modrm_offset(s
, OPC_CMP_GvEv
+ trexw
, r1
, r0
, 0);
1262 /* Prepare for both the fast path add of the tlb addend, and the slow
1263 path function argument setup. There are two cases worth note:
1264 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1265 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1266 copies the entire guest address for the slow path, while truncation
1267 for the 32-bit host happens with the fastpath ADDL below. */
1268 tcg_out_mov(s
, ttype
, r1
, addrlo
);
1271 tcg_out_opc(s
, OPC_JCC_long
+ JCC_JNE
, 0, 0, 0);
1272 label_ptr
[0] = s
->code_ptr
;
1275 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1276 /* cmp 4(r0), addrhi */
1277 tcg_out_modrm_offset(s
, OPC_CMP_GvEv
, addrhi
, r0
, 4);
1280 tcg_out_opc(s
, OPC_JCC_long
+ JCC_JNE
, 0, 0, 0);
1281 label_ptr
[1] = s
->code_ptr
;
1287 /* add addend(r0), r1 */
1288 tcg_out_modrm_offset(s
, OPC_ADD_GvEv
+ hrexw
, r1
, r0
,
1289 offsetof(CPUTLBEntry
, addend
) - which
);
1293 * Record the context of a call to the out of line helper code for the slow path
1294 * for a load or store, so that we can later generate the correct helper code
1296 static void add_qemu_ldst_label(TCGContext
*s
, bool is_ld
, TCGMemOpIdx oi
,
1297 TCGReg datalo
, TCGReg datahi
,
1298 TCGReg addrlo
, TCGReg addrhi
,
1299 tcg_insn_unit
*raddr
,
1300 tcg_insn_unit
**label_ptr
)
1302 TCGLabelQemuLdst
*label
= new_ldst_label(s
);
1304 label
->is_ld
= is_ld
;
1306 label
->datalo_reg
= datalo
;
1307 label
->datahi_reg
= datahi
;
1308 label
->addrlo_reg
= addrlo
;
1309 label
->addrhi_reg
= addrhi
;
1310 label
->raddr
= raddr
;
1311 label
->label_ptr
[0] = label_ptr
[0];
1312 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1313 label
->label_ptr
[1] = label_ptr
[1];
1318 * Generate code for the slow path for a load at the end of block
1320 static void tcg_out_qemu_ld_slow_path(TCGContext
*s
, TCGLabelQemuLdst
*l
)
1322 TCGMemOpIdx oi
= l
->oi
;
1323 TCGMemOp opc
= get_memop(oi
);
1325 tcg_insn_unit
**label_ptr
= &l
->label_ptr
[0];
1327 /* resolve label address */
1328 tcg_patch32(label_ptr
[0], s
->code_ptr
- label_ptr
[0] - 4);
1329 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1330 tcg_patch32(label_ptr
[1], s
->code_ptr
- label_ptr
[1] - 4);
1333 if (TCG_TARGET_REG_BITS
== 32) {
1336 tcg_out_st(s
, TCG_TYPE_PTR
, TCG_AREG0
, TCG_REG_ESP
, ofs
);
1339 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrlo_reg
, TCG_REG_ESP
, ofs
);
1342 if (TARGET_LONG_BITS
== 64) {
1343 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrhi_reg
, TCG_REG_ESP
, ofs
);
1347 tcg_out_sti(s
, TCG_TYPE_I32
, oi
, TCG_REG_ESP
, ofs
);
1350 tcg_out_sti(s
, TCG_TYPE_PTR
, (uintptr_t)l
->raddr
, TCG_REG_ESP
, ofs
);
1352 tcg_out_mov(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[0], TCG_AREG0
);
1353 /* The second argument is already loaded with addrlo. */
1354 tcg_out_movi(s
, TCG_TYPE_I32
, tcg_target_call_iarg_regs
[2], oi
);
1355 tcg_out_movi(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[3],
1356 (uintptr_t)l
->raddr
);
1359 tcg_out_call(s
, qemu_ld_helpers
[opc
& (MO_BSWAP
| MO_SIZE
)]);
1361 data_reg
= l
->datalo_reg
;
1362 switch (opc
& MO_SSIZE
) {
1364 tcg_out_ext8s(s
, data_reg
, TCG_REG_EAX
, P_REXW
);
1367 tcg_out_ext16s(s
, data_reg
, TCG_REG_EAX
, P_REXW
);
1369 #if TCG_TARGET_REG_BITS == 64
1371 tcg_out_ext32s(s
, data_reg
, TCG_REG_EAX
);
1376 /* Note that the helpers have zero-extended to tcg_target_long. */
1378 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg
, TCG_REG_EAX
);
1381 if (TCG_TARGET_REG_BITS
== 64) {
1382 tcg_out_mov(s
, TCG_TYPE_I64
, data_reg
, TCG_REG_RAX
);
1383 } else if (data_reg
== TCG_REG_EDX
) {
1384 /* xchg %edx, %eax */
1385 tcg_out_opc(s
, OPC_XCHG_ax_r32
+ TCG_REG_EDX
, 0, 0, 0);
1386 tcg_out_mov(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_EAX
);
1388 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg
, TCG_REG_EAX
);
1389 tcg_out_mov(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_EDX
);
1396 /* Jump to the code corresponding to next IR of qemu_st */
1397 tcg_out_jmp(s
, l
->raddr
);
1401 * Generate code for the slow path for a store at the end of block
1403 static void tcg_out_qemu_st_slow_path(TCGContext
*s
, TCGLabelQemuLdst
*l
)
1405 TCGMemOpIdx oi
= l
->oi
;
1406 TCGMemOp opc
= get_memop(oi
);
1407 TCGMemOp s_bits
= opc
& MO_SIZE
;
1408 tcg_insn_unit
**label_ptr
= &l
->label_ptr
[0];
1411 /* resolve label address */
1412 tcg_patch32(label_ptr
[0], s
->code_ptr
- label_ptr
[0] - 4);
1413 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1414 tcg_patch32(label_ptr
[1], s
->code_ptr
- label_ptr
[1] - 4);
1417 if (TCG_TARGET_REG_BITS
== 32) {
1420 tcg_out_st(s
, TCG_TYPE_PTR
, TCG_AREG0
, TCG_REG_ESP
, ofs
);
1423 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrlo_reg
, TCG_REG_ESP
, ofs
);
1426 if (TARGET_LONG_BITS
== 64) {
1427 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrhi_reg
, TCG_REG_ESP
, ofs
);
1431 tcg_out_st(s
, TCG_TYPE_I32
, l
->datalo_reg
, TCG_REG_ESP
, ofs
);
1434 if (s_bits
== MO_64
) {
1435 tcg_out_st(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_ESP
, ofs
);
1439 tcg_out_sti(s
, TCG_TYPE_I32
, oi
, TCG_REG_ESP
, ofs
);
1442 retaddr
= TCG_REG_EAX
;
1443 tcg_out_movi(s
, TCG_TYPE_PTR
, retaddr
, (uintptr_t)l
->raddr
);
1444 tcg_out_st(s
, TCG_TYPE_PTR
, retaddr
, TCG_REG_ESP
, ofs
);
1446 tcg_out_mov(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[0], TCG_AREG0
);
1447 /* The second argument is already loaded with addrlo. */
1448 tcg_out_mov(s
, (s_bits
== MO_64
? TCG_TYPE_I64
: TCG_TYPE_I32
),
1449 tcg_target_call_iarg_regs
[2], l
->datalo_reg
);
1450 tcg_out_movi(s
, TCG_TYPE_I32
, tcg_target_call_iarg_regs
[3], oi
);
1452 if (ARRAY_SIZE(tcg_target_call_iarg_regs
) > 4) {
1453 retaddr
= tcg_target_call_iarg_regs
[4];
1454 tcg_out_movi(s
, TCG_TYPE_PTR
, retaddr
, (uintptr_t)l
->raddr
);
1456 retaddr
= TCG_REG_RAX
;
1457 tcg_out_movi(s
, TCG_TYPE_PTR
, retaddr
, (uintptr_t)l
->raddr
);
1458 tcg_out_st(s
, TCG_TYPE_PTR
, retaddr
, TCG_REG_ESP
,
1459 TCG_TARGET_CALL_STACK_OFFSET
);
1463 /* "Tail call" to the helper, with the return address back inline. */
1464 tcg_out_push(s
, retaddr
);
1465 tcg_out_jmp(s
, qemu_st_helpers
[opc
& (MO_BSWAP
| MO_SIZE
)]);
1467 #elif defined(__x86_64__) && defined(__linux__)
1468 # include <asm/prctl.h>
1469 # include <sys/prctl.h>
1471 int arch_prctl(int code
, unsigned long addr
);
1473 static int guest_base_flags
;
1474 static inline void setup_guest_base_seg(void)
1476 if (arch_prctl(ARCH_SET_GS
, guest_base
) == 0) {
1477 guest_base_flags
= P_GS
;
1481 # define guest_base_flags 0
1482 static inline void setup_guest_base_seg(void) { }
1483 #endif /* SOFTMMU */
1485 static void tcg_out_qemu_ld_direct(TCGContext
*s
, TCGReg datalo
, TCGReg datahi
,
1486 TCGReg base
, int index
, intptr_t ofs
,
1487 int seg
, TCGMemOp memop
)
1489 const TCGMemOp real_bswap
= memop
& MO_BSWAP
;
1490 TCGMemOp bswap
= real_bswap
;
1491 int movop
= OPC_MOVL_GvEv
;
1493 if (have_movbe
&& real_bswap
) {
1495 movop
= OPC_MOVBE_GyMy
;
1498 switch (memop
& MO_SSIZE
) {
1500 tcg_out_modrm_sib_offset(s
, OPC_MOVZBL
+ seg
, datalo
,
1501 base
, index
, 0, ofs
);
1504 tcg_out_modrm_sib_offset(s
, OPC_MOVSBL
+ P_REXW
+ seg
, datalo
,
1505 base
, index
, 0, ofs
);
1508 tcg_out_modrm_sib_offset(s
, OPC_MOVZWL
+ seg
, datalo
,
1509 base
, index
, 0, ofs
);
1511 tcg_out_rolw_8(s
, datalo
);
1517 tcg_out_modrm_sib_offset(s
, OPC_MOVBE_GyMy
+ P_DATA16
+ seg
,
1518 datalo
, base
, index
, 0, ofs
);
1520 tcg_out_modrm_sib_offset(s
, OPC_MOVZWL
+ seg
, datalo
,
1521 base
, index
, 0, ofs
);
1522 tcg_out_rolw_8(s
, datalo
);
1524 tcg_out_modrm(s
, OPC_MOVSWL
+ P_REXW
, datalo
, datalo
);
1526 tcg_out_modrm_sib_offset(s
, OPC_MOVSWL
+ P_REXW
+ seg
,
1527 datalo
, base
, index
, 0, ofs
);
1531 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
, base
, index
, 0, ofs
);
1533 tcg_out_bswap32(s
, datalo
);
1536 #if TCG_TARGET_REG_BITS == 64
1539 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
,
1540 base
, index
, 0, ofs
);
1542 tcg_out_bswap32(s
, datalo
);
1544 tcg_out_ext32s(s
, datalo
, datalo
);
1546 tcg_out_modrm_sib_offset(s
, OPC_MOVSLQ
+ seg
, datalo
,
1547 base
, index
, 0, ofs
);
1552 if (TCG_TARGET_REG_BITS
== 64) {
1553 tcg_out_modrm_sib_offset(s
, movop
+ P_REXW
+ seg
, datalo
,
1554 base
, index
, 0, ofs
);
1556 tcg_out_bswap64(s
, datalo
);
1564 if (base
!= datalo
) {
1565 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
,
1566 base
, index
, 0, ofs
);
1567 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datahi
,
1568 base
, index
, 0, ofs
+ 4);
1570 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datahi
,
1571 base
, index
, 0, ofs
+ 4);
1572 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
,
1573 base
, index
, 0, ofs
);
1576 tcg_out_bswap32(s
, datalo
);
1577 tcg_out_bswap32(s
, datahi
);
1586 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1587 EAX. It will be useful once fixed registers globals are less
1589 static void tcg_out_qemu_ld(TCGContext
*s
, const TCGArg
*args
, bool is64
)
1591 TCGReg datalo
, datahi
, addrlo
;
1592 TCGReg addrhi
__attribute__((unused
));
1595 #if defined(CONFIG_SOFTMMU)
1597 tcg_insn_unit
*label_ptr
[2];
1601 datahi
= (TCG_TARGET_REG_BITS
== 32 && is64
? *args
++ : 0);
1603 addrhi
= (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
? *args
++ : 0);
1605 opc
= get_memop(oi
);
1607 #if defined(CONFIG_SOFTMMU)
1608 mem_index
= get_mmuidx(oi
);
1610 tcg_out_tlb_load(s
, addrlo
, addrhi
, mem_index
, opc
,
1611 label_ptr
, offsetof(CPUTLBEntry
, addr_read
));
1614 tcg_out_qemu_ld_direct(s
, datalo
, datahi
, TCG_REG_L1
, -1, 0, 0, opc
);
1616 /* Record the current context of a load into ldst label */
1617 add_qemu_ldst_label(s
, true, oi
, datalo
, datahi
, addrlo
, addrhi
,
1618 s
->code_ptr
, label_ptr
);
1621 int32_t offset
= guest_base
;
1622 TCGReg base
= addrlo
;
1626 /* For a 32-bit guest, the high 32 bits may contain garbage.
1627 We can do this with the ADDR32 prefix if we're not using
1628 a guest base, or when using segmentation. Otherwise we
1629 need to zero-extend manually. */
1630 if (guest_base
== 0 || guest_base_flags
) {
1631 seg
= guest_base_flags
;
1633 if (TCG_TARGET_REG_BITS
> TARGET_LONG_BITS
) {
1636 } else if (TCG_TARGET_REG_BITS
== 64) {
1637 if (TARGET_LONG_BITS
== 32) {
1638 tcg_out_ext32u(s
, TCG_REG_L0
, base
);
1641 if (offset
!= guest_base
) {
1642 tcg_out_movi(s
, TCG_TYPE_I64
, TCG_REG_L1
, guest_base
);
1648 tcg_out_qemu_ld_direct(s
, datalo
, datahi
,
1649 base
, index
, offset
, seg
, opc
);
1654 static void tcg_out_qemu_st_direct(TCGContext
*s
, TCGReg datalo
, TCGReg datahi
,
1655 TCGReg base
, intptr_t ofs
, int seg
,
1658 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1659 we could perform the bswap twice to restore the original value
1660 instead of moving to the scratch. But as it is, the L constraint
1661 means that TCG_REG_L0 is definitely free here. */
1662 const TCGReg scratch
= TCG_REG_L0
;
1663 const TCGMemOp real_bswap
= memop
& MO_BSWAP
;
1664 TCGMemOp bswap
= real_bswap
;
1665 int movop
= OPC_MOVL_EvGv
;
1667 if (have_movbe
&& real_bswap
) {
1669 movop
= OPC_MOVBE_MyGy
;
1672 switch (memop
& MO_SIZE
) {
1674 /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1675 Use the scratch register if necessary. */
1676 if (TCG_TARGET_REG_BITS
== 32 && datalo
>= 4) {
1677 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1680 tcg_out_modrm_offset(s
, OPC_MOVB_EvGv
+ P_REXB_R
+ seg
,
1685 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1686 tcg_out_rolw_8(s
, scratch
);
1689 tcg_out_modrm_offset(s
, movop
+ P_DATA16
+ seg
, datalo
, base
, ofs
);
1693 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1694 tcg_out_bswap32(s
, scratch
);
1697 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1700 if (TCG_TARGET_REG_BITS
== 64) {
1702 tcg_out_mov(s
, TCG_TYPE_I64
, scratch
, datalo
);
1703 tcg_out_bswap64(s
, scratch
);
1706 tcg_out_modrm_offset(s
, movop
+ P_REXW
+ seg
, datalo
, base
, ofs
);
1708 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datahi
);
1709 tcg_out_bswap32(s
, scratch
);
1710 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
+ seg
, scratch
, base
, ofs
);
1711 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1712 tcg_out_bswap32(s
, scratch
);
1713 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
+ seg
, scratch
, base
, ofs
+4);
1720 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1721 tcg_out_modrm_offset(s
, movop
+ seg
, datahi
, base
, ofs
+4);
1729 static void tcg_out_qemu_st(TCGContext
*s
, const TCGArg
*args
, bool is64
)
1731 TCGReg datalo
, datahi
, addrlo
;
1732 TCGReg addrhi
__attribute__((unused
));
1735 #if defined(CONFIG_SOFTMMU)
1737 tcg_insn_unit
*label_ptr
[2];
1741 datahi
= (TCG_TARGET_REG_BITS
== 32 && is64
? *args
++ : 0);
1743 addrhi
= (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
? *args
++ : 0);
1745 opc
= get_memop(oi
);
1747 #if defined(CONFIG_SOFTMMU)
1748 mem_index
= get_mmuidx(oi
);
1750 tcg_out_tlb_load(s
, addrlo
, addrhi
, mem_index
, opc
,
1751 label_ptr
, offsetof(CPUTLBEntry
, addr_write
));
1754 tcg_out_qemu_st_direct(s
, datalo
, datahi
, TCG_REG_L1
, 0, 0, opc
);
1756 /* Record the current context of a store into ldst label */
1757 add_qemu_ldst_label(s
, false, oi
, datalo
, datahi
, addrlo
, addrhi
,
1758 s
->code_ptr
, label_ptr
);
1761 int32_t offset
= guest_base
;
1762 TCGReg base
= addrlo
;
1765 /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */
1766 if (guest_base
== 0 || guest_base_flags
) {
1767 seg
= guest_base_flags
;
1769 if (TCG_TARGET_REG_BITS
> TARGET_LONG_BITS
) {
1772 } else if (TCG_TARGET_REG_BITS
== 64) {
1773 /* ??? Note that we can't use the same SIB addressing scheme
1774 as for loads, since we require L0 free for bswap. */
1775 if (offset
!= guest_base
) {
1776 if (TARGET_LONG_BITS
== 32) {
1777 tcg_out_ext32u(s
, TCG_REG_L0
, base
);
1780 tcg_out_movi(s
, TCG_TYPE_I64
, TCG_REG_L1
, guest_base
);
1781 tgen_arithr(s
, ARITH_ADD
+ P_REXW
, TCG_REG_L1
, base
);
1784 } else if (TARGET_LONG_BITS
== 32) {
1785 tcg_out_ext32u(s
, TCG_REG_L1
, base
);
1790 tcg_out_qemu_st_direct(s
, datalo
, datahi
, base
, offset
, seg
, opc
);
1795 static inline void tcg_out_op(TCGContext
*s
, TCGOpcode opc
,
1796 const TCGArg
*args
, const int *const_args
)
1798 int c
, vexop
, rexw
= 0;
1800 #if TCG_TARGET_REG_BITS == 64
1801 # define OP_32_64(x) \
1802 case glue(glue(INDEX_op_, x), _i64): \
1803 rexw = P_REXW; /* FALLTHRU */ \
1804 case glue(glue(INDEX_op_, x), _i32)
1806 # define OP_32_64(x) \
1807 case glue(glue(INDEX_op_, x), _i32)
1811 case INDEX_op_exit_tb
:
1812 tcg_out_movi(s
, TCG_TYPE_PTR
, TCG_REG_EAX
, args
[0]);
1813 tcg_out_jmp(s
, tb_ret_addr
);
1815 case INDEX_op_goto_tb
:
1816 if (s
->tb_jmp_insn_offset
) {
1817 /* direct jump method */
1819 /* jump displacement must be aligned for atomic patching;
1820 * see if we need to add extra nops before jump
1822 gap
= tcg_pcrel_diff(s
, QEMU_ALIGN_PTR_UP(s
->code_ptr
+ 1, 4));
1824 tcg_out_nopn(s
, gap
- 1);
1826 tcg_out8(s
, OPC_JMP_long
); /* jmp im */
1827 s
->tb_jmp_insn_offset
[args
[0]] = tcg_current_code_size(s
);
1830 /* indirect jump method */
1831 tcg_out_modrm_offset(s
, OPC_GRP5
, EXT5_JMPN_Ev
, -1,
1832 (intptr_t)(s
->tb_jmp_target_addr
+ args
[0]));
1834 s
->tb_jmp_reset_offset
[args
[0]] = tcg_current_code_size(s
);
1837 tcg_out_jxx(s
, JCC_JMP
, arg_label(args
[0]), 0);
1840 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1841 tcg_out_modrm_offset(s
, OPC_MOVZBL
, args
[0], args
[1], args
[2]);
1844 tcg_out_modrm_offset(s
, OPC_MOVSBL
+ rexw
, args
[0], args
[1], args
[2]);
1847 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1848 tcg_out_modrm_offset(s
, OPC_MOVZWL
, args
[0], args
[1], args
[2]);
1851 tcg_out_modrm_offset(s
, OPC_MOVSWL
+ rexw
, args
[0], args
[1], args
[2]);
1853 #if TCG_TARGET_REG_BITS == 64
1854 case INDEX_op_ld32u_i64
:
1856 case INDEX_op_ld_i32
:
1857 tcg_out_ld(s
, TCG_TYPE_I32
, args
[0], args
[1], args
[2]);
1861 if (const_args
[0]) {
1862 tcg_out_modrm_offset(s
, OPC_MOVB_EvIz
,
1863 0, args
[1], args
[2]);
1864 tcg_out8(s
, args
[0]);
1866 tcg_out_modrm_offset(s
, OPC_MOVB_EvGv
| P_REXB_R
,
1867 args
[0], args
[1], args
[2]);
1871 if (const_args
[0]) {
1872 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
| P_DATA16
,
1873 0, args
[1], args
[2]);
1874 tcg_out16(s
, args
[0]);
1876 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
| P_DATA16
,
1877 args
[0], args
[1], args
[2]);
1880 #if TCG_TARGET_REG_BITS == 64
1881 case INDEX_op_st32_i64
:
1883 case INDEX_op_st_i32
:
1884 if (const_args
[0]) {
1885 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
, 0, args
[1], args
[2]);
1886 tcg_out32(s
, args
[0]);
1888 tcg_out_st(s
, TCG_TYPE_I32
, args
[0], args
[1], args
[2]);
1893 /* For 3-operand addition, use LEA. */
1894 if (args
[0] != args
[1]) {
1895 TCGArg a0
= args
[0], a1
= args
[1], a2
= args
[2], c3
= 0;
1897 if (const_args
[2]) {
1899 } else if (a0
== a2
) {
1900 /* Watch out for dest = src + dest, since we've removed
1901 the matching constraint on the add. */
1902 tgen_arithr(s
, ARITH_ADD
+ rexw
, a0
, a1
);
1906 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ rexw
, a0
, a1
, a2
, 0, c3
);
1924 if (const_args
[2]) {
1925 tgen_arithi(s
, c
+ rexw
, args
[0], args
[2], 0);
1927 tgen_arithr(s
, c
+ rexw
, args
[0], args
[2]);
1932 if (const_args
[2]) {
1933 tcg_out_mov(s
, rexw
? TCG_TYPE_I64
: TCG_TYPE_I32
,
1935 tgen_arithi(s
, ARITH_AND
+ rexw
, args
[0], ~args
[2], 0);
1937 tcg_out_vex_modrm(s
, OPC_ANDN
+ rexw
, args
[0], args
[2], args
[1]);
1942 if (const_args
[2]) {
1945 if (val
== (int8_t)val
) {
1946 tcg_out_modrm(s
, OPC_IMUL_GvEvIb
+ rexw
, args
[0], args
[0]);
1949 tcg_out_modrm(s
, OPC_IMUL_GvEvIz
+ rexw
, args
[0], args
[0]);
1953 tcg_out_modrm(s
, OPC_IMUL_GvEv
+ rexw
, args
[0], args
[2]);
1958 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_IDIV
, args
[4]);
1961 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_DIV
, args
[4]);
1967 goto gen_shift_maybe_vex
;
1971 goto gen_shift_maybe_vex
;
1975 goto gen_shift_maybe_vex
;
1982 gen_shift_maybe_vex
:
1983 if (have_bmi2
&& !const_args
[2]) {
1984 tcg_out_vex_modrm(s
, vexop
+ rexw
, args
[0], args
[2], args
[1]);
1989 if (const_args
[2]) {
1990 tcg_out_shifti(s
, c
+ rexw
, args
[0], args
[2]);
1992 tcg_out_modrm(s
, OPC_SHIFT_cl
+ rexw
, c
, args
[0]);
1996 case INDEX_op_brcond_i32
:
1997 tcg_out_brcond32(s
, args
[2], args
[0], args
[1], const_args
[1],
1998 arg_label(args
[3]), 0);
2000 case INDEX_op_setcond_i32
:
2001 tcg_out_setcond32(s
, args
[3], args
[0], args
[1],
2002 args
[2], const_args
[2]);
2004 case INDEX_op_movcond_i32
:
2005 tcg_out_movcond32(s
, args
[5], args
[0], args
[1],
2006 args
[2], const_args
[2], args
[3]);
2010 tcg_out_rolw_8(s
, args
[0]);
2013 tcg_out_bswap32(s
, args
[0]);
2017 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_NEG
, args
[0]);
2020 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_NOT
, args
[0]);
2024 tcg_out_ext8s(s
, args
[0], args
[1], rexw
);
2027 tcg_out_ext16s(s
, args
[0], args
[1], rexw
);
2030 tcg_out_ext8u(s
, args
[0], args
[1]);
2033 tcg_out_ext16u(s
, args
[0], args
[1]);
2036 case INDEX_op_qemu_ld_i32
:
2037 tcg_out_qemu_ld(s
, args
, 0);
2039 case INDEX_op_qemu_ld_i64
:
2040 tcg_out_qemu_ld(s
, args
, 1);
2042 case INDEX_op_qemu_st_i32
:
2043 tcg_out_qemu_st(s
, args
, 0);
2045 case INDEX_op_qemu_st_i64
:
2046 tcg_out_qemu_st(s
, args
, 1);
2050 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_MUL
, args
[3]);
2053 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_IMUL
, args
[3]);
2056 if (const_args
[4]) {
2057 tgen_arithi(s
, ARITH_ADD
+ rexw
, args
[0], args
[4], 1);
2059 tgen_arithr(s
, ARITH_ADD
+ rexw
, args
[0], args
[4]);
2061 if (const_args
[5]) {
2062 tgen_arithi(s
, ARITH_ADC
+ rexw
, args
[1], args
[5], 1);
2064 tgen_arithr(s
, ARITH_ADC
+ rexw
, args
[1], args
[5]);
2068 if (const_args
[4]) {
2069 tgen_arithi(s
, ARITH_SUB
+ rexw
, args
[0], args
[4], 1);
2071 tgen_arithr(s
, ARITH_SUB
+ rexw
, args
[0], args
[4]);
2073 if (const_args
[5]) {
2074 tgen_arithi(s
, ARITH_SBB
+ rexw
, args
[1], args
[5], 1);
2076 tgen_arithr(s
, ARITH_SBB
+ rexw
, args
[1], args
[5]);
2080 #if TCG_TARGET_REG_BITS == 32
2081 case INDEX_op_brcond2_i32
:
2082 tcg_out_brcond2(s
, args
, const_args
, 0);
2084 case INDEX_op_setcond2_i32
:
2085 tcg_out_setcond2(s
, args
, const_args
);
2087 #else /* TCG_TARGET_REG_BITS == 64 */
2088 case INDEX_op_ld32s_i64
:
2089 tcg_out_modrm_offset(s
, OPC_MOVSLQ
, args
[0], args
[1], args
[2]);
2091 case INDEX_op_ld_i64
:
2092 tcg_out_ld(s
, TCG_TYPE_I64
, args
[0], args
[1], args
[2]);
2094 case INDEX_op_st_i64
:
2095 if (const_args
[0]) {
2096 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
| P_REXW
,
2097 0, args
[1], args
[2]);
2098 tcg_out32(s
, args
[0]);
2100 tcg_out_st(s
, TCG_TYPE_I64
, args
[0], args
[1], args
[2]);
2104 case INDEX_op_brcond_i64
:
2105 tcg_out_brcond64(s
, args
[2], args
[0], args
[1], const_args
[1],
2106 arg_label(args
[3]), 0);
2108 case INDEX_op_setcond_i64
:
2109 tcg_out_setcond64(s
, args
[3], args
[0], args
[1],
2110 args
[2], const_args
[2]);
2112 case INDEX_op_movcond_i64
:
2113 tcg_out_movcond64(s
, args
[5], args
[0], args
[1],
2114 args
[2], const_args
[2], args
[3]);
2117 case INDEX_op_bswap64_i64
:
2118 tcg_out_bswap64(s
, args
[0]);
2120 case INDEX_op_extu_i32_i64
:
2121 case INDEX_op_ext32u_i64
:
2122 tcg_out_ext32u(s
, args
[0], args
[1]);
2124 case INDEX_op_ext_i32_i64
:
2125 case INDEX_op_ext32s_i64
:
2126 tcg_out_ext32s(s
, args
[0], args
[1]);
2131 if (args
[3] == 0 && args
[4] == 8) {
2132 /* load bits 0..7 */
2133 tcg_out_modrm(s
, OPC_MOVB_EvGv
| P_REXB_R
| P_REXB_RM
,
2135 } else if (args
[3] == 8 && args
[4] == 8) {
2136 /* load bits 8..15 */
2137 tcg_out_modrm(s
, OPC_MOVB_EvGv
, args
[2], args
[0] + 4);
2138 } else if (args
[3] == 0 && args
[4] == 16) {
2139 /* load bits 0..15 */
2140 tcg_out_modrm(s
, OPC_MOVL_EvGv
| P_DATA16
, args
[2], args
[0]);
2147 tcg_out_mb(s
, args
[0]);
2149 case INDEX_op_mov_i32
: /* Always emitted via tcg_out_mov. */
2150 case INDEX_op_mov_i64
:
2151 case INDEX_op_movi_i32
: /* Always emitted via tcg_out_movi. */
2152 case INDEX_op_movi_i64
:
2153 case INDEX_op_call
: /* Always emitted via tcg_out_call. */
2161 static const TCGTargetOpDef x86_op_defs
[] = {
2162 { INDEX_op_exit_tb
, { } },
2163 { INDEX_op_goto_tb
, { } },
2164 { INDEX_op_br
, { } },
2165 { INDEX_op_ld8u_i32
, { "r", "r" } },
2166 { INDEX_op_ld8s_i32
, { "r", "r" } },
2167 { INDEX_op_ld16u_i32
, { "r", "r" } },
2168 { INDEX_op_ld16s_i32
, { "r", "r" } },
2169 { INDEX_op_ld_i32
, { "r", "r" } },
2170 { INDEX_op_st8_i32
, { "qi", "r" } },
2171 { INDEX_op_st16_i32
, { "ri", "r" } },
2172 { INDEX_op_st_i32
, { "ri", "r" } },
2174 { INDEX_op_add_i32
, { "r", "r", "ri" } },
2175 { INDEX_op_sub_i32
, { "r", "0", "ri" } },
2176 { INDEX_op_mul_i32
, { "r", "0", "ri" } },
2177 { INDEX_op_div2_i32
, { "a", "d", "0", "1", "r" } },
2178 { INDEX_op_divu2_i32
, { "a", "d", "0", "1", "r" } },
2179 { INDEX_op_and_i32
, { "r", "0", "ri" } },
2180 { INDEX_op_or_i32
, { "r", "0", "ri" } },
2181 { INDEX_op_xor_i32
, { "r", "0", "ri" } },
2182 { INDEX_op_andc_i32
, { "r", "r", "ri" } },
2184 { INDEX_op_shl_i32
, { "r", "0", "Ci" } },
2185 { INDEX_op_shr_i32
, { "r", "0", "Ci" } },
2186 { INDEX_op_sar_i32
, { "r", "0", "Ci" } },
2187 { INDEX_op_rotl_i32
, { "r", "0", "ci" } },
2188 { INDEX_op_rotr_i32
, { "r", "0", "ci" } },
2190 { INDEX_op_brcond_i32
, { "r", "ri" } },
2192 { INDEX_op_bswap16_i32
, { "r", "0" } },
2193 { INDEX_op_bswap32_i32
, { "r", "0" } },
2195 { INDEX_op_neg_i32
, { "r", "0" } },
2197 { INDEX_op_not_i32
, { "r", "0" } },
2199 { INDEX_op_ext8s_i32
, { "r", "q" } },
2200 { INDEX_op_ext16s_i32
, { "r", "r" } },
2201 { INDEX_op_ext8u_i32
, { "r", "q" } },
2202 { INDEX_op_ext16u_i32
, { "r", "r" } },
2204 { INDEX_op_setcond_i32
, { "q", "r", "ri" } },
2206 { INDEX_op_deposit_i32
, { "Q", "0", "Q" } },
2207 { INDEX_op_movcond_i32
, { "r", "r", "ri", "r", "0" } },
2209 { INDEX_op_mulu2_i32
, { "a", "d", "a", "r" } },
2210 { INDEX_op_muls2_i32
, { "a", "d", "a", "r" } },
2211 { INDEX_op_add2_i32
, { "r", "r", "0", "1", "ri", "ri" } },
2212 { INDEX_op_sub2_i32
, { "r", "r", "0", "1", "ri", "ri" } },
2214 { INDEX_op_mb
, { } },
2216 #if TCG_TARGET_REG_BITS == 32
2217 { INDEX_op_brcond2_i32
, { "r", "r", "ri", "ri" } },
2218 { INDEX_op_setcond2_i32
, { "r", "r", "r", "ri", "ri" } },
2220 { INDEX_op_ld8u_i64
, { "r", "r" } },
2221 { INDEX_op_ld8s_i64
, { "r", "r" } },
2222 { INDEX_op_ld16u_i64
, { "r", "r" } },
2223 { INDEX_op_ld16s_i64
, { "r", "r" } },
2224 { INDEX_op_ld32u_i64
, { "r", "r" } },
2225 { INDEX_op_ld32s_i64
, { "r", "r" } },
2226 { INDEX_op_ld_i64
, { "r", "r" } },
2227 { INDEX_op_st8_i64
, { "ri", "r" } },
2228 { INDEX_op_st16_i64
, { "ri", "r" } },
2229 { INDEX_op_st32_i64
, { "ri", "r" } },
2230 { INDEX_op_st_i64
, { "re", "r" } },
2232 { INDEX_op_add_i64
, { "r", "r", "re" } },
2233 { INDEX_op_mul_i64
, { "r", "0", "re" } },
2234 { INDEX_op_div2_i64
, { "a", "d", "0", "1", "r" } },
2235 { INDEX_op_divu2_i64
, { "a", "d", "0", "1", "r" } },
2236 { INDEX_op_sub_i64
, { "r", "0", "re" } },
2237 { INDEX_op_and_i64
, { "r", "0", "reZ" } },
2238 { INDEX_op_or_i64
, { "r", "0", "re" } },
2239 { INDEX_op_xor_i64
, { "r", "0", "re" } },
2240 { INDEX_op_andc_i64
, { "r", "r", "rI" } },
2242 { INDEX_op_shl_i64
, { "r", "0", "Ci" } },
2243 { INDEX_op_shr_i64
, { "r", "0", "Ci" } },
2244 { INDEX_op_sar_i64
, { "r", "0", "Ci" } },
2245 { INDEX_op_rotl_i64
, { "r", "0", "ci" } },
2246 { INDEX_op_rotr_i64
, { "r", "0", "ci" } },
2248 { INDEX_op_brcond_i64
, { "r", "re" } },
2249 { INDEX_op_setcond_i64
, { "r", "r", "re" } },
2251 { INDEX_op_bswap16_i64
, { "r", "0" } },
2252 { INDEX_op_bswap32_i64
, { "r", "0" } },
2253 { INDEX_op_bswap64_i64
, { "r", "0" } },
2254 { INDEX_op_neg_i64
, { "r", "0" } },
2255 { INDEX_op_not_i64
, { "r", "0" } },
2257 { INDEX_op_ext8s_i64
, { "r", "r" } },
2258 { INDEX_op_ext16s_i64
, { "r", "r" } },
2259 { INDEX_op_ext32s_i64
, { "r", "r" } },
2260 { INDEX_op_ext8u_i64
, { "r", "r" } },
2261 { INDEX_op_ext16u_i64
, { "r", "r" } },
2262 { INDEX_op_ext32u_i64
, { "r", "r" } },
2264 { INDEX_op_ext_i32_i64
, { "r", "r" } },
2265 { INDEX_op_extu_i32_i64
, { "r", "r" } },
2267 { INDEX_op_deposit_i64
, { "Q", "0", "Q" } },
2268 { INDEX_op_movcond_i64
, { "r", "r", "re", "r", "0" } },
2270 { INDEX_op_mulu2_i64
, { "a", "d", "a", "r" } },
2271 { INDEX_op_muls2_i64
, { "a", "d", "a", "r" } },
2272 { INDEX_op_add2_i64
, { "r", "r", "0", "1", "re", "re" } },
2273 { INDEX_op_sub2_i64
, { "r", "r", "0", "1", "re", "re" } },
2276 #if TCG_TARGET_REG_BITS == 64
2277 { INDEX_op_qemu_ld_i32
, { "r", "L" } },
2278 { INDEX_op_qemu_st_i32
, { "L", "L" } },
2279 { INDEX_op_qemu_ld_i64
, { "r", "L" } },
2280 { INDEX_op_qemu_st_i64
, { "L", "L" } },
2281 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2282 { INDEX_op_qemu_ld_i32
, { "r", "L" } },
2283 { INDEX_op_qemu_st_i32
, { "L", "L" } },
2284 { INDEX_op_qemu_ld_i64
, { "r", "r", "L" } },
2285 { INDEX_op_qemu_st_i64
, { "L", "L", "L" } },
2287 { INDEX_op_qemu_ld_i32
, { "r", "L", "L" } },
2288 { INDEX_op_qemu_st_i32
, { "L", "L", "L" } },
2289 { INDEX_op_qemu_ld_i64
, { "r", "r", "L", "L" } },
2290 { INDEX_op_qemu_st_i64
, { "L", "L", "L", "L" } },
2295 static int tcg_target_callee_save_regs
[] = {
2296 #if TCG_TARGET_REG_BITS == 64
2305 TCG_REG_R14
, /* Currently used for the global env. */
2308 TCG_REG_EBP
, /* Currently used for the global env. */
/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

/* Bytes pushed on entry: return address plus the callee-saved registers.  */
#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

/* Total frame, rounded up to the stack alignment.  */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
2329 /* Generate global QEMU prologue and epilogue code */
2330 static void tcg_target_qemu_prologue(TCGContext
*s
)
2332 int i
, stack_addend
;
2336 /* Reserve some stack space, also for TCG temps. */
2337 stack_addend
= FRAME_SIZE
- PUSH_SIZE
;
2338 tcg_set_frame(s
, TCG_REG_CALL_STACK
, TCG_STATIC_CALL_ARGS_SIZE
,
2339 CPU_TEMP_BUF_NLONGS
* sizeof(long));
2341 /* Save all callee saved registers. */
2342 for (i
= 0; i
< ARRAY_SIZE(tcg_target_callee_save_regs
); i
++) {
2343 tcg_out_push(s
, tcg_target_callee_save_regs
[i
]);
2346 #if TCG_TARGET_REG_BITS == 32
2347 tcg_out_ld(s
, TCG_TYPE_PTR
, TCG_AREG0
, TCG_REG_ESP
,
2348 (ARRAY_SIZE(tcg_target_callee_save_regs
) + 1) * 4);
2349 tcg_out_addi(s
, TCG_REG_ESP
, -stack_addend
);
2351 tcg_out_modrm_offset(s
, OPC_GRP5
, EXT5_JMPN_Ev
, TCG_REG_ESP
,
2352 (ARRAY_SIZE(tcg_target_callee_save_regs
) + 2) * 4
2355 tcg_out_mov(s
, TCG_TYPE_PTR
, TCG_AREG0
, tcg_target_call_iarg_regs
[0]);
2356 tcg_out_addi(s
, TCG_REG_ESP
, -stack_addend
);
2358 tcg_out_modrm(s
, OPC_GRP5
, EXT5_JMPN_Ev
, tcg_target_call_iarg_regs
[1]);
2362 tb_ret_addr
= s
->code_ptr
;
2364 tcg_out_addi(s
, TCG_REG_CALL_STACK
, stack_addend
);
2366 for (i
= ARRAY_SIZE(tcg_target_callee_save_regs
) - 1; i
>= 0; i
--) {
2367 tcg_out_pop(s
, tcg_target_callee_save_regs
[i
]);
2369 tcg_out_opc(s
, OPC_RET
, 0, 0, 0);
2371 #if !defined(CONFIG_SOFTMMU)
2372 /* Try to set up a segment register to point to guest_base. */
2374 setup_guest_base_seg();
2379 static void tcg_target_init(TCGContext
*s
)
2381 #ifdef CONFIG_CPUID_H
2382 unsigned a
, b
, c
, d
;
2383 int max
= __get_cpuid_max(0, 0);
2386 __cpuid(1, a
, b
, c
, d
);
2388 /* For 32-bit, 99% certainty that we're running on hardware that
2389 supports cmov, but we still need to check. In case cmov is not
2390 available, we'll use a small forward branch. */
2391 have_cmov
= (d
& bit_CMOV
) != 0;
2394 /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
2395 need to probe for it. */
2396 have_movbe
= (c
& bit_MOVBE
) != 0;
2401 /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
2402 __cpuid_count(7, 0, a
, b
, c
, d
);
2404 have_bmi1
= (b
& bit_BMI
) != 0;
2407 have_bmi2
= (b
& bit_BMI2
) != 0;
2412 if (TCG_TARGET_REG_BITS
== 64) {
2413 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I32
], 0, 0xffff);
2414 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I64
], 0, 0xffff);
2416 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I32
], 0, 0xff);
2419 tcg_regset_clear(tcg_target_call_clobber_regs
);
2420 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_EAX
);
2421 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_EDX
);
2422 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_ECX
);
2423 if (TCG_TARGET_REG_BITS
== 64) {
2424 #if !defined(_WIN64)
2425 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_RDI
);
2426 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_RSI
);
2428 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R8
);
2429 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R9
);
2430 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R10
);
2431 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R11
);
2434 tcg_regset_clear(s
->reserved_regs
);
2435 tcg_regset_set_reg(s
->reserved_regs
, TCG_REG_CALL_STACK
);
2437 tcg_add_target_add_op_defs(x86_op_defs
);
2442 uint8_t fde_def_cfa
[4];
2443 uint8_t fde_reg_ofs
[14];
2446 /* We're expecting a 2 byte uleb128 encoded value. */
2447 QEMU_BUILD_BUG_ON(FRAME_SIZE
>= (1 << 14));
2449 #if !defined(__ELF__)
2450 /* Host machine without ELF. */
2451 #elif TCG_TARGET_REG_BITS == 64
2452 #define ELF_HOST_MACHINE EM_X86_64
2453 static const DebugFrame debug_frame
= {
2454 .h
.cie
.len
= sizeof(DebugFrameCIE
)-4, /* length after .len member */
2457 .h
.cie
.code_align
= 1,
2458 .h
.cie
.data_align
= 0x78, /* sleb128 -8 */
2459 .h
.cie
.return_column
= 16,
2461 /* Total FDE size does not include the "len" member. */
2462 .h
.fde
.len
= sizeof(DebugFrame
) - offsetof(DebugFrame
, h
.fde
.cie_offset
),
2465 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2466 (FRAME_SIZE
& 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2470 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2471 /* The following ordering must match tcg_target_callee_save_regs. */
2472 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2473 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2474 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2475 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2476 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2477 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2481 #define ELF_HOST_MACHINE EM_386
2482 static const DebugFrame debug_frame
= {
2483 .h
.cie
.len
= sizeof(DebugFrameCIE
)-4, /* length after .len member */
2486 .h
.cie
.code_align
= 1,
2487 .h
.cie
.data_align
= 0x7c, /* sleb128 -4 */
2488 .h
.cie
.return_column
= 8,
2490 /* Total FDE size does not include the "len" member. */
2491 .h
.fde
.len
= sizeof(DebugFrame
) - offsetof(DebugFrame
, h
.fde
.cie_offset
),
2494 12, 4, /* DW_CFA_def_cfa %esp, ... */
2495 (FRAME_SIZE
& 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2499 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2500 /* The following ordering must match tcg_target_callee_save_regs. */
2501 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2502 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2503 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2504 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2509 #if defined(ELF_HOST_MACHINE)
2510 void tcg_register_jit(void *buf
, size_t buf_size
)
2512 tcg_register_jit_int(buf
, buf_size
, &debug_frame
, sizeof(debug_frame
));