2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu/osdep.h"
26 #include "tcg-be-ldst.h"
29 static const char * const tcg_target_reg_names
[TCG_TARGET_NB_REGS
] = {
30 #if TCG_TARGET_REG_BITS == 64
31 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
32 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
34 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
39 static const int tcg_target_reg_alloc_order
[] = {
40 #if TCG_TARGET_REG_BITS == 64
/* Integer argument registers for the host calling convention.
   NOTE(review): register list restored from the upstream backend (SysV vs
   Win64 split) — confirm against the original tree. */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};
85 static const int tcg_target_call_oarg_regs
[] = {
87 #if TCG_TARGET_REG_BITS == 32
/* Constants we accept.  These bits extend TCG_CT_* for operand-constraint
   matching in tcg_target_const_match(). */
#define TCG_CT_CONST_S32 0x100   /* fits in a sign-extended 32-bit imm */
#define TCG_CT_CONST_U32 0x200   /* fits in a zero-extended 32-bit imm */
#define TCG_CT_CONST_I32 0x400   /* complement fits in 32 bits (for andc) */

/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call clobbered registers on
   i386. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif
/* The host compiler should supply <cpuid.h> to enable runtime features
   detection, as we're not going to go so far as our own inline assembly.
   If not available, default values will be assumed. */
#if defined(CONFIG_CPUID_H)
#include <cpuid.h>
#endif

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  On 64-bit, cmov is always present. */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
static bool have_cmov;
#else
# define have_cmov 0
#endif

/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
   going to attempt to determine at runtime whether movbe is available. */
#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
static bool have_movbe;
#else
# define have_movbe 0
#endif

/* We need this symbol in tcg-target.h, and we can't properly conditionalize
   it there.  Therefore we always define the variable.
   NOTE(review): extraction dropped lines here — the original may carry a
   non-static `bool have_bmi2;` definition referenced by tcg-target.h in
   addition to (or instead of) the guarded declaration below; confirm
   against the original tree. */
#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
static bool have_bmi2;
#else
# define have_bmi2 0
#endif
143 static tcg_insn_unit
*tb_ret_addr
;
145 static void patch_reloc(tcg_insn_unit
*code_ptr
, int type
,
146 intptr_t value
, intptr_t addend
)
151 value
-= (uintptr_t)code_ptr
;
152 if (value
!= (int32_t)value
) {
155 tcg_patch32(code_ptr
, value
);
158 value
-= (uintptr_t)code_ptr
;
159 if (value
!= (int8_t)value
) {
162 tcg_patch8(code_ptr
, value
);
169 /* parse target specific constraints */
170 static int target_parse_constraint(TCGArgConstraint
*ct
, const char **pct_str
)
177 ct
->ct
|= TCG_CT_REG
;
178 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EAX
);
181 ct
->ct
|= TCG_CT_REG
;
182 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EBX
);
186 ct
->ct
|= TCG_CT_REG
;
187 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_ECX
);
190 ct
->ct
|= TCG_CT_REG
;
191 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EDX
);
194 ct
->ct
|= TCG_CT_REG
;
195 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_ESI
);
198 ct
->ct
|= TCG_CT_REG
;
199 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EDI
);
202 ct
->ct
|= TCG_CT_REG
;
203 if (TCG_TARGET_REG_BITS
== 64) {
204 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
206 tcg_regset_set32(ct
->u
.regs
, 0, 0xf);
210 ct
->ct
|= TCG_CT_REG
;
211 tcg_regset_set32(ct
->u
.regs
, 0, 0xf);
215 ct
->ct
|= TCG_CT_REG
;
216 if (TCG_TARGET_REG_BITS
== 64) {
217 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
219 tcg_regset_set32(ct
->u
.regs
, 0, 0xff);
223 /* With SHRX et al, we need not use ECX as shift count register. */
230 /* qemu_ld/st address constraint */
232 ct
->ct
|= TCG_CT_REG
;
233 if (TCG_TARGET_REG_BITS
== 64) {
234 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
236 tcg_regset_set32(ct
->u
.regs
, 0, 0xff);
238 tcg_regset_reset_reg(ct
->u
.regs
, TCG_REG_L0
);
239 tcg_regset_reset_reg(ct
->u
.regs
, TCG_REG_L1
);
243 ct
->ct
|= TCG_CT_CONST_S32
;
246 ct
->ct
|= TCG_CT_CONST_U32
;
249 ct
->ct
|= TCG_CT_CONST_I32
;
260 /* test if a constant matches the constraint */
261 static inline int tcg_target_const_match(tcg_target_long val
, TCGType type
,
262 const TCGArgConstraint
*arg_ct
)
265 if (ct
& TCG_CT_CONST
) {
268 if ((ct
& TCG_CT_CONST_S32
) && val
== (int32_t)val
) {
271 if ((ct
& TCG_CT_CONST_U32
) && val
== (uint32_t)val
) {
274 if ((ct
& TCG_CT_CONST_I32
) && ~val
== (int32_t)~val
) {
#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)       /* low 3 bits; high bit goes in REX */
#else
# define LOWREGMASK(x)  (x)
#endif

/* Flag bits carried in the high bits of an opcode value; consumed by
   tcg_out_opc() when emitting prefixes. */
#define P_EXT           0x100           /* 0x0f opcode prefix */
#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */
#define P_DATA16        0x400           /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x800           /* 0x67 opcode prefix */
# define P_REXW         0x1000          /* Set REX.W = 1 */
# define P_REXB_R       0x2000          /* REG field as byte register */
# define P_REXB_RM      0x4000          /* R/M field as byte register */
# define P_GS           0x8000          /* gs segment override */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
# define P_GS           0
#endif
#define P_SIMDF3        0x10000         /* 0xf3 opcode prefix */
#define P_SIMDF2        0x20000         /* 0xf2 opcode prefix */

#define OPC_ARITH_EvIz	(0x81)
#define OPC_ARITH_EvIb	(0x83)
#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN        (0xf2 | P_EXT38)
#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP	(0xc8 | P_EXT)
#define OPC_CALL_Jz	(0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32	(0x48)
#define OPC_IMUL_GvEv	(0xaf | P_EXT)
#define OPC_IMUL_GvEvIb	(0x6b)
#define OPC_IMUL_GvEvIz	(0x69)
#define OPC_INC_r32	(0x40)
#define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
#define OPC_JCC_short	(0x70)		/* ... plus condition code */
#define OPC_JMP_long	(0xe9)
#define OPC_JMP_short	(0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz	(0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVSBL	(0xbe | P_EXT)
#define OPC_MOVSWL	(0xbf | P_EXT)
#define OPC_MOVSLQ	(0x63 | P_REXW)
#define OPC_MOVZBL	(0xb6 | P_EXT)
#define OPC_MOVZWL	(0xb7 | P_EXT)
#define OPC_POP_r32	(0x58)
#define OPC_PUSH_r32	(0x50)
#define OPC_PUSH_Iv	(0x68)
#define OPC_PUSH_Ib	(0x6a)
#define OPC_RET		(0xc3)
#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1	(0xd1)
#define OPC_SHIFT_Ib	(0xc1)
#define OPC_SHIFT_cl	(0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL	(0x85)
#define OPC_XCHG_ax_r32	(0x90)

#define OPC_GRP3_Ev	(0xf7)
#define OPC_GRP5	(0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev	0
#define EXT5_DEC_Ev	1
#define EXT5_CALLN_Ev	2
#define EXT5_JMPN_Ev	4
387 /* Condition codes to be added to OPC_JCC_{long,short}. */
406 static const uint8_t tcg_cond_to_jcc
[] = {
407 [TCG_COND_EQ
] = JCC_JE
,
408 [TCG_COND_NE
] = JCC_JNE
,
409 [TCG_COND_LT
] = JCC_JL
,
410 [TCG_COND_GE
] = JCC_JGE
,
411 [TCG_COND_LE
] = JCC_JLE
,
412 [TCG_COND_GT
] = JCC_JG
,
413 [TCG_COND_LTU
] = JCC_JB
,
414 [TCG_COND_GEU
] = JCC_JAE
,
415 [TCG_COND_LEU
] = JCC_JBE
,
416 [TCG_COND_GTU
] = JCC_JA
,
419 #if TCG_TARGET_REG_BITS == 64
420 static void tcg_out_opc(TCGContext
*s
, int opc
, int r
, int rm
, int x
)
427 if (opc
& P_DATA16
) {
428 /* We should never be asking for both 16 and 64-bit operation. */
429 assert((opc
& P_REXW
) == 0);
432 if (opc
& P_ADDR32
) {
437 rex
|= (opc
& P_REXW
) ? 0x8 : 0x0; /* REX.W */
438 rex
|= (r
& 8) >> 1; /* REX.R */
439 rex
|= (x
& 8) >> 2; /* REX.X */
440 rex
|= (rm
& 8) >> 3; /* REX.B */
442 /* P_REXB_{R,RM} indicates that the given register is the low byte.
443 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
444 as otherwise the encoding indicates %[abcd]h. Note that the values
445 that are ORed in merely indicate that the REX byte must be present;
446 those bits get discarded in output. */
447 rex
|= opc
& (r
>= 4 ? P_REXB_R
: 0);
448 rex
|= opc
& (rm
>= 4 ? P_REXB_RM
: 0);
451 tcg_out8(s
, (uint8_t)(rex
| 0x40));
454 if (opc
& (P_EXT
| P_EXT38
)) {
464 static void tcg_out_opc(TCGContext
*s
, int opc
)
466 if (opc
& P_DATA16
) {
469 if (opc
& (P_EXT
| P_EXT38
)) {
477 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
478 the 32-bit compilation paths. This method works with all versions of gcc,
479 whereas relying on optimization may not be able to exclude them. */
480 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
483 static void tcg_out_modrm(TCGContext
*s
, int opc
, int r
, int rm
)
485 tcg_out_opc(s
, opc
, r
, rm
, 0);
486 tcg_out8(s
, 0xc0 | (LOWREGMASK(r
) << 3) | LOWREGMASK(rm
));
489 static void tcg_out_vex_modrm(TCGContext
*s
, int opc
, int r
, int v
, int rm
)
493 if ((opc
& (P_REXW
| P_EXT
| P_EXT38
)) || (rm
& 8)) {
494 /* Three byte VEX prefix. */
500 } else if (opc
& P_EXT
) {
505 tmp
|= 0x40; /* VEX.X */
506 tmp
|= (r
& 8 ? 0 : 0x80); /* VEX.R */
507 tmp
|= (rm
& 8 ? 0 : 0x20); /* VEX.B */
510 tmp
= (opc
& P_REXW
? 0x80 : 0); /* VEX.W */
512 /* Two byte VEX prefix. */
515 tmp
= (r
& 8 ? 0 : 0x80); /* VEX.R */
518 if (opc
& P_DATA16
) {
520 } else if (opc
& P_SIMDF3
) {
522 } else if (opc
& P_SIMDF2
) {
525 tmp
|= (~v
& 15) << 3; /* VEX.vvvv */
528 tcg_out8(s
, 0xc0 | (LOWREGMASK(r
) << 3) | LOWREGMASK(rm
));
531 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
532 We handle either RM and INDEX missing with a negative value. In 64-bit
533 mode for absolute addresses, ~RM is the size of the immediate operand
534 that will follow the instruction. */
536 static void tcg_out_modrm_sib_offset(TCGContext
*s
, int opc
, int r
, int rm
,
537 int index
, int shift
, intptr_t offset
)
541 if (index
< 0 && rm
< 0) {
542 if (TCG_TARGET_REG_BITS
== 64) {
543 /* Try for a rip-relative addressing mode. This has replaced
544 the 32-bit-mode absolute addressing encoding. */
545 intptr_t pc
= (intptr_t)s
->code_ptr
+ 5 + ~rm
;
546 intptr_t disp
= offset
- pc
;
547 if (disp
== (int32_t)disp
) {
548 tcg_out_opc(s
, opc
, r
, 0, 0);
549 tcg_out8(s
, (LOWREGMASK(r
) << 3) | 5);
554 /* Try for an absolute address encoding. This requires the
555 use of the MODRM+SIB encoding and is therefore larger than
556 rip-relative addressing. */
557 if (offset
== (int32_t)offset
) {
558 tcg_out_opc(s
, opc
, r
, 0, 0);
559 tcg_out8(s
, (LOWREGMASK(r
) << 3) | 4);
560 tcg_out8(s
, (4 << 3) | 5);
561 tcg_out32(s
, offset
);
565 /* ??? The memory isn't directly addressable. */
568 /* Absolute address. */
569 tcg_out_opc(s
, opc
, r
, 0, 0);
570 tcg_out8(s
, (r
<< 3) | 5);
571 tcg_out32(s
, offset
);
576 /* Find the length of the immediate addend. Note that the encoding
577 that would be used for (%ebp) indicates absolute addressing. */
579 mod
= 0, len
= 4, rm
= 5;
580 } else if (offset
== 0 && LOWREGMASK(rm
) != TCG_REG_EBP
) {
582 } else if (offset
== (int8_t)offset
) {
588 /* Use a single byte MODRM format if possible. Note that the encoding
589 that would be used for %esp is the escape to the two byte form. */
590 if (index
< 0 && LOWREGMASK(rm
) != TCG_REG_ESP
) {
591 /* Single byte MODRM format. */
592 tcg_out_opc(s
, opc
, r
, rm
, 0);
593 tcg_out8(s
, mod
| (LOWREGMASK(r
) << 3) | LOWREGMASK(rm
));
595 /* Two byte MODRM+SIB format. */
597 /* Note that the encoding that would place %esp into the index
598 field indicates no index register. In 64-bit mode, the REX.X
599 bit counts, so %r12 can be used as the index. */
603 assert(index
!= TCG_REG_ESP
);
606 tcg_out_opc(s
, opc
, r
, rm
, index
);
607 tcg_out8(s
, mod
| (LOWREGMASK(r
) << 3) | 4);
608 tcg_out8(s
, (shift
<< 6) | (LOWREGMASK(index
) << 3) | LOWREGMASK(rm
));
613 } else if (len
== 4) {
614 tcg_out32(s
, offset
);
618 /* A simplification of the above with no index or shift. */
619 static inline void tcg_out_modrm_offset(TCGContext
*s
, int opc
, int r
,
620 int rm
, intptr_t offset
)
622 tcg_out_modrm_sib_offset(s
, opc
, r
, rm
, -1, 0, offset
);
625 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
626 static inline void tgen_arithr(TCGContext
*s
, int subop
, int dest
, int src
)
628 /* Propagate an opcode prefix, such as P_REXW. */
629 int ext
= subop
& ~0x7;
632 tcg_out_modrm(s
, OPC_ARITH_GvEv
+ (subop
<< 3) + ext
, dest
, src
);
635 static inline void tcg_out_mov(TCGContext
*s
, TCGType type
,
636 TCGReg ret
, TCGReg arg
)
639 int opc
= OPC_MOVL_GvEv
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
640 tcg_out_modrm(s
, opc
, ret
, arg
);
644 static void tcg_out_movi(TCGContext
*s
, TCGType type
,
645 TCGReg ret
, tcg_target_long arg
)
647 tcg_target_long diff
;
650 tgen_arithr(s
, ARITH_XOR
, ret
, ret
);
653 if (arg
== (uint32_t)arg
|| type
== TCG_TYPE_I32
) {
654 tcg_out_opc(s
, OPC_MOVL_Iv
+ LOWREGMASK(ret
), 0, ret
, 0);
658 if (arg
== (int32_t)arg
) {
659 tcg_out_modrm(s
, OPC_MOVL_EvIz
+ P_REXW
, 0, ret
);
664 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
665 diff
= arg
- ((uintptr_t)s
->code_ptr
+ 7);
666 if (diff
== (int32_t)diff
) {
667 tcg_out_opc(s
, OPC_LEA
| P_REXW
, ret
, 0, 0);
668 tcg_out8(s
, (LOWREGMASK(ret
) << 3) | 5);
673 tcg_out_opc(s
, OPC_MOVL_Iv
+ P_REXW
+ LOWREGMASK(ret
), 0, ret
, 0);
677 static inline void tcg_out_pushi(TCGContext
*s
, tcg_target_long val
)
679 if (val
== (int8_t)val
) {
680 tcg_out_opc(s
, OPC_PUSH_Ib
, 0, 0, 0);
682 } else if (val
== (int32_t)val
) {
683 tcg_out_opc(s
, OPC_PUSH_Iv
, 0, 0, 0);
690 static inline void tcg_out_push(TCGContext
*s
, int reg
)
692 tcg_out_opc(s
, OPC_PUSH_r32
+ LOWREGMASK(reg
), 0, reg
, 0);
695 static inline void tcg_out_pop(TCGContext
*s
, int reg
)
697 tcg_out_opc(s
, OPC_POP_r32
+ LOWREGMASK(reg
), 0, reg
, 0);
700 static inline void tcg_out_ld(TCGContext
*s
, TCGType type
, TCGReg ret
,
701 TCGReg arg1
, intptr_t arg2
)
703 int opc
= OPC_MOVL_GvEv
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
704 tcg_out_modrm_offset(s
, opc
, ret
, arg1
, arg2
);
707 static inline void tcg_out_st(TCGContext
*s
, TCGType type
, TCGReg arg
,
708 TCGReg arg1
, intptr_t arg2
)
710 int opc
= OPC_MOVL_EvGv
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
711 tcg_out_modrm_offset(s
, opc
, arg
, arg1
, arg2
);
714 static inline void tcg_out_sti(TCGContext
*s
, TCGType type
, TCGReg base
,
715 tcg_target_long ofs
, tcg_target_long val
)
717 int opc
= OPC_MOVL_EvIz
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
718 tcg_out_modrm_offset(s
, opc
, 0, base
, ofs
);
722 static void tcg_out_shifti(TCGContext
*s
, int subopc
, int reg
, int count
)
724 /* Propagate an opcode prefix, such as P_DATA16. */
725 int ext
= subopc
& ~0x7;
729 tcg_out_modrm(s
, OPC_SHIFT_1
+ ext
, subopc
, reg
);
731 tcg_out_modrm(s
, OPC_SHIFT_Ib
+ ext
, subopc
, reg
);
736 static inline void tcg_out_bswap32(TCGContext
*s
, int reg
)
738 tcg_out_opc(s
, OPC_BSWAP
+ LOWREGMASK(reg
), 0, reg
, 0);
741 static inline void tcg_out_rolw_8(TCGContext
*s
, int reg
)
743 tcg_out_shifti(s
, SHIFT_ROL
+ P_DATA16
, reg
, 8);
746 static inline void tcg_out_ext8u(TCGContext
*s
, int dest
, int src
)
749 assert(src
< 4 || TCG_TARGET_REG_BITS
== 64);
750 tcg_out_modrm(s
, OPC_MOVZBL
+ P_REXB_RM
, dest
, src
);
753 static void tcg_out_ext8s(TCGContext
*s
, int dest
, int src
, int rexw
)
756 assert(src
< 4 || TCG_TARGET_REG_BITS
== 64);
757 tcg_out_modrm(s
, OPC_MOVSBL
+ P_REXB_RM
+ rexw
, dest
, src
);
760 static inline void tcg_out_ext16u(TCGContext
*s
, int dest
, int src
)
763 tcg_out_modrm(s
, OPC_MOVZWL
, dest
, src
);
766 static inline void tcg_out_ext16s(TCGContext
*s
, int dest
, int src
, int rexw
)
769 tcg_out_modrm(s
, OPC_MOVSWL
+ rexw
, dest
, src
);
772 static inline void tcg_out_ext32u(TCGContext
*s
, int dest
, int src
)
774 /* 32-bit mov zero extends. */
775 tcg_out_modrm(s
, OPC_MOVL_GvEv
, dest
, src
);
778 static inline void tcg_out_ext32s(TCGContext
*s
, int dest
, int src
)
780 tcg_out_modrm(s
, OPC_MOVSLQ
, dest
, src
);
783 static inline void tcg_out_bswap64(TCGContext
*s
, int reg
)
785 tcg_out_opc(s
, OPC_BSWAP
+ P_REXW
+ LOWREGMASK(reg
), 0, reg
, 0);
788 static void tgen_arithi(TCGContext
*s
, int c
, int r0
,
789 tcg_target_long val
, int cf
)
793 if (TCG_TARGET_REG_BITS
== 64) {
798 /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
799 partial flags update stalls on Pentium4 and are not recommended
800 by current Intel optimization manuals. */
801 if (!cf
&& (c
== ARITH_ADD
|| c
== ARITH_SUB
) && (val
== 1 || val
== -1)) {
802 int is_inc
= (c
== ARITH_ADD
) ^ (val
< 0);
803 if (TCG_TARGET_REG_BITS
== 64) {
804 /* The single-byte increment encodings are re-tasked as the
805 REX prefixes. Use the MODRM encoding. */
806 tcg_out_modrm(s
, OPC_GRP5
+ rexw
,
807 (is_inc
? EXT5_INC_Ev
: EXT5_DEC_Ev
), r0
);
809 tcg_out8(s
, (is_inc
? OPC_INC_r32
: OPC_DEC_r32
) + r0
);
814 if (c
== ARITH_AND
) {
815 if (TCG_TARGET_REG_BITS
== 64) {
816 if (val
== 0xffffffffu
) {
817 tcg_out_ext32u(s
, r0
, r0
);
820 if (val
== (uint32_t)val
) {
821 /* AND with no high bits set can use a 32-bit operation. */
825 if (val
== 0xffu
&& (r0
< 4 || TCG_TARGET_REG_BITS
== 64)) {
826 tcg_out_ext8u(s
, r0
, r0
);
829 if (val
== 0xffffu
) {
830 tcg_out_ext16u(s
, r0
, r0
);
835 if (val
== (int8_t)val
) {
836 tcg_out_modrm(s
, OPC_ARITH_EvIb
+ rexw
, c
, r0
);
840 if (rexw
== 0 || val
== (int32_t)val
) {
841 tcg_out_modrm(s
, OPC_ARITH_EvIz
+ rexw
, c
, r0
);
849 static void tcg_out_addi(TCGContext
*s
, int reg
, tcg_target_long val
)
852 tgen_arithi(s
, ARITH_ADD
+ P_REXW
, reg
, val
, 0);
856 /* Use SMALL != 0 to force a short forward branch. */
857 static void tcg_out_jxx(TCGContext
*s
, int opc
, TCGLabel
*l
, int small
)
862 val
= tcg_pcrel_diff(s
, l
->u
.value_ptr
);
864 if ((int8_t)val1
== val1
) {
866 tcg_out8(s
, OPC_JMP_short
);
868 tcg_out8(s
, OPC_JCC_short
+ opc
);
876 tcg_out8(s
, OPC_JMP_long
);
877 tcg_out32(s
, val
- 5);
879 tcg_out_opc(s
, OPC_JCC_long
+ opc
, 0, 0, 0);
880 tcg_out32(s
, val
- 6);
885 tcg_out8(s
, OPC_JMP_short
);
887 tcg_out8(s
, OPC_JCC_short
+ opc
);
889 tcg_out_reloc(s
, s
->code_ptr
, R_386_PC8
, l
, -1);
893 tcg_out8(s
, OPC_JMP_long
);
895 tcg_out_opc(s
, OPC_JCC_long
+ opc
, 0, 0, 0);
897 tcg_out_reloc(s
, s
->code_ptr
, R_386_PC32
, l
, -4);
902 static void tcg_out_cmp(TCGContext
*s
, TCGArg arg1
, TCGArg arg2
,
903 int const_arg2
, int rexw
)
908 tcg_out_modrm(s
, OPC_TESTL
+ rexw
, arg1
, arg1
);
910 tgen_arithi(s
, ARITH_CMP
+ rexw
, arg1
, arg2
, 0);
913 tgen_arithr(s
, ARITH_CMP
+ rexw
, arg1
, arg2
);
917 static void tcg_out_brcond32(TCGContext
*s
, TCGCond cond
,
918 TCGArg arg1
, TCGArg arg2
, int const_arg2
,
919 TCGLabel
*label
, int small
)
921 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, 0);
922 tcg_out_jxx(s
, tcg_cond_to_jcc
[cond
], label
, small
);
925 #if TCG_TARGET_REG_BITS == 64
926 static void tcg_out_brcond64(TCGContext
*s
, TCGCond cond
,
927 TCGArg arg1
, TCGArg arg2
, int const_arg2
,
928 TCGLabel
*label
, int small
)
930 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, P_REXW
);
931 tcg_out_jxx(s
, tcg_cond_to_jcc
[cond
], label
, small
);
934 /* XXX: we implement it at the target level to avoid having to
935 handle cross basic blocks temporaries */
936 static void tcg_out_brcond2(TCGContext
*s
, const TCGArg
*args
,
937 const int *const_args
, int small
)
939 TCGLabel
*label_next
= gen_new_label();
940 TCGLabel
*label_this
= arg_label(args
[5]);
944 tcg_out_brcond32(s
, TCG_COND_NE
, args
[0], args
[2], const_args
[2],
946 tcg_out_brcond32(s
, TCG_COND_EQ
, args
[1], args
[3], const_args
[3],
950 tcg_out_brcond32(s
, TCG_COND_NE
, args
[0], args
[2], const_args
[2],
952 tcg_out_brcond32(s
, TCG_COND_NE
, args
[1], args
[3], const_args
[3],
956 tcg_out_brcond32(s
, TCG_COND_LT
, args
[1], args
[3], const_args
[3],
958 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
959 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[0], args
[2], const_args
[2],
963 tcg_out_brcond32(s
, TCG_COND_LT
, args
[1], args
[3], const_args
[3],
965 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
966 tcg_out_brcond32(s
, TCG_COND_LEU
, args
[0], args
[2], const_args
[2],
970 tcg_out_brcond32(s
, TCG_COND_GT
, args
[1], args
[3], const_args
[3],
972 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
973 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[0], args
[2], const_args
[2],
977 tcg_out_brcond32(s
, TCG_COND_GT
, args
[1], args
[3], const_args
[3],
979 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
980 tcg_out_brcond32(s
, TCG_COND_GEU
, args
[0], args
[2], const_args
[2],
984 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[1], args
[3], const_args
[3],
986 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
987 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[0], args
[2], const_args
[2],
991 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[1], args
[3], const_args
[3],
993 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
994 tcg_out_brcond32(s
, TCG_COND_LEU
, args
[0], args
[2], const_args
[2],
998 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[1], args
[3], const_args
[3],
1000 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
1001 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[0], args
[2], const_args
[2],
1005 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[1], args
[3], const_args
[3],
1007 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
1008 tcg_out_brcond32(s
, TCG_COND_GEU
, args
[0], args
[2], const_args
[2],
1014 tcg_out_label(s
, label_next
, s
->code_ptr
);
1018 static void tcg_out_setcond32(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
1019 TCGArg arg1
, TCGArg arg2
, int const_arg2
)
1021 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, 0);
1022 tcg_out_modrm(s
, OPC_SETCC
| tcg_cond_to_jcc
[cond
], 0, dest
);
1023 tcg_out_ext8u(s
, dest
, dest
);
1026 #if TCG_TARGET_REG_BITS == 64
1027 static void tcg_out_setcond64(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
1028 TCGArg arg1
, TCGArg arg2
, int const_arg2
)
1030 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, P_REXW
);
1031 tcg_out_modrm(s
, OPC_SETCC
| tcg_cond_to_jcc
[cond
], 0, dest
);
1032 tcg_out_ext8u(s
, dest
, dest
);
1035 static void tcg_out_setcond2(TCGContext
*s
, const TCGArg
*args
,
1036 const int *const_args
)
1039 TCGLabel
*label_true
, *label_over
;
1041 memcpy(new_args
, args
+1, 5*sizeof(TCGArg
));
1043 if (args
[0] == args
[1] || args
[0] == args
[2]
1044 || (!const_args
[3] && args
[0] == args
[3])
1045 || (!const_args
[4] && args
[0] == args
[4])) {
1046 /* When the destination overlaps with one of the argument
1047 registers, don't do anything tricky. */
1048 label_true
= gen_new_label();
1049 label_over
= gen_new_label();
1051 new_args
[5] = label_arg(label_true
);
1052 tcg_out_brcond2(s
, new_args
, const_args
+1, 1);
1054 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 0);
1055 tcg_out_jxx(s
, JCC_JMP
, label_over
, 1);
1056 tcg_out_label(s
, label_true
, s
->code_ptr
);
1058 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 1);
1059 tcg_out_label(s
, label_over
, s
->code_ptr
);
1061 /* When the destination does not overlap one of the arguments,
1062 clear the destination first, jump if cond false, and emit an
1063 increment in the true case. This results in smaller code. */
1065 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 0);
1067 label_over
= gen_new_label();
1068 new_args
[4] = tcg_invert_cond(new_args
[4]);
1069 new_args
[5] = label_arg(label_over
);
1070 tcg_out_brcond2(s
, new_args
, const_args
+1, 1);
1072 tgen_arithi(s
, ARITH_ADD
, args
[0], 1, 0);
1073 tcg_out_label(s
, label_over
, s
->code_ptr
);
1078 static void tcg_out_movcond32(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
1079 TCGArg c1
, TCGArg c2
, int const_c2
,
1082 tcg_out_cmp(s
, c1
, c2
, const_c2
, 0);
1084 tcg_out_modrm(s
, OPC_CMOVCC
| tcg_cond_to_jcc
[cond
], dest
, v1
);
1086 TCGLabel
*over
= gen_new_label();
1087 tcg_out_jxx(s
, tcg_cond_to_jcc
[tcg_invert_cond(cond
)], over
, 1);
1088 tcg_out_mov(s
, TCG_TYPE_I32
, dest
, v1
);
1089 tcg_out_label(s
, over
, s
->code_ptr
);
1093 #if TCG_TARGET_REG_BITS == 64
1094 static void tcg_out_movcond64(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
1095 TCGArg c1
, TCGArg c2
, int const_c2
,
1098 tcg_out_cmp(s
, c1
, c2
, const_c2
, P_REXW
);
1099 tcg_out_modrm(s
, OPC_CMOVCC
| tcg_cond_to_jcc
[cond
] | P_REXW
, dest
, v1
);
1103 static void tcg_out_branch(TCGContext
*s
, int call
, tcg_insn_unit
*dest
)
1105 intptr_t disp
= tcg_pcrel_diff(s
, dest
) - 5;
1107 if (disp
== (int32_t)disp
) {
1108 tcg_out_opc(s
, call
? OPC_CALL_Jz
: OPC_JMP_long
, 0, 0, 0);
1111 tcg_out_movi(s
, TCG_TYPE_PTR
, TCG_REG_R10
, (uintptr_t)dest
);
1112 tcg_out_modrm(s
, OPC_GRP5
,
1113 call
? EXT5_CALLN_Ev
: EXT5_JMPN_Ev
, TCG_REG_R10
);
1117 static inline void tcg_out_call(TCGContext
*s
, tcg_insn_unit
*dest
)
1119 tcg_out_branch(s
, 1, dest
);
1122 static void tcg_out_jmp(TCGContext
*s
, tcg_insn_unit
*dest
)
1124 tcg_out_branch(s
, 0, dest
);
1127 #if defined(CONFIG_SOFTMMU)
1128 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1129 * int mmu_idx, uintptr_t ra)
1131 static void * const qemu_ld_helpers
[16] = {
1132 [MO_UB
] = helper_ret_ldub_mmu
,
1133 [MO_LEUW
] = helper_le_lduw_mmu
,
1134 [MO_LEUL
] = helper_le_ldul_mmu
,
1135 [MO_LEQ
] = helper_le_ldq_mmu
,
1136 [MO_BEUW
] = helper_be_lduw_mmu
,
1137 [MO_BEUL
] = helper_be_ldul_mmu
,
1138 [MO_BEQ
] = helper_be_ldq_mmu
,
1141 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1142 * uintxx_t val, int mmu_idx, uintptr_t ra)
1144 static void * const qemu_st_helpers
[16] = {
1145 [MO_UB
] = helper_ret_stb_mmu
,
1146 [MO_LEUW
] = helper_le_stw_mmu
,
1147 [MO_LEUL
] = helper_le_stl_mmu
,
1148 [MO_LEQ
] = helper_le_stq_mmu
,
1149 [MO_BEUW
] = helper_be_stw_mmu
,
1150 [MO_BEUL
] = helper_be_stl_mmu
,
1151 [MO_BEQ
] = helper_be_stq_mmu
,
1154 /* Perform the TLB load and compare.
1157 ADDRLO and ADDRHI contain the low and high part of the address.
1159 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1161 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1162 This should be offsetof addr_read or addr_write.
1165 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1166 positions of the displacements of forward jumps to the TLB miss case.
1168 Second argument register is loaded with the low part of the address.
1169 In the TLB hit case, it has been adjusted as indicated by the TLB
1170 and so is a host address. In the TLB miss case, it continues to
1171 hold a guest address.
1173 First argument register is clobbered. */
1175 static inline void tcg_out_tlb_load(TCGContext
*s
, TCGReg addrlo
, TCGReg addrhi
,
1176 int mem_index
, TCGMemOp opc
,
1177 tcg_insn_unit
**label_ptr
, int which
)
1179 const TCGReg r0
= TCG_REG_L0
;
1180 const TCGReg r1
= TCG_REG_L1
;
1181 TCGType ttype
= TCG_TYPE_I32
;
1182 TCGType tlbtype
= TCG_TYPE_I32
;
1183 int trexw
= 0, hrexw
= 0, tlbrexw
= 0;
1184 int s_mask
= (1 << (opc
& MO_SIZE
)) - 1;
1185 bool aligned
= (opc
& MO_AMASK
) == MO_ALIGN
|| s_mask
== 0;
1187 if (TCG_TARGET_REG_BITS
== 64) {
1188 if (TARGET_LONG_BITS
== 64) {
1189 ttype
= TCG_TYPE_I64
;
1192 if (TCG_TYPE_PTR
== TCG_TYPE_I64
) {
1194 if (TARGET_PAGE_BITS
+ CPU_TLB_BITS
> 32) {
1195 tlbtype
= TCG_TYPE_I64
;
1201 tcg_out_mov(s
, tlbtype
, r0
, addrlo
);
1203 tcg_out_mov(s
, ttype
, r1
, addrlo
);
1205 /* For unaligned access check that we don't cross pages using
1206 the page address of the last byte. */
1207 tcg_out_modrm_offset(s
, OPC_LEA
+ trexw
, r1
, addrlo
, s_mask
);
1210 tcg_out_shifti(s
, SHIFT_SHR
+ tlbrexw
, r0
,
1211 TARGET_PAGE_BITS
- CPU_TLB_ENTRY_BITS
);
1213 tgen_arithi(s
, ARITH_AND
+ trexw
, r1
,
1214 TARGET_PAGE_MASK
| (aligned
? s_mask
: 0), 0);
1215 tgen_arithi(s
, ARITH_AND
+ tlbrexw
, r0
,
1216 (CPU_TLB_SIZE
- 1) << CPU_TLB_ENTRY_BITS
, 0);
1218 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ hrexw
, r0
, TCG_AREG0
, r0
, 0,
1219 offsetof(CPUArchState
, tlb_table
[mem_index
][0])
1223 tcg_out_modrm_offset(s
, OPC_CMP_GvEv
+ trexw
, r1
, r0
, 0);
1225 /* Prepare for both the fast path add of the tlb addend, and the slow
1226 path function argument setup. There are two cases worth note:
1227 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1228 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1229 copies the entire guest address for the slow path, while truncation
1230 for the 32-bit host happens with the fastpath ADDL below. */
1231 tcg_out_mov(s
, ttype
, r1
, addrlo
);
1234 tcg_out_opc(s
, OPC_JCC_long
+ JCC_JNE
, 0, 0, 0);
1235 label_ptr
[0] = s
->code_ptr
;
1238 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1239 /* cmp 4(r0), addrhi */
1240 tcg_out_modrm_offset(s
, OPC_CMP_GvEv
, addrhi
, r0
, 4);
1243 tcg_out_opc(s
, OPC_JCC_long
+ JCC_JNE
, 0, 0, 0);
1244 label_ptr
[1] = s
->code_ptr
;
1250 /* add addend(r0), r1 */
1251 tcg_out_modrm_offset(s
, OPC_ADD_GvEv
+ hrexw
, r1
, r0
,
1252 offsetof(CPUTLBEntry
, addend
) - which
);
1256 * Record the context of a call to the out of line helper code for the slow path
1257 * for a load or store, so that we can later generate the correct helper code
1259 static void add_qemu_ldst_label(TCGContext
*s
, bool is_ld
, TCGMemOpIdx oi
,
1260 TCGReg datalo
, TCGReg datahi
,
1261 TCGReg addrlo
, TCGReg addrhi
,
1262 tcg_insn_unit
*raddr
,
1263 tcg_insn_unit
**label_ptr
)
1265 TCGLabelQemuLdst
*label
= new_ldst_label(s
);
1267 label
->is_ld
= is_ld
;
1269 label
->datalo_reg
= datalo
;
1270 label
->datahi_reg
= datahi
;
1271 label
->addrlo_reg
= addrlo
;
1272 label
->addrhi_reg
= addrhi
;
1273 label
->raddr
= raddr
;
1274 label
->label_ptr
[0] = label_ptr
[0];
1275 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1276 label
->label_ptr
[1] = label_ptr
[1];
1281 * Generate code for the slow path for a load at the end of block
1283 static void tcg_out_qemu_ld_slow_path(TCGContext
*s
, TCGLabelQemuLdst
*l
)
1285 TCGMemOpIdx oi
= l
->oi
;
1286 TCGMemOp opc
= get_memop(oi
);
1288 tcg_insn_unit
**label_ptr
= &l
->label_ptr
[0];
1290 /* resolve label address */
1291 tcg_patch32(label_ptr
[0], s
->code_ptr
- label_ptr
[0] - 4);
1292 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1293 tcg_patch32(label_ptr
[1], s
->code_ptr
- label_ptr
[1] - 4);
1296 if (TCG_TARGET_REG_BITS
== 32) {
1299 tcg_out_st(s
, TCG_TYPE_PTR
, TCG_AREG0
, TCG_REG_ESP
, ofs
);
1302 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrlo_reg
, TCG_REG_ESP
, ofs
);
1305 if (TARGET_LONG_BITS
== 64) {
1306 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrhi_reg
, TCG_REG_ESP
, ofs
);
1310 tcg_out_sti(s
, TCG_TYPE_I32
, TCG_REG_ESP
, ofs
, oi
);
1313 tcg_out_sti(s
, TCG_TYPE_PTR
, TCG_REG_ESP
, ofs
, (uintptr_t)l
->raddr
);
1315 tcg_out_mov(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[0], TCG_AREG0
);
1316 /* The second argument is already loaded with addrlo. */
1317 tcg_out_movi(s
, TCG_TYPE_I32
, tcg_target_call_iarg_regs
[2], oi
);
1318 tcg_out_movi(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[3],
1319 (uintptr_t)l
->raddr
);
1322 tcg_out_call(s
, qemu_ld_helpers
[opc
& (MO_BSWAP
| MO_SIZE
)]);
1324 data_reg
= l
->datalo_reg
;
1325 switch (opc
& MO_SSIZE
) {
1327 tcg_out_ext8s(s
, data_reg
, TCG_REG_EAX
, P_REXW
);
1330 tcg_out_ext16s(s
, data_reg
, TCG_REG_EAX
, P_REXW
);
1332 #if TCG_TARGET_REG_BITS == 64
1334 tcg_out_ext32s(s
, data_reg
, TCG_REG_EAX
);
1339 /* Note that the helpers have zero-extended to tcg_target_long. */
1341 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg
, TCG_REG_EAX
);
1344 if (TCG_TARGET_REG_BITS
== 64) {
1345 tcg_out_mov(s
, TCG_TYPE_I64
, data_reg
, TCG_REG_RAX
);
1346 } else if (data_reg
== TCG_REG_EDX
) {
1347 /* xchg %edx, %eax */
1348 tcg_out_opc(s
, OPC_XCHG_ax_r32
+ TCG_REG_EDX
, 0, 0, 0);
1349 tcg_out_mov(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_EAX
);
1351 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg
, TCG_REG_EAX
);
1352 tcg_out_mov(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_EDX
);
1359 /* Jump to the code corresponding to next IR of qemu_st */
1360 tcg_out_jmp(s
, l
->raddr
);
1364 * Generate code for the slow path for a store at the end of block
1366 static void tcg_out_qemu_st_slow_path(TCGContext
*s
, TCGLabelQemuLdst
*l
)
1368 TCGMemOpIdx oi
= l
->oi
;
1369 TCGMemOp opc
= get_memop(oi
);
1370 TCGMemOp s_bits
= opc
& MO_SIZE
;
1371 tcg_insn_unit
**label_ptr
= &l
->label_ptr
[0];
1374 /* resolve label address */
1375 tcg_patch32(label_ptr
[0], s
->code_ptr
- label_ptr
[0] - 4);
1376 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1377 tcg_patch32(label_ptr
[1], s
->code_ptr
- label_ptr
[1] - 4);
1380 if (TCG_TARGET_REG_BITS
== 32) {
1383 tcg_out_st(s
, TCG_TYPE_PTR
, TCG_AREG0
, TCG_REG_ESP
, ofs
);
1386 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrlo_reg
, TCG_REG_ESP
, ofs
);
1389 if (TARGET_LONG_BITS
== 64) {
1390 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrhi_reg
, TCG_REG_ESP
, ofs
);
1394 tcg_out_st(s
, TCG_TYPE_I32
, l
->datalo_reg
, TCG_REG_ESP
, ofs
);
1397 if (s_bits
== MO_64
) {
1398 tcg_out_st(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_ESP
, ofs
);
1402 tcg_out_sti(s
, TCG_TYPE_I32
, TCG_REG_ESP
, ofs
, oi
);
1405 retaddr
= TCG_REG_EAX
;
1406 tcg_out_movi(s
, TCG_TYPE_PTR
, retaddr
, (uintptr_t)l
->raddr
);
1407 tcg_out_st(s
, TCG_TYPE_PTR
, retaddr
, TCG_REG_ESP
, ofs
);
1409 tcg_out_mov(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[0], TCG_AREG0
);
1410 /* The second argument is already loaded with addrlo. */
1411 tcg_out_mov(s
, (s_bits
== MO_64
? TCG_TYPE_I64
: TCG_TYPE_I32
),
1412 tcg_target_call_iarg_regs
[2], l
->datalo_reg
);
1413 tcg_out_movi(s
, TCG_TYPE_I32
, tcg_target_call_iarg_regs
[3], oi
);
1415 if (ARRAY_SIZE(tcg_target_call_iarg_regs
) > 4) {
1416 retaddr
= tcg_target_call_iarg_regs
[4];
1417 tcg_out_movi(s
, TCG_TYPE_PTR
, retaddr
, (uintptr_t)l
->raddr
);
1419 retaddr
= TCG_REG_RAX
;
1420 tcg_out_movi(s
, TCG_TYPE_PTR
, retaddr
, (uintptr_t)l
->raddr
);
1421 tcg_out_st(s
, TCG_TYPE_PTR
, retaddr
, TCG_REG_ESP
,
1422 TCG_TARGET_CALL_STACK_OFFSET
);
1426 /* "Tail call" to the helper, with the return address back inline. */
1427 tcg_out_push(s
, retaddr
);
1428 tcg_out_jmp(s
, qemu_st_helpers
[opc
& (MO_BSWAP
| MO_SIZE
)]);
1430 #elif defined(__x86_64__) && defined(__linux__)
1431 # include <asm/prctl.h>
1432 # include <sys/prctl.h>
1434 int arch_prctl(int code
, unsigned long addr
);
1436 static int guest_base_flags
;
1437 static inline void setup_guest_base_seg(void)
1439 if (arch_prctl(ARCH_SET_GS
, guest_base
) == 0) {
1440 guest_base_flags
= P_GS
;
1444 # define guest_base_flags 0
1445 static inline void setup_guest_base_seg(void) { }
1446 #endif /* SOFTMMU */
1448 static void tcg_out_qemu_ld_direct(TCGContext
*s
, TCGReg datalo
, TCGReg datahi
,
1449 TCGReg base
, int index
, intptr_t ofs
,
1450 int seg
, TCGMemOp memop
)
1452 const TCGMemOp real_bswap
= memop
& MO_BSWAP
;
1453 TCGMemOp bswap
= real_bswap
;
1454 int movop
= OPC_MOVL_GvEv
;
1456 if (have_movbe
&& real_bswap
) {
1458 movop
= OPC_MOVBE_GyMy
;
1461 switch (memop
& MO_SSIZE
) {
1463 tcg_out_modrm_sib_offset(s
, OPC_MOVZBL
+ seg
, datalo
,
1464 base
, index
, 0, ofs
);
1467 tcg_out_modrm_sib_offset(s
, OPC_MOVSBL
+ P_REXW
+ seg
, datalo
,
1468 base
, index
, 0, ofs
);
1471 tcg_out_modrm_sib_offset(s
, OPC_MOVZWL
+ seg
, datalo
,
1472 base
, index
, 0, ofs
);
1474 tcg_out_rolw_8(s
, datalo
);
1480 tcg_out_modrm_sib_offset(s
, OPC_MOVBE_GyMy
+ P_DATA16
+ seg
,
1481 datalo
, base
, index
, 0, ofs
);
1483 tcg_out_modrm_sib_offset(s
, OPC_MOVZWL
+ seg
, datalo
,
1484 base
, index
, 0, ofs
);
1485 tcg_out_rolw_8(s
, datalo
);
1487 tcg_out_modrm(s
, OPC_MOVSWL
+ P_REXW
, datalo
, datalo
);
1489 tcg_out_modrm_sib_offset(s
, OPC_MOVSWL
+ P_REXW
+ seg
,
1490 datalo
, base
, index
, 0, ofs
);
1494 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
, base
, index
, 0, ofs
);
1496 tcg_out_bswap32(s
, datalo
);
1499 #if TCG_TARGET_REG_BITS == 64
1502 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
,
1503 base
, index
, 0, ofs
);
1505 tcg_out_bswap32(s
, datalo
);
1507 tcg_out_ext32s(s
, datalo
, datalo
);
1509 tcg_out_modrm_sib_offset(s
, OPC_MOVSLQ
+ seg
, datalo
,
1510 base
, index
, 0, ofs
);
1515 if (TCG_TARGET_REG_BITS
== 64) {
1516 tcg_out_modrm_sib_offset(s
, movop
+ P_REXW
+ seg
, datalo
,
1517 base
, index
, 0, ofs
);
1519 tcg_out_bswap64(s
, datalo
);
1527 if (base
!= datalo
) {
1528 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
,
1529 base
, index
, 0, ofs
);
1530 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datahi
,
1531 base
, index
, 0, ofs
+ 4);
1533 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datahi
,
1534 base
, index
, 0, ofs
+ 4);
1535 tcg_out_modrm_sib_offset(s
, movop
+ seg
, datalo
,
1536 base
, index
, 0, ofs
);
1539 tcg_out_bswap32(s
, datalo
);
1540 tcg_out_bswap32(s
, datahi
);
1549 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1550 EAX. It will be useful once fixed registers globals are less
1552 static void tcg_out_qemu_ld(TCGContext
*s
, const TCGArg
*args
, bool is64
)
1554 TCGReg datalo
, datahi
, addrlo
;
1555 TCGReg addrhi
__attribute__((unused
));
1558 #if defined(CONFIG_SOFTMMU)
1560 tcg_insn_unit
*label_ptr
[2];
1564 datahi
= (TCG_TARGET_REG_BITS
== 32 && is64
? *args
++ : 0);
1566 addrhi
= (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
? *args
++ : 0);
1568 opc
= get_memop(oi
);
1570 #if defined(CONFIG_SOFTMMU)
1571 mem_index
= get_mmuidx(oi
);
1573 tcg_out_tlb_load(s
, addrlo
, addrhi
, mem_index
, opc
,
1574 label_ptr
, offsetof(CPUTLBEntry
, addr_read
));
1577 tcg_out_qemu_ld_direct(s
, datalo
, datahi
, TCG_REG_L1
, -1, 0, 0, opc
);
1579 /* Record the current context of a load into ldst label */
1580 add_qemu_ldst_label(s
, true, oi
, datalo
, datahi
, addrlo
, addrhi
,
1581 s
->code_ptr
, label_ptr
);
1584 int32_t offset
= guest_base
;
1585 TCGReg base
= addrlo
;
1589 /* For a 32-bit guest, the high 32 bits may contain garbage.
1590 We can do this with the ADDR32 prefix if we're not using
1591 a guest base, or when using segmentation. Otherwise we
1592 need to zero-extend manually. */
1593 if (guest_base
== 0 || guest_base_flags
) {
1594 seg
= guest_base_flags
;
1596 if (TCG_TARGET_REG_BITS
> TARGET_LONG_BITS
) {
1599 } else if (TCG_TARGET_REG_BITS
== 64) {
1600 if (TARGET_LONG_BITS
== 32) {
1601 tcg_out_ext32u(s
, TCG_REG_L0
, base
);
1604 if (offset
!= guest_base
) {
1605 tcg_out_movi(s
, TCG_TYPE_I64
, TCG_REG_L1
, guest_base
);
1611 tcg_out_qemu_ld_direct(s
, datalo
, datahi
,
1612 base
, index
, offset
, seg
, opc
);
1617 static void tcg_out_qemu_st_direct(TCGContext
*s
, TCGReg datalo
, TCGReg datahi
,
1618 TCGReg base
, intptr_t ofs
, int seg
,
1621 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1622 we could perform the bswap twice to restore the original value
1623 instead of moving to the scratch. But as it is, the L constraint
1624 means that TCG_REG_L0 is definitely free here. */
1625 const TCGReg scratch
= TCG_REG_L0
;
1626 const TCGMemOp real_bswap
= memop
& MO_BSWAP
;
1627 TCGMemOp bswap
= real_bswap
;
1628 int movop
= OPC_MOVL_EvGv
;
1630 if (have_movbe
&& real_bswap
) {
1632 movop
= OPC_MOVBE_MyGy
;
1635 switch (memop
& MO_SIZE
) {
1637 /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1638 Use the scratch register if necessary. */
1639 if (TCG_TARGET_REG_BITS
== 32 && datalo
>= 4) {
1640 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1643 tcg_out_modrm_offset(s
, OPC_MOVB_EvGv
+ P_REXB_R
+ seg
,
1648 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1649 tcg_out_rolw_8(s
, scratch
);
1652 tcg_out_modrm_offset(s
, movop
+ P_DATA16
+ seg
, datalo
, base
, ofs
);
1656 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1657 tcg_out_bswap32(s
, scratch
);
1660 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1663 if (TCG_TARGET_REG_BITS
== 64) {
1665 tcg_out_mov(s
, TCG_TYPE_I64
, scratch
, datalo
);
1666 tcg_out_bswap64(s
, scratch
);
1669 tcg_out_modrm_offset(s
, movop
+ P_REXW
+ seg
, datalo
, base
, ofs
);
1671 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datahi
);
1672 tcg_out_bswap32(s
, scratch
);
1673 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
+ seg
, scratch
, base
, ofs
);
1674 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1675 tcg_out_bswap32(s
, scratch
);
1676 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
+ seg
, scratch
, base
, ofs
+4);
1683 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1684 tcg_out_modrm_offset(s
, movop
+ seg
, datahi
, base
, ofs
+4);
1692 static void tcg_out_qemu_st(TCGContext
*s
, const TCGArg
*args
, bool is64
)
1694 TCGReg datalo
, datahi
, addrlo
;
1695 TCGReg addrhi
__attribute__((unused
));
1698 #if defined(CONFIG_SOFTMMU)
1700 tcg_insn_unit
*label_ptr
[2];
1704 datahi
= (TCG_TARGET_REG_BITS
== 32 && is64
? *args
++ : 0);
1706 addrhi
= (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
? *args
++ : 0);
1708 opc
= get_memop(oi
);
1710 #if defined(CONFIG_SOFTMMU)
1711 mem_index
= get_mmuidx(oi
);
1713 tcg_out_tlb_load(s
, addrlo
, addrhi
, mem_index
, opc
,
1714 label_ptr
, offsetof(CPUTLBEntry
, addr_write
));
1717 tcg_out_qemu_st_direct(s
, datalo
, datahi
, TCG_REG_L1
, 0, 0, opc
);
1719 /* Record the current context of a store into ldst label */
1720 add_qemu_ldst_label(s
, false, oi
, datalo
, datahi
, addrlo
, addrhi
,
1721 s
->code_ptr
, label_ptr
);
1724 int32_t offset
= guest_base
;
1725 TCGReg base
= addrlo
;
1728 /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */
1729 if (guest_base
== 0 || guest_base_flags
) {
1730 seg
= guest_base_flags
;
1732 if (TCG_TARGET_REG_BITS
> TARGET_LONG_BITS
) {
1735 } else if (TCG_TARGET_REG_BITS
== 64) {
1736 /* ??? Note that we can't use the same SIB addressing scheme
1737 as for loads, since we require L0 free for bswap. */
1738 if (offset
!= guest_base
) {
1739 if (TARGET_LONG_BITS
== 32) {
1740 tcg_out_ext32u(s
, TCG_REG_L0
, base
);
1743 tcg_out_movi(s
, TCG_TYPE_I64
, TCG_REG_L1
, guest_base
);
1744 tgen_arithr(s
, ARITH_ADD
+ P_REXW
, TCG_REG_L1
, base
);
1747 } else if (TARGET_LONG_BITS
== 32) {
1748 tcg_out_ext32u(s
, TCG_REG_L1
, base
);
1753 tcg_out_qemu_st_direct(s
, datalo
, datahi
, base
, offset
, seg
, opc
);
1758 static inline void tcg_out_op(TCGContext
*s
, TCGOpcode opc
,
1759 const TCGArg
*args
, const int *const_args
)
1761 int c
, vexop
, rexw
= 0;
1763 #if TCG_TARGET_REG_BITS == 64
1764 # define OP_32_64(x) \
1765 case glue(glue(INDEX_op_, x), _i64): \
1766 rexw = P_REXW; /* FALLTHRU */ \
1767 case glue(glue(INDEX_op_, x), _i32)
1769 # define OP_32_64(x) \
1770 case glue(glue(INDEX_op_, x), _i32)
1774 case INDEX_op_exit_tb
:
1775 tcg_out_movi(s
, TCG_TYPE_PTR
, TCG_REG_EAX
, args
[0]);
1776 tcg_out_jmp(s
, tb_ret_addr
);
1778 case INDEX_op_goto_tb
:
1779 if (s
->tb_jmp_offset
) {
1780 /* direct jump method */
1781 tcg_out8(s
, OPC_JMP_long
); /* jmp im */
1782 s
->tb_jmp_offset
[args
[0]] = tcg_current_code_size(s
);
1785 /* indirect jump method */
1786 tcg_out_modrm_offset(s
, OPC_GRP5
, EXT5_JMPN_Ev
, -1,
1787 (intptr_t)(s
->tb_next
+ args
[0]));
1789 s
->tb_next_offset
[args
[0]] = tcg_current_code_size(s
);
1792 tcg_out_jxx(s
, JCC_JMP
, arg_label(args
[0]), 0);
1795 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1796 tcg_out_modrm_offset(s
, OPC_MOVZBL
, args
[0], args
[1], args
[2]);
1799 tcg_out_modrm_offset(s
, OPC_MOVSBL
+ rexw
, args
[0], args
[1], args
[2]);
1802 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1803 tcg_out_modrm_offset(s
, OPC_MOVZWL
, args
[0], args
[1], args
[2]);
1806 tcg_out_modrm_offset(s
, OPC_MOVSWL
+ rexw
, args
[0], args
[1], args
[2]);
1808 #if TCG_TARGET_REG_BITS == 64
1809 case INDEX_op_ld32u_i64
:
1811 case INDEX_op_ld_i32
:
1812 tcg_out_ld(s
, TCG_TYPE_I32
, args
[0], args
[1], args
[2]);
1816 if (const_args
[0]) {
1817 tcg_out_modrm_offset(s
, OPC_MOVB_EvIz
,
1818 0, args
[1], args
[2]);
1819 tcg_out8(s
, args
[0]);
1821 tcg_out_modrm_offset(s
, OPC_MOVB_EvGv
| P_REXB_R
,
1822 args
[0], args
[1], args
[2]);
1826 if (const_args
[0]) {
1827 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
| P_DATA16
,
1828 0, args
[1], args
[2]);
1829 tcg_out16(s
, args
[0]);
1831 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
| P_DATA16
,
1832 args
[0], args
[1], args
[2]);
1835 #if TCG_TARGET_REG_BITS == 64
1836 case INDEX_op_st32_i64
:
1838 case INDEX_op_st_i32
:
1839 if (const_args
[0]) {
1840 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
, 0, args
[1], args
[2]);
1841 tcg_out32(s
, args
[0]);
1843 tcg_out_st(s
, TCG_TYPE_I32
, args
[0], args
[1], args
[2]);
1848 /* For 3-operand addition, use LEA. */
1849 if (args
[0] != args
[1]) {
1850 TCGArg a0
= args
[0], a1
= args
[1], a2
= args
[2], c3
= 0;
1852 if (const_args
[2]) {
1854 } else if (a0
== a2
) {
1855 /* Watch out for dest = src + dest, since we've removed
1856 the matching constraint on the add. */
1857 tgen_arithr(s
, ARITH_ADD
+ rexw
, a0
, a1
);
1861 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ rexw
, a0
, a1
, a2
, 0, c3
);
1879 if (const_args
[2]) {
1880 tgen_arithi(s
, c
+ rexw
, args
[0], args
[2], 0);
1882 tgen_arithr(s
, c
+ rexw
, args
[0], args
[2]);
1887 if (const_args
[2]) {
1888 tcg_out_mov(s
, rexw
? TCG_TYPE_I64
: TCG_TYPE_I32
,
1890 tgen_arithi(s
, ARITH_AND
+ rexw
, args
[0], ~args
[2], 0);
1892 tcg_out_vex_modrm(s
, OPC_ANDN
+ rexw
, args
[0], args
[2], args
[1]);
1897 if (const_args
[2]) {
1900 if (val
== (int8_t)val
) {
1901 tcg_out_modrm(s
, OPC_IMUL_GvEvIb
+ rexw
, args
[0], args
[0]);
1904 tcg_out_modrm(s
, OPC_IMUL_GvEvIz
+ rexw
, args
[0], args
[0]);
1908 tcg_out_modrm(s
, OPC_IMUL_GvEv
+ rexw
, args
[0], args
[2]);
1913 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_IDIV
, args
[4]);
1916 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_DIV
, args
[4]);
1922 goto gen_shift_maybe_vex
;
1926 goto gen_shift_maybe_vex
;
1930 goto gen_shift_maybe_vex
;
1937 gen_shift_maybe_vex
:
1938 if (have_bmi2
&& !const_args
[2]) {
1939 tcg_out_vex_modrm(s
, vexop
+ rexw
, args
[0], args
[2], args
[1]);
1944 if (const_args
[2]) {
1945 tcg_out_shifti(s
, c
+ rexw
, args
[0], args
[2]);
1947 tcg_out_modrm(s
, OPC_SHIFT_cl
+ rexw
, c
, args
[0]);
1951 case INDEX_op_brcond_i32
:
1952 tcg_out_brcond32(s
, args
[2], args
[0], args
[1], const_args
[1],
1953 arg_label(args
[3]), 0);
1955 case INDEX_op_setcond_i32
:
1956 tcg_out_setcond32(s
, args
[3], args
[0], args
[1],
1957 args
[2], const_args
[2]);
1959 case INDEX_op_movcond_i32
:
1960 tcg_out_movcond32(s
, args
[5], args
[0], args
[1],
1961 args
[2], const_args
[2], args
[3]);
1965 tcg_out_rolw_8(s
, args
[0]);
1968 tcg_out_bswap32(s
, args
[0]);
1972 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_NEG
, args
[0]);
1975 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_NOT
, args
[0]);
1979 tcg_out_ext8s(s
, args
[0], args
[1], rexw
);
1982 tcg_out_ext16s(s
, args
[0], args
[1], rexw
);
1985 tcg_out_ext8u(s
, args
[0], args
[1]);
1988 tcg_out_ext16u(s
, args
[0], args
[1]);
1991 case INDEX_op_qemu_ld_i32
:
1992 tcg_out_qemu_ld(s
, args
, 0);
1994 case INDEX_op_qemu_ld_i64
:
1995 tcg_out_qemu_ld(s
, args
, 1);
1997 case INDEX_op_qemu_st_i32
:
1998 tcg_out_qemu_st(s
, args
, 0);
2000 case INDEX_op_qemu_st_i64
:
2001 tcg_out_qemu_st(s
, args
, 1);
2005 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_MUL
, args
[3]);
2008 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_IMUL
, args
[3]);
2011 if (const_args
[4]) {
2012 tgen_arithi(s
, ARITH_ADD
+ rexw
, args
[0], args
[4], 1);
2014 tgen_arithr(s
, ARITH_ADD
+ rexw
, args
[0], args
[4]);
2016 if (const_args
[5]) {
2017 tgen_arithi(s
, ARITH_ADC
+ rexw
, args
[1], args
[5], 1);
2019 tgen_arithr(s
, ARITH_ADC
+ rexw
, args
[1], args
[5]);
2023 if (const_args
[4]) {
2024 tgen_arithi(s
, ARITH_SUB
+ rexw
, args
[0], args
[4], 1);
2026 tgen_arithr(s
, ARITH_SUB
+ rexw
, args
[0], args
[4]);
2028 if (const_args
[5]) {
2029 tgen_arithi(s
, ARITH_SBB
+ rexw
, args
[1], args
[5], 1);
2031 tgen_arithr(s
, ARITH_SBB
+ rexw
, args
[1], args
[5]);
2035 #if TCG_TARGET_REG_BITS == 32
2036 case INDEX_op_brcond2_i32
:
2037 tcg_out_brcond2(s
, args
, const_args
, 0);
2039 case INDEX_op_setcond2_i32
:
2040 tcg_out_setcond2(s
, args
, const_args
);
2042 #else /* TCG_TARGET_REG_BITS == 64 */
2043 case INDEX_op_ld32s_i64
:
2044 tcg_out_modrm_offset(s
, OPC_MOVSLQ
, args
[0], args
[1], args
[2]);
2046 case INDEX_op_ld_i64
:
2047 tcg_out_ld(s
, TCG_TYPE_I64
, args
[0], args
[1], args
[2]);
2049 case INDEX_op_st_i64
:
2050 if (const_args
[0]) {
2051 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
| P_REXW
,
2052 0, args
[1], args
[2]);
2053 tcg_out32(s
, args
[0]);
2055 tcg_out_st(s
, TCG_TYPE_I64
, args
[0], args
[1], args
[2]);
2059 case INDEX_op_brcond_i64
:
2060 tcg_out_brcond64(s
, args
[2], args
[0], args
[1], const_args
[1],
2061 arg_label(args
[3]), 0);
2063 case INDEX_op_setcond_i64
:
2064 tcg_out_setcond64(s
, args
[3], args
[0], args
[1],
2065 args
[2], const_args
[2]);
2067 case INDEX_op_movcond_i64
:
2068 tcg_out_movcond64(s
, args
[5], args
[0], args
[1],
2069 args
[2], const_args
[2], args
[3]);
2072 case INDEX_op_bswap64_i64
:
2073 tcg_out_bswap64(s
, args
[0]);
2075 case INDEX_op_extu_i32_i64
:
2076 case INDEX_op_ext32u_i64
:
2077 tcg_out_ext32u(s
, args
[0], args
[1]);
2079 case INDEX_op_ext_i32_i64
:
2080 case INDEX_op_ext32s_i64
:
2081 tcg_out_ext32s(s
, args
[0], args
[1]);
2086 if (args
[3] == 0 && args
[4] == 8) {
2087 /* load bits 0..7 */
2088 tcg_out_modrm(s
, OPC_MOVB_EvGv
| P_REXB_R
| P_REXB_RM
,
2090 } else if (args
[3] == 8 && args
[4] == 8) {
2091 /* load bits 8..15 */
2092 tcg_out_modrm(s
, OPC_MOVB_EvGv
, args
[2], args
[0] + 4);
2093 } else if (args
[3] == 0 && args
[4] == 16) {
2094 /* load bits 0..15 */
2095 tcg_out_modrm(s
, OPC_MOVL_EvGv
| P_DATA16
, args
[2], args
[0]);
2101 case INDEX_op_mov_i32
: /* Always emitted via tcg_out_mov. */
2102 case INDEX_op_mov_i64
:
2103 case INDEX_op_movi_i32
: /* Always emitted via tcg_out_movi. */
2104 case INDEX_op_movi_i64
:
2105 case INDEX_op_call
: /* Always emitted via tcg_out_call. */
2113 static const TCGTargetOpDef x86_op_defs
[] = {
2114 { INDEX_op_exit_tb
, { } },
2115 { INDEX_op_goto_tb
, { } },
2116 { INDEX_op_br
, { } },
2117 { INDEX_op_ld8u_i32
, { "r", "r" } },
2118 { INDEX_op_ld8s_i32
, { "r", "r" } },
2119 { INDEX_op_ld16u_i32
, { "r", "r" } },
2120 { INDEX_op_ld16s_i32
, { "r", "r" } },
2121 { INDEX_op_ld_i32
, { "r", "r" } },
2122 { INDEX_op_st8_i32
, { "qi", "r" } },
2123 { INDEX_op_st16_i32
, { "ri", "r" } },
2124 { INDEX_op_st_i32
, { "ri", "r" } },
2126 { INDEX_op_add_i32
, { "r", "r", "ri" } },
2127 { INDEX_op_sub_i32
, { "r", "0", "ri" } },
2128 { INDEX_op_mul_i32
, { "r", "0", "ri" } },
2129 { INDEX_op_div2_i32
, { "a", "d", "0", "1", "r" } },
2130 { INDEX_op_divu2_i32
, { "a", "d", "0", "1", "r" } },
2131 { INDEX_op_and_i32
, { "r", "0", "ri" } },
2132 { INDEX_op_or_i32
, { "r", "0", "ri" } },
2133 { INDEX_op_xor_i32
, { "r", "0", "ri" } },
2134 { INDEX_op_andc_i32
, { "r", "r", "ri" } },
2136 { INDEX_op_shl_i32
, { "r", "0", "Ci" } },
2137 { INDEX_op_shr_i32
, { "r", "0", "Ci" } },
2138 { INDEX_op_sar_i32
, { "r", "0", "Ci" } },
2139 { INDEX_op_rotl_i32
, { "r", "0", "ci" } },
2140 { INDEX_op_rotr_i32
, { "r", "0", "ci" } },
2142 { INDEX_op_brcond_i32
, { "r", "ri" } },
2144 { INDEX_op_bswap16_i32
, { "r", "0" } },
2145 { INDEX_op_bswap32_i32
, { "r", "0" } },
2147 { INDEX_op_neg_i32
, { "r", "0" } },
2149 { INDEX_op_not_i32
, { "r", "0" } },
2151 { INDEX_op_ext8s_i32
, { "r", "q" } },
2152 { INDEX_op_ext16s_i32
, { "r", "r" } },
2153 { INDEX_op_ext8u_i32
, { "r", "q" } },
2154 { INDEX_op_ext16u_i32
, { "r", "r" } },
2156 { INDEX_op_setcond_i32
, { "q", "r", "ri" } },
2158 { INDEX_op_deposit_i32
, { "Q", "0", "Q" } },
2159 { INDEX_op_movcond_i32
, { "r", "r", "ri", "r", "0" } },
2161 { INDEX_op_mulu2_i32
, { "a", "d", "a", "r" } },
2162 { INDEX_op_muls2_i32
, { "a", "d", "a", "r" } },
2163 { INDEX_op_add2_i32
, { "r", "r", "0", "1", "ri", "ri" } },
2164 { INDEX_op_sub2_i32
, { "r", "r", "0", "1", "ri", "ri" } },
2166 #if TCG_TARGET_REG_BITS == 32
2167 { INDEX_op_brcond2_i32
, { "r", "r", "ri", "ri" } },
2168 { INDEX_op_setcond2_i32
, { "r", "r", "r", "ri", "ri" } },
2170 { INDEX_op_ld8u_i64
, { "r", "r" } },
2171 { INDEX_op_ld8s_i64
, { "r", "r" } },
2172 { INDEX_op_ld16u_i64
, { "r", "r" } },
2173 { INDEX_op_ld16s_i64
, { "r", "r" } },
2174 { INDEX_op_ld32u_i64
, { "r", "r" } },
2175 { INDEX_op_ld32s_i64
, { "r", "r" } },
2176 { INDEX_op_ld_i64
, { "r", "r" } },
2177 { INDEX_op_st8_i64
, { "ri", "r" } },
2178 { INDEX_op_st16_i64
, { "ri", "r" } },
2179 { INDEX_op_st32_i64
, { "ri", "r" } },
2180 { INDEX_op_st_i64
, { "re", "r" } },
2182 { INDEX_op_add_i64
, { "r", "r", "re" } },
2183 { INDEX_op_mul_i64
, { "r", "0", "re" } },
2184 { INDEX_op_div2_i64
, { "a", "d", "0", "1", "r" } },
2185 { INDEX_op_divu2_i64
, { "a", "d", "0", "1", "r" } },
2186 { INDEX_op_sub_i64
, { "r", "0", "re" } },
2187 { INDEX_op_and_i64
, { "r", "0", "reZ" } },
2188 { INDEX_op_or_i64
, { "r", "0", "re" } },
2189 { INDEX_op_xor_i64
, { "r", "0", "re" } },
2190 { INDEX_op_andc_i64
, { "r", "r", "rI" } },
2192 { INDEX_op_shl_i64
, { "r", "0", "Ci" } },
2193 { INDEX_op_shr_i64
, { "r", "0", "Ci" } },
2194 { INDEX_op_sar_i64
, { "r", "0", "Ci" } },
2195 { INDEX_op_rotl_i64
, { "r", "0", "ci" } },
2196 { INDEX_op_rotr_i64
, { "r", "0", "ci" } },
2198 { INDEX_op_brcond_i64
, { "r", "re" } },
2199 { INDEX_op_setcond_i64
, { "r", "r", "re" } },
2201 { INDEX_op_bswap16_i64
, { "r", "0" } },
2202 { INDEX_op_bswap32_i64
, { "r", "0" } },
2203 { INDEX_op_bswap64_i64
, { "r", "0" } },
2204 { INDEX_op_neg_i64
, { "r", "0" } },
2205 { INDEX_op_not_i64
, { "r", "0" } },
2207 { INDEX_op_ext8s_i64
, { "r", "r" } },
2208 { INDEX_op_ext16s_i64
, { "r", "r" } },
2209 { INDEX_op_ext32s_i64
, { "r", "r" } },
2210 { INDEX_op_ext8u_i64
, { "r", "r" } },
2211 { INDEX_op_ext16u_i64
, { "r", "r" } },
2212 { INDEX_op_ext32u_i64
, { "r", "r" } },
2214 { INDEX_op_ext_i32_i64
, { "r", "r" } },
2215 { INDEX_op_extu_i32_i64
, { "r", "r" } },
2217 { INDEX_op_deposit_i64
, { "Q", "0", "Q" } },
2218 { INDEX_op_movcond_i64
, { "r", "r", "re", "r", "0" } },
2220 { INDEX_op_mulu2_i64
, { "a", "d", "a", "r" } },
2221 { INDEX_op_muls2_i64
, { "a", "d", "a", "r" } },
2222 { INDEX_op_add2_i64
, { "r", "r", "0", "1", "re", "re" } },
2223 { INDEX_op_sub2_i64
, { "r", "r", "0", "1", "re", "re" } },
2226 #if TCG_TARGET_REG_BITS == 64
2227 { INDEX_op_qemu_ld_i32
, { "r", "L" } },
2228 { INDEX_op_qemu_st_i32
, { "L", "L" } },
2229 { INDEX_op_qemu_ld_i64
, { "r", "L" } },
2230 { INDEX_op_qemu_st_i64
, { "L", "L" } },
2231 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2232 { INDEX_op_qemu_ld_i32
, { "r", "L" } },
2233 { INDEX_op_qemu_st_i32
, { "L", "L" } },
2234 { INDEX_op_qemu_ld_i64
, { "r", "r", "L" } },
2235 { INDEX_op_qemu_st_i64
, { "L", "L", "L" } },
2237 { INDEX_op_qemu_ld_i32
, { "r", "L", "L" } },
2238 { INDEX_op_qemu_st_i32
, { "L", "L", "L" } },
2239 { INDEX_op_qemu_ld_i64
, { "r", "r", "L", "L" } },
2240 { INDEX_op_qemu_st_i64
, { "L", "L", "L", "L" } },
2245 static int tcg_target_callee_save_regs
[] = {
2246 #if TCG_TARGET_REG_BITS == 64
2255 TCG_REG_R14
, /* Currently used for the global env. */
2258 TCG_REG_EBP
, /* Currently used for the global env. */
2265 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2266 and tcg_register_jit. */
2269 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2270 * (TCG_TARGET_REG_BITS / 8))
2272 #define FRAME_SIZE \
2274 + TCG_STATIC_CALL_ARGS_SIZE \
2275 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2276 + TCG_TARGET_STACK_ALIGN - 1) \
2277 & ~(TCG_TARGET_STACK_ALIGN - 1))
2279 /* Generate global QEMU prologue and epilogue code */
2280 static void tcg_target_qemu_prologue(TCGContext
*s
)
2282 int i
, stack_addend
;
2286 /* Reserve some stack space, also for TCG temps. */
2287 stack_addend
= FRAME_SIZE
- PUSH_SIZE
;
2288 tcg_set_frame(s
, TCG_REG_CALL_STACK
, TCG_STATIC_CALL_ARGS_SIZE
,
2289 CPU_TEMP_BUF_NLONGS
* sizeof(long));
2291 /* Save all callee saved registers. */
2292 for (i
= 0; i
< ARRAY_SIZE(tcg_target_callee_save_regs
); i
++) {
2293 tcg_out_push(s
, tcg_target_callee_save_regs
[i
]);
2296 #if TCG_TARGET_REG_BITS == 32
2297 tcg_out_ld(s
, TCG_TYPE_PTR
, TCG_AREG0
, TCG_REG_ESP
,
2298 (ARRAY_SIZE(tcg_target_callee_save_regs
) + 1) * 4);
2299 tcg_out_addi(s
, TCG_REG_ESP
, -stack_addend
);
2301 tcg_out_modrm_offset(s
, OPC_GRP5
, EXT5_JMPN_Ev
, TCG_REG_ESP
,
2302 (ARRAY_SIZE(tcg_target_callee_save_regs
) + 2) * 4
2305 tcg_out_mov(s
, TCG_TYPE_PTR
, TCG_AREG0
, tcg_target_call_iarg_regs
[0]);
2306 tcg_out_addi(s
, TCG_REG_ESP
, -stack_addend
);
2308 tcg_out_modrm(s
, OPC_GRP5
, EXT5_JMPN_Ev
, tcg_target_call_iarg_regs
[1]);
2312 tb_ret_addr
= s
->code_ptr
;
2314 tcg_out_addi(s
, TCG_REG_CALL_STACK
, stack_addend
);
2316 for (i
= ARRAY_SIZE(tcg_target_callee_save_regs
) - 1; i
>= 0; i
--) {
2317 tcg_out_pop(s
, tcg_target_callee_save_regs
[i
]);
2319 tcg_out_opc(s
, OPC_RET
, 0, 0, 0);
2321 #if !defined(CONFIG_SOFTMMU)
2322 /* Try to set up a segment register to point to guest_base. */
2324 setup_guest_base_seg();
2329 static void tcg_target_init(TCGContext
*s
)
2331 #ifdef CONFIG_CPUID_H
2332 unsigned a
, b
, c
, d
;
2333 int max
= __get_cpuid_max(0, 0);
2336 __cpuid(1, a
, b
, c
, d
);
2338 /* For 32-bit, 99% certainty that we're running on hardware that
2339 supports cmov, but we still need to check. In case cmov is not
2340 available, we'll use a small forward branch. */
2341 have_cmov
= (d
& bit_CMOV
) != 0;
2344 /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
2345 need to probe for it. */
2346 have_movbe
= (c
& bit_MOVBE
) != 0;
2351 /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
2352 __cpuid_count(7, 0, a
, b
, c
, d
);
2354 have_bmi1
= (b
& bit_BMI
) != 0;
2357 have_bmi2
= (b
& bit_BMI2
) != 0;
2362 if (TCG_TARGET_REG_BITS
== 64) {
2363 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I32
], 0, 0xffff);
2364 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I64
], 0, 0xffff);
2366 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I32
], 0, 0xff);
2369 tcg_regset_clear(tcg_target_call_clobber_regs
);
2370 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_EAX
);
2371 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_EDX
);
2372 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_ECX
);
2373 if (TCG_TARGET_REG_BITS
== 64) {
2374 #if !defined(_WIN64)
2375 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_RDI
);
2376 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_RSI
);
2378 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R8
);
2379 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R9
);
2380 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R10
);
2381 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R11
);
2384 tcg_regset_clear(s
->reserved_regs
);
2385 tcg_regset_set_reg(s
->reserved_regs
, TCG_REG_CALL_STACK
);
2387 tcg_add_target_add_op_defs(x86_op_defs
);
2392 uint8_t fde_def_cfa
[4];
2393 uint8_t fde_reg_ofs
[14];
2396 /* We're expecting a 2 byte uleb128 encoded value. */
2397 QEMU_BUILD_BUG_ON(FRAME_SIZE
>= (1 << 14));
2399 #if !defined(__ELF__)
2400 /* Host machine without ELF. */
2401 #elif TCG_TARGET_REG_BITS == 64
2402 #define ELF_HOST_MACHINE EM_X86_64
2403 static const DebugFrame debug_frame
= {
2404 .h
.cie
.len
= sizeof(DebugFrameCIE
)-4, /* length after .len member */
2407 .h
.cie
.code_align
= 1,
2408 .h
.cie
.data_align
= 0x78, /* sleb128 -8 */
2409 .h
.cie
.return_column
= 16,
2411 /* Total FDE size does not include the "len" member. */
2412 .h
.fde
.len
= sizeof(DebugFrame
) - offsetof(DebugFrame
, h
.fde
.cie_offset
),
2415 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2416 (FRAME_SIZE
& 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2420 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2421 /* The following ordering must match tcg_target_callee_save_regs. */
2422 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2423 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2424 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2425 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2426 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2427 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2431 #define ELF_HOST_MACHINE EM_386
2432 static const DebugFrame debug_frame
= {
2433 .h
.cie
.len
= sizeof(DebugFrameCIE
)-4, /* length after .len member */
2436 .h
.cie
.code_align
= 1,
2437 .h
.cie
.data_align
= 0x7c, /* sleb128 -4 */
2438 .h
.cie
.return_column
= 8,
2440 /* Total FDE size does not include the "len" member. */
2441 .h
.fde
.len
= sizeof(DebugFrame
) - offsetof(DebugFrame
, h
.fde
.cie_offset
),
2444 12, 4, /* DW_CFA_def_cfa %esp, ... */
2445 (FRAME_SIZE
& 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2449 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2450 /* The following ordering must match tcg_target_callee_save_regs. */
2451 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2452 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2453 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2454 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2459 #if defined(ELF_HOST_MACHINE)
2460 void tcg_register_jit(void *buf
, size_t buf_size
)
2462 tcg_register_jit_int(buf
, buf_size
, &debug_frame
, sizeof(debug_frame
));