2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "tcg-be-ldst.h"
28 static const char * const tcg_target_reg_names
[TCG_TARGET_NB_REGS
] = {
29 #if TCG_TARGET_REG_BITS == 64
30 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
31 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
33 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
38 static const int tcg_target_reg_alloc_order
[] = {
39 #if TCG_TARGET_REG_BITS == 64
66 static const int tcg_target_call_iarg_regs
[] = {
67 #if TCG_TARGET_REG_BITS == 64
80 /* 32 bit mode uses stack based calling convention (GCC default). */
84 static const int tcg_target_call_oarg_regs
[] = {
86 #if TCG_TARGET_REG_BITS == 32
91 /* Registers used with L constraint, which are the first argument
92 registers on x86_64, and two random call clobbered registers on
94 #if TCG_TARGET_REG_BITS == 64
95 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
96 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
98 # define TCG_REG_L0 TCG_REG_EAX
99 # define TCG_REG_L1 TCG_REG_EDX
102 /* The host compiler should supply <cpuid.h> to enable runtime feature
103 detection, as we're not going to go so far as to write our own inline
104 assembly. If <cpuid.h> is not available, default values will be assumed. */
105 #if defined(CONFIG_CPUID_H)
109 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
111 #if TCG_TARGET_REG_BITS == 64
113 #elif defined(CONFIG_CPUID_H)
114 static bool have_cmov
;
119 /* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
120 going to attempt to determine at runtime whether movbe is available. */
121 #if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
122 static bool have_movbe
;
124 # define have_movbe 0
127 static uint8_t *tb_ret_addr
;
129 static void patch_reloc(uint8_t *code_ptr
, int type
,
130 intptr_t value
, intptr_t addend
)
135 value
-= (uintptr_t)code_ptr
;
136 if (value
!= (int32_t)value
) {
139 *(uint32_t *)code_ptr
= value
;
142 value
-= (uintptr_t)code_ptr
;
143 if (value
!= (int8_t)value
) {
146 *(uint8_t *)code_ptr
= value
;
153 /* parse target specific constraints */
154 static int target_parse_constraint(TCGArgConstraint
*ct
, const char **pct_str
)
161 ct
->ct
|= TCG_CT_REG
;
162 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EAX
);
165 ct
->ct
|= TCG_CT_REG
;
166 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EBX
);
169 ct
->ct
|= TCG_CT_REG
;
170 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_ECX
);
173 ct
->ct
|= TCG_CT_REG
;
174 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EDX
);
177 ct
->ct
|= TCG_CT_REG
;
178 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_ESI
);
181 ct
->ct
|= TCG_CT_REG
;
182 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EDI
);
185 ct
->ct
|= TCG_CT_REG
;
186 if (TCG_TARGET_REG_BITS
== 64) {
187 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
189 tcg_regset_set32(ct
->u
.regs
, 0, 0xf);
193 ct
->ct
|= TCG_CT_REG
;
194 tcg_regset_set32(ct
->u
.regs
, 0, 0xf);
197 ct
->ct
|= TCG_CT_REG
;
198 if (TCG_TARGET_REG_BITS
== 64) {
199 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
201 tcg_regset_set32(ct
->u
.regs
, 0, 0xff);
205 /* qemu_ld/st address constraint */
207 ct
->ct
|= TCG_CT_REG
;
208 if (TCG_TARGET_REG_BITS
== 64) {
209 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
211 tcg_regset_set32(ct
->u
.regs
, 0, 0xff);
213 tcg_regset_reset_reg(ct
->u
.regs
, TCG_REG_L0
);
214 tcg_regset_reset_reg(ct
->u
.regs
, TCG_REG_L1
);
218 ct
->ct
|= TCG_CT_CONST_S32
;
221 ct
->ct
|= TCG_CT_CONST_U32
;
232 /* test if a constant matches the constraint */
233 static inline int tcg_target_const_match(tcg_target_long val
,
234 const TCGArgConstraint
*arg_ct
)
237 if (ct
& TCG_CT_CONST
) {
240 if ((ct
& TCG_CT_CONST_S32
) && val
== (int32_t)val
) {
243 if ((ct
& TCG_CT_CONST_U32
) && val
== (uint32_t)val
) {
249 #if TCG_TARGET_REG_BITS == 64
250 # define LOWREGMASK(x) ((x) & 7)
252 # define LOWREGMASK(x) (x)
255 #define P_EXT 0x100 /* 0x0f opcode prefix */
256 #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
257 #define P_DATA16 0x400 /* 0x66 opcode prefix */
258 #if TCG_TARGET_REG_BITS == 64
259 # define P_ADDR32 0x800 /* 0x67 opcode prefix */
260 # define P_REXW 0x1000 /* Set REX.W = 1 */
261 # define P_REXB_R 0x2000 /* REG field as byte register */
262 # define P_REXB_RM 0x4000 /* R/M field as byte register */
263 # define P_GS 0x8000 /* gs segment override */
272 #define OPC_ARITH_EvIz (0x81)
273 #define OPC_ARITH_EvIb (0x83)
274 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
275 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
276 #define OPC_BSWAP (0xc8 | P_EXT)
277 #define OPC_CALL_Jz (0xe8)
278 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
279 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
280 #define OPC_DEC_r32 (0x48)
281 #define OPC_IMUL_GvEv (0xaf | P_EXT)
282 #define OPC_IMUL_GvEvIb (0x6b)
283 #define OPC_IMUL_GvEvIz (0x69)
284 #define OPC_INC_r32 (0x40)
285 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
286 #define OPC_JCC_short (0x70) /* ... plus condition code */
287 #define OPC_JMP_long (0xe9)
288 #define OPC_JMP_short (0xeb)
289 #define OPC_LEA (0x8d)
290 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
291 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
292 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
293 #define OPC_MOVB_EvIz (0xc6)
294 #define OPC_MOVL_EvIz (0xc7)
295 #define OPC_MOVL_Iv (0xb8)
296 #define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
297 #define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
298 #define OPC_MOVSBL (0xbe | P_EXT)
299 #define OPC_MOVSWL (0xbf | P_EXT)
300 #define OPC_MOVSLQ (0x63 | P_REXW)
301 #define OPC_MOVZBL (0xb6 | P_EXT)
302 #define OPC_MOVZWL (0xb7 | P_EXT)
303 #define OPC_POP_r32 (0x58)
304 #define OPC_PUSH_r32 (0x50)
305 #define OPC_PUSH_Iv (0x68)
306 #define OPC_PUSH_Ib (0x6a)
307 #define OPC_RET (0xc3)
308 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
309 #define OPC_SHIFT_1 (0xd1)
310 #define OPC_SHIFT_Ib (0xc1)
311 #define OPC_SHIFT_cl (0xd3)
312 #define OPC_TESTL (0x85)
313 #define OPC_XCHG_ax_r32 (0x90)
315 #define OPC_GRP3_Ev (0xf7)
316 #define OPC_GRP5 (0xff)
318 /* Group 1 opcode extensions for 0x80-0x83.
319 These are also used as modifiers for OPC_ARITH. */
329 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
336 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
344 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
345 #define EXT5_INC_Ev 0
346 #define EXT5_DEC_Ev 1
347 #define EXT5_CALLN_Ev 2
348 #define EXT5_JMPN_Ev 4
350 /* Condition codes to be added to OPC_JCC_{long,short}. */
369 static const uint8_t tcg_cond_to_jcc
[] = {
370 [TCG_COND_EQ
] = JCC_JE
,
371 [TCG_COND_NE
] = JCC_JNE
,
372 [TCG_COND_LT
] = JCC_JL
,
373 [TCG_COND_GE
] = JCC_JGE
,
374 [TCG_COND_LE
] = JCC_JLE
,
375 [TCG_COND_GT
] = JCC_JG
,
376 [TCG_COND_LTU
] = JCC_JB
,
377 [TCG_COND_GEU
] = JCC_JAE
,
378 [TCG_COND_LEU
] = JCC_JBE
,
379 [TCG_COND_GTU
] = JCC_JA
,
382 #if TCG_TARGET_REG_BITS == 64
383 static void tcg_out_opc(TCGContext
*s
, int opc
, int r
, int rm
, int x
)
390 if (opc
& P_DATA16
) {
391 /* We should never be asking for both 16 and 64-bit operation. */
392 assert((opc
& P_REXW
) == 0);
395 if (opc
& P_ADDR32
) {
400 rex
|= (opc
& P_REXW
) ? 0x8 : 0x0; /* REX.W */
401 rex
|= (r
& 8) >> 1; /* REX.R */
402 rex
|= (x
& 8) >> 2; /* REX.X */
403 rex
|= (rm
& 8) >> 3; /* REX.B */
405 /* P_REXB_{R,RM} indicates that the given register is the low byte.
406 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
407 as otherwise the encoding indicates %[abcd]h. Note that the values
408 that are ORed in merely indicate that the REX byte must be present;
409 those bits get discarded in output. */
410 rex
|= opc
& (r
>= 4 ? P_REXB_R
: 0);
411 rex
|= opc
& (rm
>= 4 ? P_REXB_RM
: 0);
414 tcg_out8(s
, (uint8_t)(rex
| 0x40));
417 if (opc
& (P_EXT
| P_EXT38
)) {
427 static void tcg_out_opc(TCGContext
*s
, int opc
)
429 if (opc
& P_DATA16
) {
432 if (opc
& (P_EXT
| P_EXT38
)) {
440 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
441 the 32-bit compilation paths. This method works with all versions of gcc,
442 whereas relying on optimization may not be able to exclude them. */
443 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
446 static void tcg_out_modrm(TCGContext
*s
, int opc
, int r
, int rm
)
448 tcg_out_opc(s
, opc
, r
, rm
, 0);
449 tcg_out8(s
, 0xc0 | (LOWREGMASK(r
) << 3) | LOWREGMASK(rm
));
452 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
453 Either RM or INDEX (or both) may be missing, which is indicated by a
454 negative value. In 64-bit mode for absolute addresses, ~RM is the size
455 of the immediate operand that will follow the instruction. */
457 static void tcg_out_modrm_sib_offset(TCGContext
*s
, int opc
, int r
, int rm
,
458 int index
, int shift
, intptr_t offset
)
462 if (index
< 0 && rm
< 0) {
463 if (TCG_TARGET_REG_BITS
== 64) {
464 /* Try for a rip-relative addressing mode. This has replaced
465 the 32-bit-mode absolute addressing encoding. */
466 intptr_t pc
= (intptr_t)s
->code_ptr
+ 5 + ~rm
;
467 intptr_t disp
= offset
- pc
;
468 if (disp
== (int32_t)disp
) {
469 tcg_out_opc(s
, opc
, r
, 0, 0);
470 tcg_out8(s
, (LOWREGMASK(r
) << 3) | 5);
475 /* Try for an absolute address encoding. This requires the
476 use of the MODRM+SIB encoding and is therefore larger than
477 rip-relative addressing. */
478 if (offset
== (int32_t)offset
) {
479 tcg_out_opc(s
, opc
, r
, 0, 0);
480 tcg_out8(s
, (LOWREGMASK(r
) << 3) | 4);
481 tcg_out8(s
, (4 << 3) | 5);
482 tcg_out32(s
, offset
);
486 /* ??? The memory isn't directly addressable. */
489 /* Absolute address. */
490 tcg_out_opc(s
, opc
, r
, 0, 0);
491 tcg_out8(s
, (r
<< 3) | 5);
492 tcg_out32(s
, offset
);
497 /* Find the length of the immediate addend. Note that the encoding
498 that would be used for (%ebp) indicates absolute addressing. */
500 mod
= 0, len
= 4, rm
= 5;
501 } else if (offset
== 0 && LOWREGMASK(rm
) != TCG_REG_EBP
) {
503 } else if (offset
== (int8_t)offset
) {
509 /* Use a single byte MODRM format if possible. Note that the encoding
510 that would be used for %esp is the escape to the two byte form. */
511 if (index
< 0 && LOWREGMASK(rm
) != TCG_REG_ESP
) {
512 /* Single byte MODRM format. */
513 tcg_out_opc(s
, opc
, r
, rm
, 0);
514 tcg_out8(s
, mod
| (LOWREGMASK(r
) << 3) | LOWREGMASK(rm
));
516 /* Two byte MODRM+SIB format. */
518 /* Note that the encoding that would place %esp into the index
519 field indicates no index register. In 64-bit mode, the REX.X
520 bit counts, so %r12 can be used as the index. */
524 assert(index
!= TCG_REG_ESP
);
527 tcg_out_opc(s
, opc
, r
, rm
, index
);
528 tcg_out8(s
, mod
| (LOWREGMASK(r
) << 3) | 4);
529 tcg_out8(s
, (shift
<< 6) | (LOWREGMASK(index
) << 3) | LOWREGMASK(rm
));
534 } else if (len
== 4) {
535 tcg_out32(s
, offset
);
539 /* A simplification of the above with no index or shift. */
540 static inline void tcg_out_modrm_offset(TCGContext
*s
, int opc
, int r
,
541 int rm
, intptr_t offset
)
543 tcg_out_modrm_sib_offset(s
, opc
, r
, rm
, -1, 0, offset
);
546 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
547 static inline void tgen_arithr(TCGContext
*s
, int subop
, int dest
, int src
)
549 /* Propagate an opcode prefix, such as P_REXW. */
550 int ext
= subop
& ~0x7;
553 tcg_out_modrm(s
, OPC_ARITH_GvEv
+ (subop
<< 3) + ext
, dest
, src
);
556 static inline void tcg_out_mov(TCGContext
*s
, TCGType type
,
557 TCGReg ret
, TCGReg arg
)
560 int opc
= OPC_MOVL_GvEv
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
561 tcg_out_modrm(s
, opc
, ret
, arg
);
565 static void tcg_out_movi(TCGContext
*s
, TCGType type
,
566 TCGReg ret
, tcg_target_long arg
)
568 tcg_target_long diff
;
571 tgen_arithr(s
, ARITH_XOR
, ret
, ret
);
574 if (arg
== (uint32_t)arg
|| type
== TCG_TYPE_I32
) {
575 tcg_out_opc(s
, OPC_MOVL_Iv
+ LOWREGMASK(ret
), 0, ret
, 0);
579 if (arg
== (int32_t)arg
) {
580 tcg_out_modrm(s
, OPC_MOVL_EvIz
+ P_REXW
, 0, ret
);
585 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
586 diff
= arg
- ((uintptr_t)s
->code_ptr
+ 7);
587 if (diff
== (int32_t)diff
) {
588 tcg_out_opc(s
, OPC_LEA
| P_REXW
, ret
, 0, 0);
589 tcg_out8(s
, (LOWREGMASK(ret
) << 3) | 5);
594 tcg_out_opc(s
, OPC_MOVL_Iv
+ P_REXW
+ LOWREGMASK(ret
), 0, ret
, 0);
598 static inline void tcg_out_pushi(TCGContext
*s
, tcg_target_long val
)
600 if (val
== (int8_t)val
) {
601 tcg_out_opc(s
, OPC_PUSH_Ib
, 0, 0, 0);
603 } else if (val
== (int32_t)val
) {
604 tcg_out_opc(s
, OPC_PUSH_Iv
, 0, 0, 0);
611 static inline void tcg_out_push(TCGContext
*s
, int reg
)
613 tcg_out_opc(s
, OPC_PUSH_r32
+ LOWREGMASK(reg
), 0, reg
, 0);
616 static inline void tcg_out_pop(TCGContext
*s
, int reg
)
618 tcg_out_opc(s
, OPC_POP_r32
+ LOWREGMASK(reg
), 0, reg
, 0);
621 static inline void tcg_out_ld(TCGContext
*s
, TCGType type
, TCGReg ret
,
622 TCGReg arg1
, intptr_t arg2
)
624 int opc
= OPC_MOVL_GvEv
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
625 tcg_out_modrm_offset(s
, opc
, ret
, arg1
, arg2
);
628 static inline void tcg_out_st(TCGContext
*s
, TCGType type
, TCGReg arg
,
629 TCGReg arg1
, intptr_t arg2
)
631 int opc
= OPC_MOVL_EvGv
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
632 tcg_out_modrm_offset(s
, opc
, arg
, arg1
, arg2
);
635 static inline void tcg_out_sti(TCGContext
*s
, TCGType type
, TCGReg base
,
636 tcg_target_long ofs
, tcg_target_long val
)
638 int opc
= OPC_MOVL_EvIz
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
639 tcg_out_modrm_offset(s
, opc
, 0, base
, ofs
);
643 static void tcg_out_shifti(TCGContext
*s
, int subopc
, int reg
, int count
)
645 /* Propagate an opcode prefix, such as P_DATA16. */
646 int ext
= subopc
& ~0x7;
650 tcg_out_modrm(s
, OPC_SHIFT_1
+ ext
, subopc
, reg
);
652 tcg_out_modrm(s
, OPC_SHIFT_Ib
+ ext
, subopc
, reg
);
657 static inline void tcg_out_bswap32(TCGContext
*s
, int reg
)
659 tcg_out_opc(s
, OPC_BSWAP
+ LOWREGMASK(reg
), 0, reg
, 0);
662 static inline void tcg_out_rolw_8(TCGContext
*s
, int reg
)
664 tcg_out_shifti(s
, SHIFT_ROL
+ P_DATA16
, reg
, 8);
667 static inline void tcg_out_ext8u(TCGContext
*s
, int dest
, int src
)
670 assert(src
< 4 || TCG_TARGET_REG_BITS
== 64);
671 tcg_out_modrm(s
, OPC_MOVZBL
+ P_REXB_RM
, dest
, src
);
674 static void tcg_out_ext8s(TCGContext
*s
, int dest
, int src
, int rexw
)
677 assert(src
< 4 || TCG_TARGET_REG_BITS
== 64);
678 tcg_out_modrm(s
, OPC_MOVSBL
+ P_REXB_RM
+ rexw
, dest
, src
);
681 static inline void tcg_out_ext16u(TCGContext
*s
, int dest
, int src
)
684 tcg_out_modrm(s
, OPC_MOVZWL
, dest
, src
);
687 static inline void tcg_out_ext16s(TCGContext
*s
, int dest
, int src
, int rexw
)
690 tcg_out_modrm(s
, OPC_MOVSWL
+ rexw
, dest
, src
);
693 static inline void tcg_out_ext32u(TCGContext
*s
, int dest
, int src
)
695 /* 32-bit mov zero extends. */
696 tcg_out_modrm(s
, OPC_MOVL_GvEv
, dest
, src
);
699 static inline void tcg_out_ext32s(TCGContext
*s
, int dest
, int src
)
701 tcg_out_modrm(s
, OPC_MOVSLQ
, dest
, src
);
704 static inline void tcg_out_bswap64(TCGContext
*s
, int reg
)
706 tcg_out_opc(s
, OPC_BSWAP
+ P_REXW
+ LOWREGMASK(reg
), 0, reg
, 0);
709 static void tgen_arithi(TCGContext
*s
, int c
, int r0
,
710 tcg_target_long val
, int cf
)
714 if (TCG_TARGET_REG_BITS
== 64) {
719 /* ??? While INC and DEC are 2 bytes shorter than ADDL/SUBL $1, they also
720 induce partial flags update stalls on Pentium4 and are not recommended
721 by current Intel optimization manuals. */
722 if (!cf
&& (c
== ARITH_ADD
|| c
== ARITH_SUB
) && (val
== 1 || val
== -1)) {
723 int is_inc
= (c
== ARITH_ADD
) ^ (val
< 0);
724 if (TCG_TARGET_REG_BITS
== 64) {
725 /* The single-byte increment encodings are re-tasked as the
726 REX prefixes. Use the MODRM encoding. */
727 tcg_out_modrm(s
, OPC_GRP5
+ rexw
,
728 (is_inc
? EXT5_INC_Ev
: EXT5_DEC_Ev
), r0
);
730 tcg_out8(s
, (is_inc
? OPC_INC_r32
: OPC_DEC_r32
) + r0
);
735 if (c
== ARITH_AND
) {
736 if (TCG_TARGET_REG_BITS
== 64) {
737 if (val
== 0xffffffffu
) {
738 tcg_out_ext32u(s
, r0
, r0
);
741 if (val
== (uint32_t)val
) {
742 /* AND with no high bits set can use a 32-bit operation. */
746 if (val
== 0xffu
&& (r0
< 4 || TCG_TARGET_REG_BITS
== 64)) {
747 tcg_out_ext8u(s
, r0
, r0
);
750 if (val
== 0xffffu
) {
751 tcg_out_ext16u(s
, r0
, r0
);
756 if (val
== (int8_t)val
) {
757 tcg_out_modrm(s
, OPC_ARITH_EvIb
+ rexw
, c
, r0
);
761 if (rexw
== 0 || val
== (int32_t)val
) {
762 tcg_out_modrm(s
, OPC_ARITH_EvIz
+ rexw
, c
, r0
);
770 static void tcg_out_addi(TCGContext
*s
, int reg
, tcg_target_long val
)
773 tgen_arithi(s
, ARITH_ADD
+ P_REXW
, reg
, val
, 0);
777 /* Use SMALL != 0 to force a short forward branch. */
778 static void tcg_out_jxx(TCGContext
*s
, int opc
, int label_index
, int small
)
781 TCGLabel
*l
= &s
->labels
[label_index
];
784 val
= l
->u
.value
- (intptr_t)s
->code_ptr
;
786 if ((int8_t)val1
== val1
) {
788 tcg_out8(s
, OPC_JMP_short
);
790 tcg_out8(s
, OPC_JCC_short
+ opc
);
798 tcg_out8(s
, OPC_JMP_long
);
799 tcg_out32(s
, val
- 5);
801 tcg_out_opc(s
, OPC_JCC_long
+ opc
, 0, 0, 0);
802 tcg_out32(s
, val
- 6);
807 tcg_out8(s
, OPC_JMP_short
);
809 tcg_out8(s
, OPC_JCC_short
+ opc
);
811 tcg_out_reloc(s
, s
->code_ptr
, R_386_PC8
, label_index
, -1);
815 tcg_out8(s
, OPC_JMP_long
);
817 tcg_out_opc(s
, OPC_JCC_long
+ opc
, 0, 0, 0);
819 tcg_out_reloc(s
, s
->code_ptr
, R_386_PC32
, label_index
, -4);
824 static void tcg_out_cmp(TCGContext
*s
, TCGArg arg1
, TCGArg arg2
,
825 int const_arg2
, int rexw
)
830 tcg_out_modrm(s
, OPC_TESTL
+ rexw
, arg1
, arg1
);
832 tgen_arithi(s
, ARITH_CMP
+ rexw
, arg1
, arg2
, 0);
835 tgen_arithr(s
, ARITH_CMP
+ rexw
, arg1
, arg2
);
839 static void tcg_out_brcond32(TCGContext
*s
, TCGCond cond
,
840 TCGArg arg1
, TCGArg arg2
, int const_arg2
,
841 int label_index
, int small
)
843 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, 0);
844 tcg_out_jxx(s
, tcg_cond_to_jcc
[cond
], label_index
, small
);
847 #if TCG_TARGET_REG_BITS == 64
848 static void tcg_out_brcond64(TCGContext
*s
, TCGCond cond
,
849 TCGArg arg1
, TCGArg arg2
, int const_arg2
,
850 int label_index
, int small
)
852 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, P_REXW
);
853 tcg_out_jxx(s
, tcg_cond_to_jcc
[cond
], label_index
, small
);
856 /* XXX: this is implemented at the target level to avoid having to
857 handle temporaries that live across basic blocks */
858 static void tcg_out_brcond2(TCGContext
*s
, const TCGArg
*args
,
859 const int *const_args
, int small
)
862 label_next
= gen_new_label();
865 tcg_out_brcond32(s
, TCG_COND_NE
, args
[0], args
[2], const_args
[2],
867 tcg_out_brcond32(s
, TCG_COND_EQ
, args
[1], args
[3], const_args
[3],
871 tcg_out_brcond32(s
, TCG_COND_NE
, args
[0], args
[2], const_args
[2],
873 tcg_out_brcond32(s
, TCG_COND_NE
, args
[1], args
[3], const_args
[3],
877 tcg_out_brcond32(s
, TCG_COND_LT
, args
[1], args
[3], const_args
[3],
879 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
880 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[0], args
[2], const_args
[2],
884 tcg_out_brcond32(s
, TCG_COND_LT
, args
[1], args
[3], const_args
[3],
886 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
887 tcg_out_brcond32(s
, TCG_COND_LEU
, args
[0], args
[2], const_args
[2],
891 tcg_out_brcond32(s
, TCG_COND_GT
, args
[1], args
[3], const_args
[3],
893 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
894 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[0], args
[2], const_args
[2],
898 tcg_out_brcond32(s
, TCG_COND_GT
, args
[1], args
[3], const_args
[3],
900 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
901 tcg_out_brcond32(s
, TCG_COND_GEU
, args
[0], args
[2], const_args
[2],
905 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[1], args
[3], const_args
[3],
907 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
908 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[0], args
[2], const_args
[2],
912 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[1], args
[3], const_args
[3],
914 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
915 tcg_out_brcond32(s
, TCG_COND_LEU
, args
[0], args
[2], const_args
[2],
919 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[1], args
[3], const_args
[3],
921 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
922 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[0], args
[2], const_args
[2],
926 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[1], args
[3], const_args
[3],
928 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
929 tcg_out_brcond32(s
, TCG_COND_GEU
, args
[0], args
[2], const_args
[2],
935 tcg_out_label(s
, label_next
, s
->code_ptr
);
939 static void tcg_out_setcond32(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
940 TCGArg arg1
, TCGArg arg2
, int const_arg2
)
942 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, 0);
943 tcg_out_modrm(s
, OPC_SETCC
| tcg_cond_to_jcc
[cond
], 0, dest
);
944 tcg_out_ext8u(s
, dest
, dest
);
947 #if TCG_TARGET_REG_BITS == 64
948 static void tcg_out_setcond64(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
949 TCGArg arg1
, TCGArg arg2
, int const_arg2
)
951 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, P_REXW
);
952 tcg_out_modrm(s
, OPC_SETCC
| tcg_cond_to_jcc
[cond
], 0, dest
);
953 tcg_out_ext8u(s
, dest
, dest
);
956 static void tcg_out_setcond2(TCGContext
*s
, const TCGArg
*args
,
957 const int *const_args
)
960 int label_true
, label_over
;
962 memcpy(new_args
, args
+1, 5*sizeof(TCGArg
));
964 if (args
[0] == args
[1] || args
[0] == args
[2]
965 || (!const_args
[3] && args
[0] == args
[3])
966 || (!const_args
[4] && args
[0] == args
[4])) {
967 /* When the destination overlaps with one of the argument
968 registers, don't do anything tricky. */
969 label_true
= gen_new_label();
970 label_over
= gen_new_label();
972 new_args
[5] = label_true
;
973 tcg_out_brcond2(s
, new_args
, const_args
+1, 1);
975 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 0);
976 tcg_out_jxx(s
, JCC_JMP
, label_over
, 1);
977 tcg_out_label(s
, label_true
, s
->code_ptr
);
979 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 1);
980 tcg_out_label(s
, label_over
, s
->code_ptr
);
982 /* When the destination does not overlap one of the arguments,
983 clear the destination first, jump if cond false, and emit an
984 increment in the true case. This results in smaller code. */
986 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 0);
988 label_over
= gen_new_label();
989 new_args
[4] = tcg_invert_cond(new_args
[4]);
990 new_args
[5] = label_over
;
991 tcg_out_brcond2(s
, new_args
, const_args
+1, 1);
993 tgen_arithi(s
, ARITH_ADD
, args
[0], 1, 0);
994 tcg_out_label(s
, label_over
, s
->code_ptr
);
999 static void tcg_out_movcond32(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
1000 TCGArg c1
, TCGArg c2
, int const_c2
,
1003 tcg_out_cmp(s
, c1
, c2
, const_c2
, 0);
1005 tcg_out_modrm(s
, OPC_CMOVCC
| tcg_cond_to_jcc
[cond
], dest
, v1
);
1007 int over
= gen_new_label();
1008 tcg_out_jxx(s
, tcg_cond_to_jcc
[tcg_invert_cond(cond
)], over
, 1);
1009 tcg_out_mov(s
, TCG_TYPE_I32
, dest
, v1
);
1010 tcg_out_label(s
, over
, s
->code_ptr
);
1014 #if TCG_TARGET_REG_BITS == 64
1015 static void tcg_out_movcond64(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
1016 TCGArg c1
, TCGArg c2
, int const_c2
,
1019 tcg_out_cmp(s
, c1
, c2
, const_c2
, P_REXW
);
1020 tcg_out_modrm(s
, OPC_CMOVCC
| tcg_cond_to_jcc
[cond
] | P_REXW
, dest
, v1
);
1024 static void tcg_out_branch(TCGContext
*s
, int call
, uintptr_t dest
)
1026 intptr_t disp
= dest
- (intptr_t)s
->code_ptr
- 5;
1028 if (disp
== (int32_t)disp
) {
1029 tcg_out_opc(s
, call
? OPC_CALL_Jz
: OPC_JMP_long
, 0, 0, 0);
1032 tcg_out_movi(s
, TCG_TYPE_PTR
, TCG_REG_R10
, dest
);
1033 tcg_out_modrm(s
, OPC_GRP5
,
1034 call
? EXT5_CALLN_Ev
: EXT5_JMPN_Ev
, TCG_REG_R10
);
1038 static inline void tcg_out_calli(TCGContext
*s
, uintptr_t dest
)
1040 tcg_out_branch(s
, 1, dest
);
1043 static void tcg_out_jmp(TCGContext
*s
, uintptr_t dest
)
1045 tcg_out_branch(s
, 0, dest
);
1048 #if defined(CONFIG_SOFTMMU)
1049 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1050 * int mmu_idx, uintptr_t ra)
1052 static const void * const qemu_ld_helpers
[16] = {
1053 [MO_UB
] = helper_ret_ldub_mmu
,
1054 [MO_LEUW
] = helper_le_lduw_mmu
,
1055 [MO_LEUL
] = helper_le_ldul_mmu
,
1056 [MO_LEQ
] = helper_le_ldq_mmu
,
1057 [MO_BEUW
] = helper_be_lduw_mmu
,
1058 [MO_BEUL
] = helper_be_ldul_mmu
,
1059 [MO_BEQ
] = helper_be_ldq_mmu
,
1062 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1063 * uintxx_t val, int mmu_idx, uintptr_t ra)
1065 static const void * const qemu_st_helpers
[16] = {
1066 [MO_UB
] = helper_ret_stb_mmu
,
1067 [MO_LEUW
] = helper_le_stw_mmu
,
1068 [MO_LEUL
] = helper_le_stl_mmu
,
1069 [MO_LEQ
] = helper_le_stq_mmu
,
1070 [MO_BEUW
] = helper_be_stw_mmu
,
1071 [MO_BEUL
] = helper_be_stl_mmu
,
1072 [MO_BEQ
] = helper_be_stq_mmu
,
1075 /* Perform the TLB load and compare.
1078 ADDRLO and ADDRHI contain the low and high part of the address.
1080 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1082 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1083 This should be offsetof addr_read or addr_write.
1086 LABEL_PTR is filled with the positions of the displacements of the forward
1087 jumps to the TLB miss case: 1 entry, or 2 when the guest address is wider than a host register.
1089 Second argument register is loaded with the low part of the address.
1090 In the TLB hit case, it has been adjusted as indicated by the TLB
1091 and so is a host address. In the TLB miss case, it continues to
1092 hold a guest address.
1094 First argument register is clobbered. */
1096 static inline void tcg_out_tlb_load(TCGContext
*s
, TCGReg addrlo
, TCGReg addrhi
,
1097 int mem_index
, TCGMemOp s_bits
,
1098 uint8_t **label_ptr
, int which
)
1100 const TCGReg r0
= TCG_REG_L0
;
1101 const TCGReg r1
= TCG_REG_L1
;
1102 TCGType ttype
= TCG_TYPE_I32
;
1103 TCGType htype
= TCG_TYPE_I32
;
1104 int trexw
= 0, hrexw
= 0;
1106 if (TCG_TARGET_REG_BITS
== 64) {
1107 if (TARGET_LONG_BITS
== 64) {
1108 ttype
= TCG_TYPE_I64
;
1111 if (TCG_TYPE_PTR
== TCG_TYPE_I64
) {
1112 htype
= TCG_TYPE_I64
;
1117 tcg_out_mov(s
, htype
, r0
, addrlo
);
1118 tcg_out_mov(s
, ttype
, r1
, addrlo
);
1120 tcg_out_shifti(s
, SHIFT_SHR
+ hrexw
, r0
,
1121 TARGET_PAGE_BITS
- CPU_TLB_ENTRY_BITS
);
1123 tgen_arithi(s
, ARITH_AND
+ trexw
, r1
,
1124 TARGET_PAGE_MASK
| ((1 << s_bits
) - 1), 0);
1125 tgen_arithi(s
, ARITH_AND
+ hrexw
, r0
,
1126 (CPU_TLB_SIZE
- 1) << CPU_TLB_ENTRY_BITS
, 0);
1128 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ hrexw
, r0
, TCG_AREG0
, r0
, 0,
1129 offsetof(CPUArchState
, tlb_table
[mem_index
][0])
1133 tcg_out_modrm_offset(s
, OPC_CMP_GvEv
+ trexw
, r1
, r0
, 0);
1135 /* Prepare for both the fast path add of the tlb addend, and the slow
1136 path function argument setup. There are two cases worth noting:
1137 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1138 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1139 copies the entire guest address for the slow path, while truncation
1140 for the 32-bit host happens with the fastpath ADDL below. */
1141 tcg_out_mov(s
, ttype
, r1
, addrlo
);
1144 tcg_out_opc(s
, OPC_JCC_long
+ JCC_JNE
, 0, 0, 0);
1145 label_ptr
[0] = s
->code_ptr
;
1148 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1149 /* cmp 4(r0), addrhi */
1150 tcg_out_modrm_offset(s
, OPC_CMP_GvEv
, addrhi
, r0
, 4);
1153 tcg_out_opc(s
, OPC_JCC_long
+ JCC_JNE
, 0, 0, 0);
1154 label_ptr
[1] = s
->code_ptr
;
1160 /* add addend(r0), r1 */
1161 tcg_out_modrm_offset(s
, OPC_ADD_GvEv
+ hrexw
, r1
, r0
,
1162 offsetof(CPUTLBEntry
, addend
) - which
);
1166 * Record the context of a call to the out of line helper code for the slow path
1167 * for a load or store, so that we can later generate the correct helper code
1169 static void add_qemu_ldst_label(TCGContext
*s
, int is_ld
, TCGMemOp opc
,
1170 TCGReg datalo
, TCGReg datahi
,
1171 TCGReg addrlo
, TCGReg addrhi
,
1172 int mem_index
, uint8_t *raddr
,
1173 uint8_t **label_ptr
)
1175 TCGLabelQemuLdst
*label
= new_ldst_label(s
);
1177 label
->is_ld
= is_ld
;
1179 label
->datalo_reg
= datalo
;
1180 label
->datahi_reg
= datahi
;
1181 label
->addrlo_reg
= addrlo
;
1182 label
->addrhi_reg
= addrhi
;
1183 label
->mem_index
= mem_index
;
1184 label
->raddr
= raddr
;
1185 label
->label_ptr
[0] = label_ptr
[0];
1186 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1187 label
->label_ptr
[1] = label_ptr
[1];
1192 * Generate code for the slow path for a load at the end of block
1194 static void tcg_out_qemu_ld_slow_path(TCGContext
*s
, TCGLabelQemuLdst
*l
)
1196 TCGMemOp opc
= l
->opc
;
1198 uint8_t **label_ptr
= &l
->label_ptr
[0];
1200 /* resolve label address */
1201 *(uint32_t *)label_ptr
[0] = (uint32_t)(s
->code_ptr
- label_ptr
[0] - 4);
1202 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1203 *(uint32_t *)label_ptr
[1] = (uint32_t)(s
->code_ptr
- label_ptr
[1] - 4);
1206 if (TCG_TARGET_REG_BITS
== 32) {
1209 tcg_out_st(s
, TCG_TYPE_PTR
, TCG_AREG0
, TCG_REG_ESP
, ofs
);
1212 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrlo_reg
, TCG_REG_ESP
, ofs
);
1215 if (TARGET_LONG_BITS
== 64) {
1216 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrhi_reg
, TCG_REG_ESP
, ofs
);
1220 tcg_out_sti(s
, TCG_TYPE_I32
, TCG_REG_ESP
, ofs
, l
->mem_index
);
1223 tcg_out_sti(s
, TCG_TYPE_I32
, TCG_REG_ESP
, ofs
, (uintptr_t)l
->raddr
);
1225 tcg_out_mov(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[0], TCG_AREG0
);
1226 /* The second argument is already loaded with addrlo. */
1227 tcg_out_movi(s
, TCG_TYPE_I32
, tcg_target_call_iarg_regs
[2],
1229 tcg_out_movi(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[3],
1230 (uintptr_t)l
->raddr
);
1233 tcg_out_calli(s
, (uintptr_t)qemu_ld_helpers
[opc
& ~MO_SIGN
]);
1235 data_reg
= l
->datalo_reg
;
1236 switch (opc
& MO_SSIZE
) {
1238 tcg_out_ext8s(s
, data_reg
, TCG_REG_EAX
, P_REXW
);
1241 tcg_out_ext16s(s
, data_reg
, TCG_REG_EAX
, P_REXW
);
1243 #if TCG_TARGET_REG_BITS == 64
1245 tcg_out_ext32s(s
, data_reg
, TCG_REG_EAX
);
1250 /* Note that the helpers have zero-extended to tcg_target_long. */
1252 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg
, TCG_REG_EAX
);
1255 if (TCG_TARGET_REG_BITS
== 64) {
1256 tcg_out_mov(s
, TCG_TYPE_I64
, data_reg
, TCG_REG_RAX
);
1257 } else if (data_reg
== TCG_REG_EDX
) {
1258 /* xchg %edx, %eax */
1259 tcg_out_opc(s
, OPC_XCHG_ax_r32
+ TCG_REG_EDX
, 0, 0, 0);
1260 tcg_out_mov(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_EAX
);
1262 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg
, TCG_REG_EAX
);
1263 tcg_out_mov(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_EDX
);
1270 /* Jump back to the code corresponding to the next IR op after qemu_ld */
1271 tcg_out_jmp(s
, (uintptr_t)l
->raddr
);
1275 * Generate code for the slow path for a store at the end of block
1277 static void tcg_out_qemu_st_slow_path(TCGContext
*s
, TCGLabelQemuLdst
*l
)
1279 TCGMemOp opc
= l
->opc
;
1280 TCGMemOp s_bits
= opc
& MO_SIZE
;
1281 uint8_t **label_ptr
= &l
->label_ptr
[0];
1284 /* resolve label address */
1285 *(uint32_t *)label_ptr
[0] = (uint32_t)(s
->code_ptr
- label_ptr
[0] - 4);
1286 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1287 *(uint32_t *)label_ptr
[1] = (uint32_t)(s
->code_ptr
- label_ptr
[1] - 4);
1290 if (TCG_TARGET_REG_BITS
== 32) {
1293 tcg_out_st(s
, TCG_TYPE_PTR
, TCG_AREG0
, TCG_REG_ESP
, ofs
);
1296 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrlo_reg
, TCG_REG_ESP
, ofs
);
1299 if (TARGET_LONG_BITS
== 64) {
1300 tcg_out_st(s
, TCG_TYPE_I32
, l
->addrhi_reg
, TCG_REG_ESP
, ofs
);
1304 tcg_out_st(s
, TCG_TYPE_I32
, l
->datalo_reg
, TCG_REG_ESP
, ofs
);
1307 if (s_bits
== MO_64
) {
1308 tcg_out_st(s
, TCG_TYPE_I32
, l
->datahi_reg
, TCG_REG_ESP
, ofs
);
1312 tcg_out_sti(s
, TCG_TYPE_I32
, TCG_REG_ESP
, ofs
, l
->mem_index
);
1315 retaddr
= TCG_REG_EAX
;
1316 tcg_out_movi(s
, TCG_TYPE_I32
, retaddr
, (uintptr_t)l
->raddr
);
1317 tcg_out_st(s
, TCG_TYPE_I32
, retaddr
, TCG_REG_ESP
, ofs
);
1319 tcg_out_mov(s
, TCG_TYPE_PTR
, tcg_target_call_iarg_regs
[0], TCG_AREG0
);
1320 /* The second argument is already loaded with addrlo. */
1321 tcg_out_mov(s
, (s_bits
== MO_64
? TCG_TYPE_I64
: TCG_TYPE_I32
),
1322 tcg_target_call_iarg_regs
[2], l
->datalo_reg
);
1323 tcg_out_movi(s
, TCG_TYPE_I32
, tcg_target_call_iarg_regs
[3],
1326 if (ARRAY_SIZE(tcg_target_call_iarg_regs
) > 4) {
1327 retaddr
= tcg_target_call_iarg_regs
[4];
1328 tcg_out_movi(s
, TCG_TYPE_PTR
, retaddr
, (uintptr_t)l
->raddr
);
1330 retaddr
= TCG_REG_RAX
;
1331 tcg_out_movi(s
, TCG_TYPE_PTR
, retaddr
, (uintptr_t)l
->raddr
);
1332 tcg_out_st(s
, TCG_TYPE_PTR
, retaddr
, TCG_REG_ESP
, 0);
1336 /* "Tail call" to the helper, with the return address back inline. */
1337 tcg_out_push(s
, retaddr
);
1338 tcg_out_jmp(s
, (uintptr_t)qemu_st_helpers
[opc
]);
1340 #elif defined(__x86_64__) && defined(__linux__)
1341 # include <asm/prctl.h>
1342 # include <sys/prctl.h>
1344 int arch_prctl(int code
, unsigned long addr
);
1346 static int guest_base_flags
;
1347 static inline void setup_guest_base_seg(void)
1349 if (arch_prctl(ARCH_SET_GS
, GUEST_BASE
) == 0) {
1350 guest_base_flags
= P_GS
;
1354 # define guest_base_flags 0
1355 static inline void setup_guest_base_seg(void) { }
1356 #endif /* SOFTMMU */
1358 static void tcg_out_qemu_ld_direct(TCGContext
*s
, TCGReg datalo
, TCGReg datahi
,
1359 TCGReg base
, intptr_t ofs
, int seg
,
1362 const TCGMemOp real_bswap
= memop
& MO_BSWAP
;
1363 TCGMemOp bswap
= real_bswap
;
1364 int movop
= OPC_MOVL_GvEv
;
1366 if (have_movbe
&& real_bswap
) {
1368 movop
= OPC_MOVBE_GyMy
;
1371 switch (memop
& MO_SSIZE
) {
1373 tcg_out_modrm_offset(s
, OPC_MOVZBL
+ seg
, datalo
, base
, ofs
);
1376 tcg_out_modrm_offset(s
, OPC_MOVSBL
+ P_REXW
+ seg
, datalo
, base
, ofs
);
1379 tcg_out_modrm_offset(s
, OPC_MOVZWL
+ seg
, datalo
, base
, ofs
);
1381 tcg_out_rolw_8(s
, datalo
);
1387 tcg_out_modrm_offset(s
, OPC_MOVBE_GyMy
+ P_DATA16
+ seg
,
1390 tcg_out_modrm_offset(s
, OPC_MOVZWL
+ seg
, datalo
, base
, ofs
);
1391 tcg_out_rolw_8(s
, datalo
);
1393 tcg_out_modrm(s
, OPC_MOVSWL
+ P_REXW
, datalo
, datalo
);
1395 tcg_out_modrm_offset(s
, OPC_MOVSWL
+ P_REXW
+ seg
,
1400 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1402 tcg_out_bswap32(s
, datalo
);
1405 #if TCG_TARGET_REG_BITS == 64
1408 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1410 tcg_out_bswap32(s
, datalo
);
1412 tcg_out_ext32s(s
, datalo
, datalo
);
1414 tcg_out_modrm_offset(s
, OPC_MOVSLQ
+ seg
, datalo
, base
, ofs
);
1419 if (TCG_TARGET_REG_BITS
== 64) {
1420 tcg_out_modrm_offset(s
, movop
+ P_REXW
+ seg
, datalo
, base
, ofs
);
1422 tcg_out_bswap64(s
, datalo
);
1430 if (base
!= datalo
) {
1431 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1432 tcg_out_modrm_offset(s
, movop
+ seg
, datahi
, base
, ofs
+ 4);
1434 tcg_out_modrm_offset(s
, movop
+ seg
, datahi
, base
, ofs
+ 4);
1435 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1438 tcg_out_bswap32(s
, datalo
);
1439 tcg_out_bswap32(s
, datahi
);
1448 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1449 EAX. It will be useful once fixed registers globals are less
1451 static void tcg_out_qemu_ld(TCGContext
*s
, const TCGArg
*args
, bool is64
)
1453 TCGReg datalo
, datahi
, addrlo
;
1454 TCGReg addrhi
__attribute__((unused
));
1456 #if defined(CONFIG_SOFTMMU)
1459 uint8_t *label_ptr
[2];
1463 datahi
= (TCG_TARGET_REG_BITS
== 32 && is64
? *args
++ : 0);
1465 addrhi
= (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
? *args
++ : 0);
1468 #if defined(CONFIG_SOFTMMU)
1469 mem_index
= *args
++;
1470 s_bits
= opc
& MO_SIZE
;
1472 tcg_out_tlb_load(s
, addrlo
, addrhi
, mem_index
, s_bits
,
1473 label_ptr
, offsetof(CPUTLBEntry
, addr_read
));
1476 tcg_out_qemu_ld_direct(s
, datalo
, datahi
, TCG_REG_L1
, 0, 0, opc
);
1478 /* Record the current context of a load into ldst label */
1479 add_qemu_ldst_label(s
, 1, opc
, datalo
, datahi
, addrlo
, addrhi
,
1480 mem_index
, s
->code_ptr
, label_ptr
);
1483 int32_t offset
= GUEST_BASE
;
1484 TCGReg base
= addrlo
;
1487 /* ??? We assume all operations have left us with register contents
1488 that are zero extended. So far this appears to be true. If we
1489 want to enforce this, we can either do an explicit zero-extension
1490 here, or (if GUEST_BASE == 0, or a segment register is in use)
1491 use the ADDR32 prefix. For now, do nothing. */
1492 if (GUEST_BASE
&& guest_base_flags
) {
1493 seg
= guest_base_flags
;
1495 } else if (TCG_TARGET_REG_BITS
== 64 && offset
!= GUEST_BASE
) {
1496 tcg_out_movi(s
, TCG_TYPE_I64
, TCG_REG_L1
, GUEST_BASE
);
1497 tgen_arithr(s
, ARITH_ADD
+ P_REXW
, TCG_REG_L1
, base
);
1502 tcg_out_qemu_ld_direct(s
, datalo
, datahi
, base
, offset
, seg
, opc
);
1507 static void tcg_out_qemu_st_direct(TCGContext
*s
, TCGReg datalo
, TCGReg datahi
,
1508 TCGReg base
, intptr_t ofs
, int seg
,
1511 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1512 we could perform the bswap twice to restore the original value
1513 instead of moving to the scratch. But as it is, the L constraint
1514 means that TCG_REG_L0 is definitely free here. */
1515 const TCGReg scratch
= TCG_REG_L0
;
1516 const TCGMemOp real_bswap
= memop
& MO_BSWAP
;
1517 TCGMemOp bswap
= real_bswap
;
1518 int movop
= OPC_MOVL_EvGv
;
1520 if (have_movbe
&& real_bswap
) {
1522 movop
= OPC_MOVBE_MyGy
;
1525 switch (memop
& MO_SIZE
) {
1527 /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1528 Use the scratch register if necessary. */
1529 if (TCG_TARGET_REG_BITS
== 32 && datalo
>= 4) {
1530 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1533 tcg_out_modrm_offset(s
, OPC_MOVB_EvGv
+ P_REXB_R
+ seg
,
1538 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1539 tcg_out_rolw_8(s
, scratch
);
1542 tcg_out_modrm_offset(s
, movop
+ P_DATA16
+ seg
, datalo
, base
, ofs
);
1546 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1547 tcg_out_bswap32(s
, scratch
);
1550 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1553 if (TCG_TARGET_REG_BITS
== 64) {
1555 tcg_out_mov(s
, TCG_TYPE_I64
, scratch
, datalo
);
1556 tcg_out_bswap64(s
, scratch
);
1559 tcg_out_modrm_offset(s
, movop
+ P_REXW
+ seg
, datalo
, base
, ofs
);
1561 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datahi
);
1562 tcg_out_bswap32(s
, scratch
);
1563 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
+ seg
, scratch
, base
, ofs
);
1564 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1565 tcg_out_bswap32(s
, scratch
);
1566 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
+ seg
, scratch
, base
, ofs
+4);
1573 tcg_out_modrm_offset(s
, movop
+ seg
, datalo
, base
, ofs
);
1574 tcg_out_modrm_offset(s
, movop
+ seg
, datahi
, base
, ofs
+4);
1582 static void tcg_out_qemu_st(TCGContext
*s
, const TCGArg
*args
, bool is64
)
1584 TCGReg datalo
, datahi
, addrlo
;
1585 TCGReg addrhi
__attribute__((unused
));
1587 #if defined(CONFIG_SOFTMMU)
1590 uint8_t *label_ptr
[2];
1594 datahi
= (TCG_TARGET_REG_BITS
== 32 && is64
? *args
++ : 0);
1596 addrhi
= (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
? *args
++ : 0);
1599 #if defined(CONFIG_SOFTMMU)
1600 mem_index
= *args
++;
1601 s_bits
= opc
& MO_SIZE
;
1603 tcg_out_tlb_load(s
, addrlo
, addrhi
, mem_index
, s_bits
,
1604 label_ptr
, offsetof(CPUTLBEntry
, addr_write
));
1607 tcg_out_qemu_st_direct(s
, datalo
, datahi
, TCG_REG_L1
, 0, 0, opc
);
1609 /* Record the current context of a store into ldst label */
1610 add_qemu_ldst_label(s
, 0, opc
, datalo
, datahi
, addrlo
, addrhi
,
1611 mem_index
, s
->code_ptr
, label_ptr
);
1614 int32_t offset
= GUEST_BASE
;
1615 TCGReg base
= addrlo
;
1618 /* ??? We assume all operations have left us with register contents
1619 that are zero extended. So far this appears to be true. If we
1620 want to enforce this, we can either do an explicit zero-extension
1621 here, or (if GUEST_BASE == 0, or a segment register is in use)
1622 use the ADDR32 prefix. For now, do nothing. */
1623 if (GUEST_BASE
&& guest_base_flags
) {
1624 seg
= guest_base_flags
;
1626 } else if (TCG_TARGET_REG_BITS
== 64 && offset
!= GUEST_BASE
) {
1627 tcg_out_movi(s
, TCG_TYPE_I64
, TCG_REG_L1
, GUEST_BASE
);
1628 tgen_arithr(s
, ARITH_ADD
+ P_REXW
, TCG_REG_L1
, base
);
1633 tcg_out_qemu_st_direct(s
, datalo
, datahi
, base
, offset
, seg
, opc
);
1638 static inline void tcg_out_op(TCGContext
*s
, TCGOpcode opc
,
1639 const TCGArg
*args
, const int *const_args
)
1643 #if TCG_TARGET_REG_BITS == 64
1644 # define OP_32_64(x) \
1645 case glue(glue(INDEX_op_, x), _i64): \
1646 rexw = P_REXW; /* FALLTHRU */ \
1647 case glue(glue(INDEX_op_, x), _i32)
1649 # define OP_32_64(x) \
1650 case glue(glue(INDEX_op_, x), _i32)
1654 case INDEX_op_exit_tb
:
1655 tcg_out_movi(s
, TCG_TYPE_PTR
, TCG_REG_EAX
, args
[0]);
1656 tcg_out_jmp(s
, (uintptr_t)tb_ret_addr
);
1658 case INDEX_op_goto_tb
:
1659 if (s
->tb_jmp_offset
) {
1660 /* direct jump method */
1661 tcg_out8(s
, OPC_JMP_long
); /* jmp im */
1662 s
->tb_jmp_offset
[args
[0]] = s
->code_ptr
- s
->code_buf
;
1665 /* indirect jump method */
1666 tcg_out_modrm_offset(s
, OPC_GRP5
, EXT5_JMPN_Ev
, -1,
1667 (intptr_t)(s
->tb_next
+ args
[0]));
1669 s
->tb_next_offset
[args
[0]] = s
->code_ptr
- s
->code_buf
;
1672 if (const_args
[0]) {
1673 tcg_out_calli(s
, args
[0]);
1676 tcg_out_modrm(s
, OPC_GRP5
, EXT5_CALLN_Ev
, args
[0]);
1680 tcg_out_jxx(s
, JCC_JMP
, args
[0], 0);
1682 case INDEX_op_movi_i32
:
1683 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], args
[1]);
1686 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1687 tcg_out_modrm_offset(s
, OPC_MOVZBL
, args
[0], args
[1], args
[2]);
1690 tcg_out_modrm_offset(s
, OPC_MOVSBL
+ rexw
, args
[0], args
[1], args
[2]);
1693 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1694 tcg_out_modrm_offset(s
, OPC_MOVZWL
, args
[0], args
[1], args
[2]);
1697 tcg_out_modrm_offset(s
, OPC_MOVSWL
+ rexw
, args
[0], args
[1], args
[2]);
1699 #if TCG_TARGET_REG_BITS == 64
1700 case INDEX_op_ld32u_i64
:
1702 case INDEX_op_ld_i32
:
1703 tcg_out_ld(s
, TCG_TYPE_I32
, args
[0], args
[1], args
[2]);
1707 if (const_args
[0]) {
1708 tcg_out_modrm_offset(s
, OPC_MOVB_EvIz
,
1709 0, args
[1], args
[2]);
1710 tcg_out8(s
, args
[0]);
1712 tcg_out_modrm_offset(s
, OPC_MOVB_EvGv
| P_REXB_R
,
1713 args
[0], args
[1], args
[2]);
1717 if (const_args
[0]) {
1718 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
| P_DATA16
,
1719 0, args
[1], args
[2]);
1720 tcg_out16(s
, args
[0]);
1722 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
| P_DATA16
,
1723 args
[0], args
[1], args
[2]);
1726 #if TCG_TARGET_REG_BITS == 64
1727 case INDEX_op_st32_i64
:
1729 case INDEX_op_st_i32
:
1730 if (const_args
[0]) {
1731 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
, 0, args
[1], args
[2]);
1732 tcg_out32(s
, args
[0]);
1734 tcg_out_st(s
, TCG_TYPE_I32
, args
[0], args
[1], args
[2]);
1739 /* For 3-operand addition, use LEA. */
1740 if (args
[0] != args
[1]) {
1741 TCGArg a0
= args
[0], a1
= args
[1], a2
= args
[2], c3
= 0;
1743 if (const_args
[2]) {
1745 } else if (a0
== a2
) {
1746 /* Watch out for dest = src + dest, since we've removed
1747 the matching constraint on the add. */
1748 tgen_arithr(s
, ARITH_ADD
+ rexw
, a0
, a1
);
1752 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ rexw
, a0
, a1
, a2
, 0, c3
);
1770 if (const_args
[2]) {
1771 tgen_arithi(s
, c
+ rexw
, args
[0], args
[2], 0);
1773 tgen_arithr(s
, c
+ rexw
, args
[0], args
[2]);
1778 if (const_args
[2]) {
1781 if (val
== (int8_t)val
) {
1782 tcg_out_modrm(s
, OPC_IMUL_GvEvIb
+ rexw
, args
[0], args
[0]);
1785 tcg_out_modrm(s
, OPC_IMUL_GvEvIz
+ rexw
, args
[0], args
[0]);
1789 tcg_out_modrm(s
, OPC_IMUL_GvEv
+ rexw
, args
[0], args
[2]);
1794 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_IDIV
, args
[4]);
1797 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_DIV
, args
[4]);
1816 if (const_args
[2]) {
1817 tcg_out_shifti(s
, c
+ rexw
, args
[0], args
[2]);
1819 tcg_out_modrm(s
, OPC_SHIFT_cl
+ rexw
, c
, args
[0]);
1823 case INDEX_op_brcond_i32
:
1824 tcg_out_brcond32(s
, args
[2], args
[0], args
[1], const_args
[1],
1827 case INDEX_op_setcond_i32
:
1828 tcg_out_setcond32(s
, args
[3], args
[0], args
[1],
1829 args
[2], const_args
[2]);
1831 case INDEX_op_movcond_i32
:
1832 tcg_out_movcond32(s
, args
[5], args
[0], args
[1],
1833 args
[2], const_args
[2], args
[3]);
1837 tcg_out_rolw_8(s
, args
[0]);
1840 tcg_out_bswap32(s
, args
[0]);
1844 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_NEG
, args
[0]);
1847 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_NOT
, args
[0]);
1851 tcg_out_ext8s(s
, args
[0], args
[1], rexw
);
1854 tcg_out_ext16s(s
, args
[0], args
[1], rexw
);
1857 tcg_out_ext8u(s
, args
[0], args
[1]);
1860 tcg_out_ext16u(s
, args
[0], args
[1]);
1863 case INDEX_op_qemu_ld_i32
:
1864 tcg_out_qemu_ld(s
, args
, 0);
1866 case INDEX_op_qemu_ld_i64
:
1867 tcg_out_qemu_ld(s
, args
, 1);
1869 case INDEX_op_qemu_st_i32
:
1870 tcg_out_qemu_st(s
, args
, 0);
1872 case INDEX_op_qemu_st_i64
:
1873 tcg_out_qemu_st(s
, args
, 1);
1877 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_MUL
, args
[3]);
1880 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_IMUL
, args
[3]);
1883 if (const_args
[4]) {
1884 tgen_arithi(s
, ARITH_ADD
+ rexw
, args
[0], args
[4], 1);
1886 tgen_arithr(s
, ARITH_ADD
+ rexw
, args
[0], args
[4]);
1888 if (const_args
[5]) {
1889 tgen_arithi(s
, ARITH_ADC
+ rexw
, args
[1], args
[5], 1);
1891 tgen_arithr(s
, ARITH_ADC
+ rexw
, args
[1], args
[5]);
1895 if (const_args
[4]) {
1896 tgen_arithi(s
, ARITH_SUB
+ rexw
, args
[0], args
[4], 1);
1898 tgen_arithr(s
, ARITH_SUB
+ rexw
, args
[0], args
[4]);
1900 if (const_args
[5]) {
1901 tgen_arithi(s
, ARITH_SBB
+ rexw
, args
[1], args
[5], 1);
1903 tgen_arithr(s
, ARITH_SBB
+ rexw
, args
[1], args
[5]);
1907 #if TCG_TARGET_REG_BITS == 32
1908 case INDEX_op_brcond2_i32
:
1909 tcg_out_brcond2(s
, args
, const_args
, 0);
1911 case INDEX_op_setcond2_i32
:
1912 tcg_out_setcond2(s
, args
, const_args
);
1914 #else /* TCG_TARGET_REG_BITS == 64 */
1915 case INDEX_op_movi_i64
:
1916 tcg_out_movi(s
, TCG_TYPE_I64
, args
[0], args
[1]);
1918 case INDEX_op_ld32s_i64
:
1919 tcg_out_modrm_offset(s
, OPC_MOVSLQ
, args
[0], args
[1], args
[2]);
1921 case INDEX_op_ld_i64
:
1922 tcg_out_ld(s
, TCG_TYPE_I64
, args
[0], args
[1], args
[2]);
1924 case INDEX_op_st_i64
:
1925 if (const_args
[0]) {
1926 tcg_out_modrm_offset(s
, OPC_MOVL_EvIz
| P_REXW
,
1927 0, args
[1], args
[2]);
1928 tcg_out32(s
, args
[0]);
1930 tcg_out_st(s
, TCG_TYPE_I64
, args
[0], args
[1], args
[2]);
1934 case INDEX_op_brcond_i64
:
1935 tcg_out_brcond64(s
, args
[2], args
[0], args
[1], const_args
[1],
1938 case INDEX_op_setcond_i64
:
1939 tcg_out_setcond64(s
, args
[3], args
[0], args
[1],
1940 args
[2], const_args
[2]);
1942 case INDEX_op_movcond_i64
:
1943 tcg_out_movcond64(s
, args
[5], args
[0], args
[1],
1944 args
[2], const_args
[2], args
[3]);
1947 case INDEX_op_bswap64_i64
:
1948 tcg_out_bswap64(s
, args
[0]);
1950 case INDEX_op_ext32u_i64
:
1951 tcg_out_ext32u(s
, args
[0], args
[1]);
1953 case INDEX_op_ext32s_i64
:
1954 tcg_out_ext32s(s
, args
[0], args
[1]);
1959 if (args
[3] == 0 && args
[4] == 8) {
1960 /* load bits 0..7 */
1961 tcg_out_modrm(s
, OPC_MOVB_EvGv
| P_REXB_R
| P_REXB_RM
,
1963 } else if (args
[3] == 8 && args
[4] == 8) {
1964 /* load bits 8..15 */
1965 tcg_out_modrm(s
, OPC_MOVB_EvGv
, args
[2], args
[0] + 4);
1966 } else if (args
[3] == 0 && args
[4] == 16) {
1967 /* load bits 0..15 */
1968 tcg_out_modrm(s
, OPC_MOVL_EvGv
| P_DATA16
, args
[2], args
[0]);
1981 static const TCGTargetOpDef x86_op_defs
[] = {
1982 { INDEX_op_exit_tb
, { } },
1983 { INDEX_op_goto_tb
, { } },
1984 { INDEX_op_call
, { "ri" } },
1985 { INDEX_op_br
, { } },
1986 { INDEX_op_mov_i32
, { "r", "r" } },
1987 { INDEX_op_movi_i32
, { "r" } },
1988 { INDEX_op_ld8u_i32
, { "r", "r" } },
1989 { INDEX_op_ld8s_i32
, { "r", "r" } },
1990 { INDEX_op_ld16u_i32
, { "r", "r" } },
1991 { INDEX_op_ld16s_i32
, { "r", "r" } },
1992 { INDEX_op_ld_i32
, { "r", "r" } },
1993 { INDEX_op_st8_i32
, { "qi", "r" } },
1994 { INDEX_op_st16_i32
, { "ri", "r" } },
1995 { INDEX_op_st_i32
, { "ri", "r" } },
1997 { INDEX_op_add_i32
, { "r", "r", "ri" } },
1998 { INDEX_op_sub_i32
, { "r", "0", "ri" } },
1999 { INDEX_op_mul_i32
, { "r", "0", "ri" } },
2000 { INDEX_op_div2_i32
, { "a", "d", "0", "1", "r" } },
2001 { INDEX_op_divu2_i32
, { "a", "d", "0", "1", "r" } },
2002 { INDEX_op_and_i32
, { "r", "0", "ri" } },
2003 { INDEX_op_or_i32
, { "r", "0", "ri" } },
2004 { INDEX_op_xor_i32
, { "r", "0", "ri" } },
2006 { INDEX_op_shl_i32
, { "r", "0", "ci" } },
2007 { INDEX_op_shr_i32
, { "r", "0", "ci" } },
2008 { INDEX_op_sar_i32
, { "r", "0", "ci" } },
2009 { INDEX_op_rotl_i32
, { "r", "0", "ci" } },
2010 { INDEX_op_rotr_i32
, { "r", "0", "ci" } },
2012 { INDEX_op_brcond_i32
, { "r", "ri" } },
2014 { INDEX_op_bswap16_i32
, { "r", "0" } },
2015 { INDEX_op_bswap32_i32
, { "r", "0" } },
2017 { INDEX_op_neg_i32
, { "r", "0" } },
2019 { INDEX_op_not_i32
, { "r", "0" } },
2021 { INDEX_op_ext8s_i32
, { "r", "q" } },
2022 { INDEX_op_ext16s_i32
, { "r", "r" } },
2023 { INDEX_op_ext8u_i32
, { "r", "q" } },
2024 { INDEX_op_ext16u_i32
, { "r", "r" } },
2026 { INDEX_op_setcond_i32
, { "q", "r", "ri" } },
2028 { INDEX_op_deposit_i32
, { "Q", "0", "Q" } },
2029 { INDEX_op_movcond_i32
, { "r", "r", "ri", "r", "0" } },
2031 { INDEX_op_mulu2_i32
, { "a", "d", "a", "r" } },
2032 { INDEX_op_muls2_i32
, { "a", "d", "a", "r" } },
2033 { INDEX_op_add2_i32
, { "r", "r", "0", "1", "ri", "ri" } },
2034 { INDEX_op_sub2_i32
, { "r", "r", "0", "1", "ri", "ri" } },
2036 #if TCG_TARGET_REG_BITS == 32
2037 { INDEX_op_brcond2_i32
, { "r", "r", "ri", "ri" } },
2038 { INDEX_op_setcond2_i32
, { "r", "r", "r", "ri", "ri" } },
2040 { INDEX_op_mov_i64
, { "r", "r" } },
2041 { INDEX_op_movi_i64
, { "r" } },
2042 { INDEX_op_ld8u_i64
, { "r", "r" } },
2043 { INDEX_op_ld8s_i64
, { "r", "r" } },
2044 { INDEX_op_ld16u_i64
, { "r", "r" } },
2045 { INDEX_op_ld16s_i64
, { "r", "r" } },
2046 { INDEX_op_ld32u_i64
, { "r", "r" } },
2047 { INDEX_op_ld32s_i64
, { "r", "r" } },
2048 { INDEX_op_ld_i64
, { "r", "r" } },
2049 { INDEX_op_st8_i64
, { "ri", "r" } },
2050 { INDEX_op_st16_i64
, { "ri", "r" } },
2051 { INDEX_op_st32_i64
, { "ri", "r" } },
2052 { INDEX_op_st_i64
, { "re", "r" } },
2054 { INDEX_op_add_i64
, { "r", "r", "re" } },
2055 { INDEX_op_mul_i64
, { "r", "0", "re" } },
2056 { INDEX_op_div2_i64
, { "a", "d", "0", "1", "r" } },
2057 { INDEX_op_divu2_i64
, { "a", "d", "0", "1", "r" } },
2058 { INDEX_op_sub_i64
, { "r", "0", "re" } },
2059 { INDEX_op_and_i64
, { "r", "0", "reZ" } },
2060 { INDEX_op_or_i64
, { "r", "0", "re" } },
2061 { INDEX_op_xor_i64
, { "r", "0", "re" } },
2063 { INDEX_op_shl_i64
, { "r", "0", "ci" } },
2064 { INDEX_op_shr_i64
, { "r", "0", "ci" } },
2065 { INDEX_op_sar_i64
, { "r", "0", "ci" } },
2066 { INDEX_op_rotl_i64
, { "r", "0", "ci" } },
2067 { INDEX_op_rotr_i64
, { "r", "0", "ci" } },
2069 { INDEX_op_brcond_i64
, { "r", "re" } },
2070 { INDEX_op_setcond_i64
, { "r", "r", "re" } },
2072 { INDEX_op_bswap16_i64
, { "r", "0" } },
2073 { INDEX_op_bswap32_i64
, { "r", "0" } },
2074 { INDEX_op_bswap64_i64
, { "r", "0" } },
2075 { INDEX_op_neg_i64
, { "r", "0" } },
2076 { INDEX_op_not_i64
, { "r", "0" } },
2078 { INDEX_op_ext8s_i64
, { "r", "r" } },
2079 { INDEX_op_ext16s_i64
, { "r", "r" } },
2080 { INDEX_op_ext32s_i64
, { "r", "r" } },
2081 { INDEX_op_ext8u_i64
, { "r", "r" } },
2082 { INDEX_op_ext16u_i64
, { "r", "r" } },
2083 { INDEX_op_ext32u_i64
, { "r", "r" } },
2085 { INDEX_op_deposit_i64
, { "Q", "0", "Q" } },
2086 { INDEX_op_movcond_i64
, { "r", "r", "re", "r", "0" } },
2088 { INDEX_op_mulu2_i64
, { "a", "d", "a", "r" } },
2089 { INDEX_op_muls2_i64
, { "a", "d", "a", "r" } },
2090 { INDEX_op_add2_i64
, { "r", "r", "0", "1", "re", "re" } },
2091 { INDEX_op_sub2_i64
, { "r", "r", "0", "1", "re", "re" } },
2094 #if TCG_TARGET_REG_BITS == 64
2095 { INDEX_op_qemu_ld_i32
, { "r", "L" } },
2096 { INDEX_op_qemu_st_i32
, { "L", "L" } },
2097 { INDEX_op_qemu_ld_i64
, { "r", "L" } },
2098 { INDEX_op_qemu_st_i64
, { "L", "L" } },
2099 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2100 { INDEX_op_qemu_ld_i32
, { "r", "L" } },
2101 { INDEX_op_qemu_st_i32
, { "L", "L" } },
2102 { INDEX_op_qemu_ld_i64
, { "r", "r", "L" } },
2103 { INDEX_op_qemu_st_i64
, { "L", "L", "L" } },
2105 { INDEX_op_qemu_ld_i32
, { "r", "L", "L" } },
2106 { INDEX_op_qemu_st_i32
, { "L", "L", "L" } },
2107 { INDEX_op_qemu_ld_i64
, { "r", "r", "L", "L" } },
2108 { INDEX_op_qemu_st_i64
, { "L", "L", "L", "L" } },
2113 static int tcg_target_callee_save_regs
[] = {
2114 #if TCG_TARGET_REG_BITS == 64
2123 TCG_REG_R14
, /* Currently used for the global env. */
2126 TCG_REG_EBP
, /* Currently used for the global env. */
2133 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2134 and tcg_register_jit. */
2137 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2138 * (TCG_TARGET_REG_BITS / 8))
2140 #define FRAME_SIZE \
2142 + TCG_STATIC_CALL_ARGS_SIZE \
2143 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2144 + TCG_TARGET_STACK_ALIGN - 1) \
2145 & ~(TCG_TARGET_STACK_ALIGN - 1))
2147 /* Generate global QEMU prologue and epilogue code */
2148 static void tcg_target_qemu_prologue(TCGContext
*s
)
2150 int i
, stack_addend
;
2154 /* Reserve some stack space, also for TCG temps. */
2155 stack_addend
= FRAME_SIZE
- PUSH_SIZE
;
2156 tcg_set_frame(s
, TCG_REG_CALL_STACK
, TCG_STATIC_CALL_ARGS_SIZE
,
2157 CPU_TEMP_BUF_NLONGS
* sizeof(long));
2159 /* Save all callee saved registers. */
2160 for (i
= 0; i
< ARRAY_SIZE(tcg_target_callee_save_regs
); i
++) {
2161 tcg_out_push(s
, tcg_target_callee_save_regs
[i
]);
2164 #if TCG_TARGET_REG_BITS == 32
2165 tcg_out_ld(s
, TCG_TYPE_PTR
, TCG_AREG0
, TCG_REG_ESP
,
2166 (ARRAY_SIZE(tcg_target_callee_save_regs
) + 1) * 4);
2167 tcg_out_addi(s
, TCG_REG_ESP
, -stack_addend
);
2169 tcg_out_modrm_offset(s
, OPC_GRP5
, EXT5_JMPN_Ev
, TCG_REG_ESP
,
2170 (ARRAY_SIZE(tcg_target_callee_save_regs
) + 2) * 4
2173 tcg_out_mov(s
, TCG_TYPE_PTR
, TCG_AREG0
, tcg_target_call_iarg_regs
[0]);
2174 tcg_out_addi(s
, TCG_REG_ESP
, -stack_addend
);
2176 tcg_out_modrm(s
, OPC_GRP5
, EXT5_JMPN_Ev
, tcg_target_call_iarg_regs
[1]);
2180 tb_ret_addr
= s
->code_ptr
;
2182 tcg_out_addi(s
, TCG_REG_CALL_STACK
, stack_addend
);
2184 for (i
= ARRAY_SIZE(tcg_target_callee_save_regs
) - 1; i
>= 0; i
--) {
2185 tcg_out_pop(s
, tcg_target_callee_save_regs
[i
]);
2187 tcg_out_opc(s
, OPC_RET
, 0, 0, 0);
2189 #if !defined(CONFIG_SOFTMMU)
2190 /* Try to set up a segment register to point to GUEST_BASE. */
2192 setup_guest_base_seg();
2197 static void tcg_target_init(TCGContext
*s
)
2199 #if !(defined(have_cmov) && defined(have_movbe))
2201 unsigned a
, b
, c
, d
;
2202 int ret
= __get_cpuid(1, &a
, &b
, &c
, &d
);
2205 /* For 32-bit, 99% certainty that we're running on hardware that
2206 supports cmov, but we still need to check. In case cmov is not
2207 available, we'll use a small forward branch. */
2208 have_cmov
= ret
&& (d
& bit_CMOV
);
2212 /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
2213 need to probe for it. */
2214 have_movbe
= ret
&& (c
& bit_MOVBE
);
2219 if (TCG_TARGET_REG_BITS
== 64) {
2220 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I32
], 0, 0xffff);
2221 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I64
], 0, 0xffff);
2223 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I32
], 0, 0xff);
2226 tcg_regset_clear(tcg_target_call_clobber_regs
);
2227 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_EAX
);
2228 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_EDX
);
2229 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_ECX
);
2230 if (TCG_TARGET_REG_BITS
== 64) {
2231 #if !defined(_WIN64)
2232 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_RDI
);
2233 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_RSI
);
2235 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R8
);
2236 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R9
);
2237 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R10
);
2238 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R11
);
2241 tcg_regset_clear(s
->reserved_regs
);
2242 tcg_regset_set_reg(s
->reserved_regs
, TCG_REG_CALL_STACK
);
2244 tcg_add_target_add_op_defs(x86_op_defs
);
2249 DebugFrameFDEHeader fde
;
2250 uint8_t fde_def_cfa
[4];
2251 uint8_t fde_reg_ofs
[14];
2254 /* We're expecting a 2 byte uleb128 encoded value. */
2255 QEMU_BUILD_BUG_ON(FRAME_SIZE
>= (1 << 14));
2257 #if !defined(__ELF__)
2258 /* Host machine without ELF. */
2259 #elif TCG_TARGET_REG_BITS == 64
2260 #define ELF_HOST_MACHINE EM_X86_64
2261 static DebugFrame debug_frame
= {
2262 .cie
.len
= sizeof(DebugFrameCIE
)-4, /* length after .len member */
2265 .cie
.code_align
= 1,
2266 .cie
.data_align
= 0x78, /* sleb128 -8 */
2267 .cie
.return_column
= 16,
2269 /* Total FDE size does not include the "len" member. */
2270 .fde
.len
= sizeof(DebugFrame
) - offsetof(DebugFrame
, fde
.cie_offset
),
2273 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2274 (FRAME_SIZE
& 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2278 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2279 /* The following ordering must match tcg_target_callee_save_regs. */
2280 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2281 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2282 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2283 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2284 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2285 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2289 #define ELF_HOST_MACHINE EM_386
2290 static DebugFrame debug_frame
= {
2291 .cie
.len
= sizeof(DebugFrameCIE
)-4, /* length after .len member */
2294 .cie
.code_align
= 1,
2295 .cie
.data_align
= 0x7c, /* sleb128 -4 */
2296 .cie
.return_column
= 8,
2298 /* Total FDE size does not include the "len" member. */
2299 .fde
.len
= sizeof(DebugFrame
) - offsetof(DebugFrame
, fde
.cie_offset
),
2302 12, 4, /* DW_CFA_def_cfa %esp, ... */
2303 (FRAME_SIZE
& 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2307 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2308 /* The following ordering must match tcg_target_callee_save_regs. */
2309 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2310 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2311 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2312 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2317 #if defined(ELF_HOST_MACHINE)
2318 void tcg_register_jit(void *buf
, size_t buf_size
)
2320 debug_frame
.fde
.func_start
= (uintptr_t)buf
;
2321 debug_frame
.fde
.func_len
= buf_size
;
2323 tcg_register_jit_int(buf
, buf_size
, &debug_frame
, sizeof(debug_frame
));