tcg/i386: cleanup useless #ifdef
[qemu/ar7.git] / tcg / i386 / tcg-target.c
blob 5d4cf9386e55971ef7ede4fab68f2a8f512d0cb1
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "tcg-be-ldst.h"
27 #ifndef NDEBUG
28 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
29 #if TCG_TARGET_REG_BITS == 64
30 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
31 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
32 #else
33 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
34 #endif
36 #endif
38 static const int tcg_target_reg_alloc_order[] = {
39 #if TCG_TARGET_REG_BITS == 64
40 TCG_REG_RBP,
41 TCG_REG_RBX,
42 TCG_REG_R12,
43 TCG_REG_R13,
44 TCG_REG_R14,
45 TCG_REG_R15,
46 TCG_REG_R10,
47 TCG_REG_R11,
48 TCG_REG_R9,
49 TCG_REG_R8,
50 TCG_REG_RCX,
51 TCG_REG_RDX,
52 TCG_REG_RSI,
53 TCG_REG_RDI,
54 TCG_REG_RAX,
55 #else
56 TCG_REG_EBX,
57 TCG_REG_ESI,
58 TCG_REG_EDI,
59 TCG_REG_EBP,
60 TCG_REG_ECX,
61 TCG_REG_EDX,
62 TCG_REG_EAX,
63 #endif
66 static const int tcg_target_call_iarg_regs[] = {
67 #if TCG_TARGET_REG_BITS == 64
68 #if defined(_WIN64)
69 TCG_REG_RCX,
70 TCG_REG_RDX,
71 #else
72 TCG_REG_RDI,
73 TCG_REG_RSI,
74 TCG_REG_RDX,
75 TCG_REG_RCX,
76 #endif
77 TCG_REG_R8,
78 TCG_REG_R9,
79 #else
80 /* 32-bit mode uses a stack-based calling convention (GCC default). */
81 #endif
84 static const int tcg_target_call_oarg_regs[] = {
85 TCG_REG_EAX,
86 #if TCG_TARGET_REG_BITS == 32
87 TCG_REG_EDX
88 #endif
91 /* Registers used with L constraint, which are the first argument
92 registers on x86_64, and two random call clobbered registers on
93 i386. */
94 #if TCG_TARGET_REG_BITS == 64
95 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
96 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
97 #else
98 # define TCG_REG_L0 TCG_REG_EAX
99 # define TCG_REG_L1 TCG_REG_EDX
100 #endif
102 /* The host compiler should supply <cpuid.h> to enable runtime feature
103 detection, as we're not going to go so far as our own inline assembly.
104 If not available, default values will be assumed. */
105 #if defined(CONFIG_CPUID_H)
106 #include <cpuid.h>
107 #endif
109 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
110 is available. */
111 #if TCG_TARGET_REG_BITS == 64
112 # define have_cmov 1
113 #elif defined(CONFIG_CPUID_H)
114 static bool have_cmov;
115 #else
116 # define have_cmov 0
117 #endif
119 /* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
120 going to attempt to determine at runtime whether movbe is available. */
121 #if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
122 static bool have_movbe;
123 #else
124 # define have_movbe 0
125 #endif
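/* A minimal illustration of the kind of <cpuid.h> check that can fill in
   have_cmov and have_movbe at startup (a sketch under the assumption that
   CONFIG_CPUID_H is defined; the actual detection code lies outside this
   excerpt):

       unsigned int a, b, c, d;
       if (__get_cpuid(1, &a, &b, &c, &d)) {
           have_cmov = (d & bit_CMOV) != 0;
   #ifdef bit_MOVBE
           have_movbe = (c & bit_MOVBE) != 0;
   #endif
       }
*/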
127 static uint8_t *tb_ret_addr;
129 static void patch_reloc(uint8_t *code_ptr, int type,
130 intptr_t value, intptr_t addend)
132 value += addend;
133 switch(type) {
134 case R_386_PC32:
135 value -= (uintptr_t)code_ptr;
136 if (value != (int32_t)value) {
137 tcg_abort();
139 *(uint32_t *)code_ptr = value;
140 break;
141 case R_386_PC8:
142 value -= (uintptr_t)code_ptr;
143 if (value != (int8_t)value) {
144 tcg_abort();
146 *(uint8_t *)code_ptr = value;
147 break;
148 default:
149 tcg_abort();
153 /* parse target specific constraints */
154 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
156 const char *ct_str;
158 ct_str = *pct_str;
159 switch(ct_str[0]) {
160 case 'a':
161 ct->ct |= TCG_CT_REG;
162 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
163 break;
164 case 'b':
165 ct->ct |= TCG_CT_REG;
166 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
167 break;
168 case 'c':
169 ct->ct |= TCG_CT_REG;
170 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
171 break;
172 case 'd':
173 ct->ct |= TCG_CT_REG;
174 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
175 break;
176 case 'S':
177 ct->ct |= TCG_CT_REG;
178 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
179 break;
180 case 'D':
181 ct->ct |= TCG_CT_REG;
182 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
183 break;
184 case 'q':
185 ct->ct |= TCG_CT_REG;
186 if (TCG_TARGET_REG_BITS == 64) {
187 tcg_regset_set32(ct->u.regs, 0, 0xffff);
188 } else {
189 tcg_regset_set32(ct->u.regs, 0, 0xf);
191 break;
192 case 'Q':
193 ct->ct |= TCG_CT_REG;
194 tcg_regset_set32(ct->u.regs, 0, 0xf);
195 break;
196 case 'r':
197 ct->ct |= TCG_CT_REG;
198 if (TCG_TARGET_REG_BITS == 64) {
199 tcg_regset_set32(ct->u.regs, 0, 0xffff);
200 } else {
201 tcg_regset_set32(ct->u.regs, 0, 0xff);
203 break;
205 /* qemu_ld/st address constraint */
206 case 'L':
207 ct->ct |= TCG_CT_REG;
208 if (TCG_TARGET_REG_BITS == 64) {
209 tcg_regset_set32(ct->u.regs, 0, 0xffff);
210 } else {
211 tcg_regset_set32(ct->u.regs, 0, 0xff);
213 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
214 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
215 break;
217 case 'e':
218 ct->ct |= TCG_CT_CONST_S32;
219 break;
220 case 'Z':
221 ct->ct |= TCG_CT_CONST_U32;
222 break;
224 default:
225 return -1;
227 ct_str++;
228 *pct_str = ct_str;
229 return 0;
232 /* test if a constant matches the constraint */
233 static inline int tcg_target_const_match(tcg_target_long val,
234 const TCGArgConstraint *arg_ct)
236 int ct = arg_ct->ct;
237 if (ct & TCG_CT_CONST) {
238 return 1;
240 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
241 return 1;
243 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
244 return 1;
246 return 0;
249 #if TCG_TARGET_REG_BITS == 64
250 # define LOWREGMASK(x) ((x) & 7)
251 #else
252 # define LOWREGMASK(x) (x)
253 #endif
255 #define P_EXT 0x100 /* 0x0f opcode prefix */
256 #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
257 #define P_DATA16 0x400 /* 0x66 opcode prefix */
258 #if TCG_TARGET_REG_BITS == 64
259 # define P_ADDR32 0x800 /* 0x67 opcode prefix */
260 # define P_REXW 0x1000 /* Set REX.W = 1 */
261 # define P_REXB_R 0x2000 /* REG field as byte register */
262 # define P_REXB_RM 0x4000 /* R/M field as byte register */
263 # define P_GS 0x8000 /* gs segment override */
264 #else
265 # define P_ADDR32 0
266 # define P_REXW 0
267 # define P_REXB_R 0
268 # define P_REXB_RM 0
269 # define P_GS 0
270 #endif
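/* The P_* flags live above bit 7 of the opcode constants defined below, so a
   value such as (0xb6 | P_EXT) tells tcg_out_opc to emit the 0x0f escape byte
   followed by 0xb6 (movzbl), and OR-ing in P_REXW additionally requests a
   REX.W prefix on x86_64. */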
272 #define OPC_ARITH_EvIz (0x81)
273 #define OPC_ARITH_EvIb (0x83)
274 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
275 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
276 #define OPC_BSWAP (0xc8 | P_EXT)
277 #define OPC_CALL_Jz (0xe8)
278 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
279 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
280 #define OPC_DEC_r32 (0x48)
281 #define OPC_IMUL_GvEv (0xaf | P_EXT)
282 #define OPC_IMUL_GvEvIb (0x6b)
283 #define OPC_IMUL_GvEvIz (0x69)
284 #define OPC_INC_r32 (0x40)
285 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
286 #define OPC_JCC_short (0x70) /* ... plus condition code */
287 #define OPC_JMP_long (0xe9)
288 #define OPC_JMP_short (0xeb)
289 #define OPC_LEA (0x8d)
290 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
291 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
292 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
293 #define OPC_MOVB_EvIz (0xc6)
294 #define OPC_MOVL_EvIz (0xc7)
295 #define OPC_MOVL_Iv (0xb8)
296 #define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
297 #define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
298 #define OPC_MOVSBL (0xbe | P_EXT)
299 #define OPC_MOVSWL (0xbf | P_EXT)
300 #define OPC_MOVSLQ (0x63 | P_REXW)
301 #define OPC_MOVZBL (0xb6 | P_EXT)
302 #define OPC_MOVZWL (0xb7 | P_EXT)
303 #define OPC_POP_r32 (0x58)
304 #define OPC_PUSH_r32 (0x50)
305 #define OPC_PUSH_Iv (0x68)
306 #define OPC_PUSH_Ib (0x6a)
307 #define OPC_RET (0xc3)
308 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
309 #define OPC_SHIFT_1 (0xd1)
310 #define OPC_SHIFT_Ib (0xc1)
311 #define OPC_SHIFT_cl (0xd3)
312 #define OPC_TESTL (0x85)
313 #define OPC_XCHG_ax_r32 (0x90)
315 #define OPC_GRP3_Ev (0xf7)
316 #define OPC_GRP5 (0xff)
318 /* Group 1 opcode extensions for 0x80-0x83.
319 These are also used as modifiers for OPC_ARITH. */
320 #define ARITH_ADD 0
321 #define ARITH_OR 1
322 #define ARITH_ADC 2
323 #define ARITH_SBB 3
324 #define ARITH_AND 4
325 #define ARITH_SUB 5
326 #define ARITH_XOR 6
327 #define ARITH_CMP 7
329 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
330 #define SHIFT_ROL 0
331 #define SHIFT_ROR 1
332 #define SHIFT_SHL 4
333 #define SHIFT_SHR 5
334 #define SHIFT_SAR 7
336 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
337 #define EXT3_NOT 2
338 #define EXT3_NEG 3
339 #define EXT3_MUL 4
340 #define EXT3_IMUL 5
341 #define EXT3_DIV 6
342 #define EXT3_IDIV 7
344 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
345 #define EXT5_INC_Ev 0
346 #define EXT5_DEC_Ev 1
347 #define EXT5_CALLN_Ev 2
348 #define EXT5_JMPN_Ev 4
350 /* Condition codes to be added to OPC_JCC_{long,short}. */
351 #define JCC_JMP (-1)
352 #define JCC_JO 0x0
353 #define JCC_JNO 0x1
354 #define JCC_JB 0x2
355 #define JCC_JAE 0x3
356 #define JCC_JE 0x4
357 #define JCC_JNE 0x5
358 #define JCC_JBE 0x6
359 #define JCC_JA 0x7
360 #define JCC_JS 0x8
361 #define JCC_JNS 0x9
362 #define JCC_JP 0xa
363 #define JCC_JNP 0xb
364 #define JCC_JL 0xc
365 #define JCC_JGE 0xd
366 #define JCC_JLE 0xe
367 #define JCC_JG 0xf
369 static const uint8_t tcg_cond_to_jcc[] = {
370 [TCG_COND_EQ] = JCC_JE,
371 [TCG_COND_NE] = JCC_JNE,
372 [TCG_COND_LT] = JCC_JL,
373 [TCG_COND_GE] = JCC_JGE,
374 [TCG_COND_LE] = JCC_JLE,
375 [TCG_COND_GT] = JCC_JG,
376 [TCG_COND_LTU] = JCC_JB,
377 [TCG_COND_GEU] = JCC_JAE,
378 [TCG_COND_LEU] = JCC_JBE,
379 [TCG_COND_GTU] = JCC_JA,
382 #if TCG_TARGET_REG_BITS == 64
383 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
385 int rex;
387 if (opc & P_GS) {
388 tcg_out8(s, 0x65);
390 if (opc & P_DATA16) {
391 /* We should never be asking for both 16 and 64-bit operation. */
392 assert((opc & P_REXW) == 0);
393 tcg_out8(s, 0x66);
395 if (opc & P_ADDR32) {
396 tcg_out8(s, 0x67);
399 rex = 0;
400 rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
401 rex |= (r & 8) >> 1; /* REX.R */
402 rex |= (x & 8) >> 2; /* REX.X */
403 rex |= (rm & 8) >> 3; /* REX.B */
405 /* P_REXB_{R,RM} indicates that the given register is the low byte.
406 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
407 as otherwise the encoding indicates %[abcd]h. Note that the values
408 that are ORed in merely indicate that the REX byte must be present;
409 those bits get discarded in output. */
410 rex |= opc & (r >= 4 ? P_REXB_R : 0);
411 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
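/* Worked example: with r = 9 (%r9), rm = 0 (%rax), x = 0 and no P_REXW or
   P_REXB flags, only REX.R is needed: rex = (9 & 8) >> 1 = 4, so the byte
   emitted below is 0x40 | 4 = 0x44 (it would be 0x4c with P_REXW as well). */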
413 if (rex) {
414 tcg_out8(s, (uint8_t)(rex | 0x40));
417 if (opc & (P_EXT | P_EXT38)) {
418 tcg_out8(s, 0x0f);
419 if (opc & P_EXT38) {
420 tcg_out8(s, 0x38);
424 tcg_out8(s, opc);
426 #else
427 static void tcg_out_opc(TCGContext *s, int opc)
429 if (opc & P_DATA16) {
430 tcg_out8(s, 0x66);
432 if (opc & (P_EXT | P_EXT38)) {
433 tcg_out8(s, 0x0f);
434 if (opc & P_EXT38) {
435 tcg_out8(s, 0x38);
438 tcg_out8(s, opc);
440 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
441 the 32-bit compilation paths. This method works with all versions of gcc,
442 whereas relying on optimization may not be able to exclude them. */
443 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
444 #endif
446 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
448 tcg_out_opc(s, opc, r, rm, 0);
449 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
452 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
453 Either RM or INDEX may be omitted by passing a negative value. In 64-bit
454 mode for absolute addresses, ~RM is the size of the immediate operand
455 that will follow the instruction. */
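/* Worked example of the single-byte ModRM case below: opc = OPC_MOVL_GvEv
   (0x8b), r = %eax (0), rm = %ebp (5), no index, offset = 16.  The offset
   fits in an int8_t, so mod = 0x40 and the bytes emitted are 8b 45 10,
   i.e. "movl 16(%ebp), %eax". */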
457 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
458 int index, int shift, intptr_t offset)
460 int mod, len;
462 if (index < 0 && rm < 0) {
463 if (TCG_TARGET_REG_BITS == 64) {
464 /* Try for a rip-relative addressing mode. This has replaced
465 the 32-bit-mode absolute addressing encoding. */
466 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
467 intptr_t disp = offset - pc;
468 if (disp == (int32_t)disp) {
469 tcg_out_opc(s, opc, r, 0, 0);
470 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
471 tcg_out32(s, disp);
472 return;
475 /* Try for an absolute address encoding. This requires the
476 use of the MODRM+SIB encoding and is therefore larger than
477 rip-relative addressing. */
478 if (offset == (int32_t)offset) {
479 tcg_out_opc(s, opc, r, 0, 0);
480 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
481 tcg_out8(s, (4 << 3) | 5);
482 tcg_out32(s, offset);
483 return;
486 /* ??? The memory isn't directly addressable. */
487 tcg_abort();
488 } else {
489 /* Absolute address. */
490 tcg_out_opc(s, opc, r, 0, 0);
491 tcg_out8(s, (r << 3) | 5);
492 tcg_out32(s, offset);
493 return;
497 /* Find the length of the immediate addend. Note that the encoding
498 that would be used for (%ebp) indicates absolute addressing. */
499 if (rm < 0) {
500 mod = 0, len = 4, rm = 5;
501 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
502 mod = 0, len = 0;
503 } else if (offset == (int8_t)offset) {
504 mod = 0x40, len = 1;
505 } else {
506 mod = 0x80, len = 4;
509 /* Use a single byte MODRM format if possible. Note that the encoding
510 that would be used for %esp is the escape to the two byte form. */
511 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
512 /* Single byte MODRM format. */
513 tcg_out_opc(s, opc, r, rm, 0);
514 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
515 } else {
516 /* Two byte MODRM+SIB format. */
518 /* Note that the encoding that would place %esp into the index
519 field indicates no index register. In 64-bit mode, the REX.X
520 bit counts, so %r12 can be used as the index. */
521 if (index < 0) {
522 index = 4;
523 } else {
524 assert(index != TCG_REG_ESP);
527 tcg_out_opc(s, opc, r, rm, index);
528 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
529 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
532 if (len == 1) {
533 tcg_out8(s, offset);
534 } else if (len == 4) {
535 tcg_out32(s, offset);
539 /* A simplification of the above with no index or shift. */
540 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
541 int rm, intptr_t offset)
543 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
546 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
547 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
549 /* Propagate an opcode prefix, such as P_REXW. */
550 int ext = subop & ~0x7;
551 subop &= 0x7;
553 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
556 static inline void tcg_out_mov(TCGContext *s, TCGType type,
557 TCGReg ret, TCGReg arg)
559 if (arg != ret) {
560 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
561 tcg_out_modrm(s, opc, ret, arg);
565 static void tcg_out_movi(TCGContext *s, TCGType type,
566 TCGReg ret, tcg_target_long arg)
568 tcg_target_long diff;
570 if (arg == 0) {
571 tgen_arithr(s, ARITH_XOR, ret, ret);
572 return;
574 if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
575 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
576 tcg_out32(s, arg);
577 return;
579 if (arg == (int32_t)arg) {
580 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
581 tcg_out32(s, arg);
582 return;
585 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
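/* (lea: REX.W + 0x8d + ModRM + disp32 = 7 bytes; movq: REX.W + 0xb8+r +
   imm64 = 10 bytes -- hence the +7 below when forming the displacement.) */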
586 diff = arg - ((uintptr_t)s->code_ptr + 7);
587 if (diff == (int32_t)diff) {
588 tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
589 tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
590 tcg_out32(s, diff);
591 return;
594 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
595 tcg_out64(s, arg);
598 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
600 if (val == (int8_t)val) {
601 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
602 tcg_out8(s, val);
603 } else if (val == (int32_t)val) {
604 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
605 tcg_out32(s, val);
606 } else {
607 tcg_abort();
611 static inline void tcg_out_push(TCGContext *s, int reg)
613 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
616 static inline void tcg_out_pop(TCGContext *s, int reg)
618 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
621 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
622 TCGReg arg1, intptr_t arg2)
624 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
625 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
628 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
629 TCGReg arg1, intptr_t arg2)
631 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
632 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
635 static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
636 tcg_target_long ofs, tcg_target_long val)
638 int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
639 tcg_out_modrm_offset(s, opc, 0, base, ofs);
640 tcg_out32(s, val);
643 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
645 /* Propagate an opcode prefix, such as P_DATA16. */
646 int ext = subopc & ~0x7;
647 subopc &= 0x7;
649 if (count == 1) {
650 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
651 } else {
652 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
653 tcg_out8(s, count);
657 static inline void tcg_out_bswap32(TCGContext *s, int reg)
659 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
662 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
664 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
667 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
669 /* movzbl */
670 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
671 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
674 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
676 /* movsbl */
677 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
678 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
681 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
683 /* movzwl */
684 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
687 static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
689 /* movsw[lq] */
690 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
693 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
695 /* 32-bit mov zero extends. */
696 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
699 static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
701 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
704 static inline void tcg_out_bswap64(TCGContext *s, int reg)
706 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
709 static void tgen_arithi(TCGContext *s, int c, int r0,
710 tcg_target_long val, int cf)
712 int rexw = 0;
714 if (TCG_TARGET_REG_BITS == 64) {
715 rexw = c & -8;
716 c &= 7;
719 /* ??? While INC is 2 bytes shorter than ADDL $1, INC and DEC also induce
720 partial-flags-update stalls on Pentium 4 and are not recommended
721 by current Intel optimization manuals. */
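/* (INC/DEC r32 is a single byte, 0x40+r / 0x48+r, versus 3 bytes for the
   "83 /0 ib" group-1 form; on x86_64 those single-byte encodings are the
   REX prefixes, so the ModRM form is used there instead.) */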
722 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
723 int is_inc = (c == ARITH_ADD) ^ (val < 0);
724 if (TCG_TARGET_REG_BITS == 64) {
725 /* The single-byte increment encodings are re-tasked as the
726 REX prefixes. Use the MODRM encoding. */
727 tcg_out_modrm(s, OPC_GRP5 + rexw,
728 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
729 } else {
730 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
732 return;
735 if (c == ARITH_AND) {
736 if (TCG_TARGET_REG_BITS == 64) {
737 if (val == 0xffffffffu) {
738 tcg_out_ext32u(s, r0, r0);
739 return;
741 if (val == (uint32_t)val) {
742 /* AND with no high bits set can use a 32-bit operation. */
743 rexw = 0;
746 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
747 tcg_out_ext8u(s, r0, r0);
748 return;
750 if (val == 0xffffu) {
751 tcg_out_ext16u(s, r0, r0);
752 return;
756 if (val == (int8_t)val) {
757 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
758 tcg_out8(s, val);
759 return;
761 if (rexw == 0 || val == (int32_t)val) {
762 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
763 tcg_out32(s, val);
764 return;
767 tcg_abort();
770 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
772 if (val != 0) {
773 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
777 /* Use SMALL != 0 to force a short forward branch. */
778 static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
780 int32_t val, val1;
781 TCGLabel *l = &s->labels[label_index];
783 if (l->has_value) {
784 val = l->u.value - (intptr_t)s->code_ptr;
785 val1 = val - 2;
786 if ((int8_t)val1 == val1) {
787 if (opc == -1) {
788 tcg_out8(s, OPC_JMP_short);
789 } else {
790 tcg_out8(s, OPC_JCC_short + opc);
792 tcg_out8(s, val1);
793 } else {
794 if (small) {
795 tcg_abort();
797 if (opc == -1) {
798 tcg_out8(s, OPC_JMP_long);
799 tcg_out32(s, val - 5);
800 } else {
801 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
802 tcg_out32(s, val - 6);
805 } else if (small) {
806 if (opc == -1) {
807 tcg_out8(s, OPC_JMP_short);
808 } else {
809 tcg_out8(s, OPC_JCC_short + opc);
811 tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
812 s->code_ptr += 1;
813 } else {
814 if (opc == -1) {
815 tcg_out8(s, OPC_JMP_long);
816 } else {
817 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
819 tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
820 s->code_ptr += 4;
824 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
825 int const_arg2, int rexw)
827 if (const_arg2) {
828 if (arg2 == 0) {
829 /* test r, r */
830 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
831 } else {
832 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
834 } else {
835 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
839 static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
840 TCGArg arg1, TCGArg arg2, int const_arg2,
841 int label_index, int small)
843 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
844 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
847 #if TCG_TARGET_REG_BITS == 64
848 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
849 TCGArg arg1, TCGArg arg2, int const_arg2,
850 int label_index, int small)
852 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
853 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
855 #else
856 /* XXX: we implement it at the target level to avoid having to
857 handle cross-basic-block temporaries */
858 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
859 const int *const_args, int small)
861 int label_next;
862 label_next = gen_new_label();
863 switch(args[4]) {
864 case TCG_COND_EQ:
865 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
866 label_next, 1);
867 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
868 args[5], small);
869 break;
870 case TCG_COND_NE:
871 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
872 args[5], small);
873 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
874 args[5], small);
875 break;
876 case TCG_COND_LT:
877 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
878 args[5], small);
879 tcg_out_jxx(s, JCC_JNE, label_next, 1);
880 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
881 args[5], small);
882 break;
883 case TCG_COND_LE:
884 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
885 args[5], small);
886 tcg_out_jxx(s, JCC_JNE, label_next, 1);
887 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
888 args[5], small);
889 break;
890 case TCG_COND_GT:
891 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
892 args[5], small);
893 tcg_out_jxx(s, JCC_JNE, label_next, 1);
894 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
895 args[5], small);
896 break;
897 case TCG_COND_GE:
898 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
899 args[5], small);
900 tcg_out_jxx(s, JCC_JNE, label_next, 1);
901 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
902 args[5], small);
903 break;
904 case TCG_COND_LTU:
905 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
906 args[5], small);
907 tcg_out_jxx(s, JCC_JNE, label_next, 1);
908 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
909 args[5], small);
910 break;
911 case TCG_COND_LEU:
912 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
913 args[5], small);
914 tcg_out_jxx(s, JCC_JNE, label_next, 1);
915 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
916 args[5], small);
917 break;
918 case TCG_COND_GTU:
919 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
920 args[5], small);
921 tcg_out_jxx(s, JCC_JNE, label_next, 1);
922 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
923 args[5], small);
924 break;
925 case TCG_COND_GEU:
926 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
927 args[5], small);
928 tcg_out_jxx(s, JCC_JNE, label_next, 1);
929 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
930 args[5], small);
931 break;
932 default:
933 tcg_abort();
935 tcg_out_label(s, label_next, s->code_ptr);
937 #endif
939 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
940 TCGArg arg1, TCGArg arg2, int const_arg2)
942 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
943 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
944 tcg_out_ext8u(s, dest, dest);
947 #if TCG_TARGET_REG_BITS == 64
948 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
949 TCGArg arg1, TCGArg arg2, int const_arg2)
951 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
952 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
953 tcg_out_ext8u(s, dest, dest);
955 #else
956 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
957 const int *const_args)
959 TCGArg new_args[6];
960 int label_true, label_over;
962 memcpy(new_args, args+1, 5*sizeof(TCGArg));
964 if (args[0] == args[1] || args[0] == args[2]
965 || (!const_args[3] && args[0] == args[3])
966 || (!const_args[4] && args[0] == args[4])) {
967 /* When the destination overlaps with one of the argument
968 registers, don't do anything tricky. */
969 label_true = gen_new_label();
970 label_over = gen_new_label();
972 new_args[5] = label_true;
973 tcg_out_brcond2(s, new_args, const_args+1, 1);
975 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
976 tcg_out_jxx(s, JCC_JMP, label_over, 1);
977 tcg_out_label(s, label_true, s->code_ptr);
979 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
980 tcg_out_label(s, label_over, s->code_ptr);
981 } else {
982 /* When the destination does not overlap one of the arguments,
983 clear the destination first, jump if cond false, and emit an
984 increment in the true case. This results in smaller code. */
986 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
988 label_over = gen_new_label();
989 new_args[4] = tcg_invert_cond(new_args[4]);
990 new_args[5] = label_over;
991 tcg_out_brcond2(s, new_args, const_args+1, 1);
993 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
994 tcg_out_label(s, label_over, s->code_ptr);
997 #endif
999 static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
1000 TCGArg c1, TCGArg c2, int const_c2,
1001 TCGArg v1)
1003 tcg_out_cmp(s, c1, c2, const_c2, 0);
1004 if (have_cmov) {
1005 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
1006 } else {
1007 int over = gen_new_label();
1008 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
1009 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
1010 tcg_out_label(s, over, s->code_ptr);
1014 #if TCG_TARGET_REG_BITS == 64
1015 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1016 TCGArg c1, TCGArg c2, int const_c2,
1017 TCGArg v1)
1019 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
1020 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
1022 #endif
1024 static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
1026 intptr_t disp = dest - (intptr_t)s->code_ptr - 5;
1028 if (disp == (int32_t)disp) {
1029 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1030 tcg_out32(s, disp);
1031 } else {
1032 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
1033 tcg_out_modrm(s, OPC_GRP5,
1034 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
1038 static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
1040 tcg_out_branch(s, 1, dest);
1043 static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
1045 tcg_out_branch(s, 0, dest);
1048 #if defined(CONFIG_SOFTMMU)
1049 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1050 * int mmu_idx, uintptr_t ra)
1052 static const void * const qemu_ld_helpers[16] = {
1053 [MO_UB] = helper_ret_ldub_mmu,
1054 [MO_LEUW] = helper_le_lduw_mmu,
1055 [MO_LEUL] = helper_le_ldul_mmu,
1056 [MO_LEQ] = helper_le_ldq_mmu,
1057 [MO_BEUW] = helper_be_lduw_mmu,
1058 [MO_BEUL] = helper_be_ldul_mmu,
1059 [MO_BEQ] = helper_be_ldq_mmu,
1062 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1063 * uintxx_t val, int mmu_idx, uintptr_t ra)
1065 static const void * const qemu_st_helpers[16] = {
1066 [MO_UB] = helper_ret_stb_mmu,
1067 [MO_LEUW] = helper_le_stw_mmu,
1068 [MO_LEUL] = helper_le_stl_mmu,
1069 [MO_LEQ] = helper_le_stq_mmu,
1070 [MO_BEUW] = helper_be_stw_mmu,
1071 [MO_BEUL] = helper_be_stl_mmu,
1072 [MO_BEQ] = helper_be_stq_mmu,
1075 /* Perform the TLB load and compare.
1077 Inputs:
1078 ADDRLO and ADDRHI contain the low and high part of the address.
1080 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1082 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1083 This should be offsetof addr_read or addr_write.
1085 Outputs:
1086 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1087 positions of the displacements of forward jumps to the TLB miss case.
1089 Second argument register is loaded with the low part of the address.
1090 In the TLB hit case, it has been adjusted as indicated by the TLB
1091 and so is a host address. In the TLB miss case, it continues to
1092 hold a guest address.
1094 First argument register is clobbered. */
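/* Roughly, the fast path emitted below is (in the code's own notation):

       mov  addrlo -> r0
       mov  addrlo -> r1
       shr  r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS
       and  r1, TARGET_PAGE_MASK | ((1 << s_bits) - 1)
       and  r0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS
       lea  r0, [env + r0 + offsetof(tlb_table[mem_index][0]) + which]
       cmp  0(r0), r1
       mov  addrlo -> r1
       jne  slow_path
       cmp  4(r0), addrhi ; jne slow_path    (64-bit guest on 32-bit host only)
       add  (addend - which)(r0), r1         r1 now holds the host address
*/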
1096 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1097 int mem_index, TCGMemOp s_bits,
1098 uint8_t **label_ptr, int which)
1100 const TCGReg r0 = TCG_REG_L0;
1101 const TCGReg r1 = TCG_REG_L1;
1102 TCGType ttype = TCG_TYPE_I32;
1103 TCGType htype = TCG_TYPE_I32;
1104 int trexw = 0, hrexw = 0;
1106 if (TCG_TARGET_REG_BITS == 64) {
1107 if (TARGET_LONG_BITS == 64) {
1108 ttype = TCG_TYPE_I64;
1109 trexw = P_REXW;
1111 if (TCG_TYPE_PTR == TCG_TYPE_I64) {
1112 htype = TCG_TYPE_I64;
1113 hrexw = P_REXW;
1117 tcg_out_mov(s, htype, r0, addrlo);
1118 tcg_out_mov(s, ttype, r1, addrlo);
1120 tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
1121 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1123 tgen_arithi(s, ARITH_AND + trexw, r1,
1124 TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
1125 tgen_arithi(s, ARITH_AND + hrexw, r0,
1126 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1128 tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
1129 offsetof(CPUArchState, tlb_table[mem_index][0])
1130 + which);
1132 /* cmp 0(r0), r1 */
1133 tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
1135 /* Prepare for both the fast path add of the tlb addend, and the slow
1136 path function argument setup. There are two cases worth noting:
1137 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1138 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1139 copies the entire guest address for the slow path, while truncation
1140 for the 32-bit host happens with the fastpath ADDL below. */
1141 tcg_out_mov(s, ttype, r1, addrlo);
1143 /* jne slow_path */
1144 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1145 label_ptr[0] = s->code_ptr;
1146 s->code_ptr += 4;
1148 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1149 /* cmp 4(r0), addrhi */
1150 tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
1152 /* jne slow_path */
1153 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1154 label_ptr[1] = s->code_ptr;
1155 s->code_ptr += 4;
1158 /* TLB Hit. */
1160 /* add addend(r0), r1 */
1161 tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
1162 offsetof(CPUTLBEntry, addend) - which);
1166 * Record the context of a call to the out of line helper code for the slow path
1167 * for a load or store, so that we can later generate the correct helper code
1169 static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
1170 TCGReg datalo, TCGReg datahi,
1171 TCGReg addrlo, TCGReg addrhi,
1172 int mem_index, uint8_t *raddr,
1173 uint8_t **label_ptr)
1175 TCGLabelQemuLdst *label = new_ldst_label(s);
1177 label->is_ld = is_ld;
1178 label->opc = opc;
1179 label->datalo_reg = datalo;
1180 label->datahi_reg = datahi;
1181 label->addrlo_reg = addrlo;
1182 label->addrhi_reg = addrhi;
1183 label->mem_index = mem_index;
1184 label->raddr = raddr;
1185 label->label_ptr[0] = label_ptr[0];
1186 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1187 label->label_ptr[1] = label_ptr[1];
1192 * Generate code for the slow path for a load at the end of the block
1194 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1196 TCGMemOp opc = l->opc;
1197 TCGReg data_reg;
1198 uint8_t **label_ptr = &l->label_ptr[0];
1200 /* resolve label address */
1201 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1202 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1203 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1206 if (TCG_TARGET_REG_BITS == 32) {
1207 int ofs = 0;
1209 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1210 ofs += 4;
1212 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1213 ofs += 4;
1215 if (TARGET_LONG_BITS == 64) {
1216 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1217 ofs += 4;
1220 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1221 ofs += 4;
1223 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
1224 } else {
1225 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1226 /* The second argument is already loaded with addrlo. */
1227 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
1228 l->mem_index);
1229 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1230 (uintptr_t)l->raddr);
1233 tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);
1235 data_reg = l->datalo_reg;
1236 switch (opc & MO_SSIZE) {
1237 case MO_SB:
1238 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1239 break;
1240 case MO_SW:
1241 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1242 break;
1243 #if TCG_TARGET_REG_BITS == 64
1244 case MO_SL:
1245 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1246 break;
1247 #endif
1248 case MO_UB:
1249 case MO_UW:
1250 /* Note that the helpers have zero-extended to tcg_target_long. */
1251 case MO_UL:
1252 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1253 break;
1254 case MO_Q:
1255 if (TCG_TARGET_REG_BITS == 64) {
1256 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1257 } else if (data_reg == TCG_REG_EDX) {
1258 /* xchg %edx, %eax */
1259 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1260 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1261 } else {
1262 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1263 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1265 break;
1266 default:
1267 tcg_abort();
1270 /* Jump to the code corresponding to the next IR op after this qemu_ld */
1271 tcg_out_jmp(s, (uintptr_t)l->raddr);
1275 * Generate code for the slow path for a store at the end of the block
1277 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1279 TCGMemOp opc = l->opc;
1280 TCGMemOp s_bits = opc & MO_SIZE;
1281 uint8_t **label_ptr = &l->label_ptr[0];
1282 TCGReg retaddr;
1284 /* resolve label address */
1285 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1286 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1287 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1290 if (TCG_TARGET_REG_BITS == 32) {
1291 int ofs = 0;
1293 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1294 ofs += 4;
1296 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1297 ofs += 4;
1299 if (TARGET_LONG_BITS == 64) {
1300 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1301 ofs += 4;
1304 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1305 ofs += 4;
1307 if (s_bits == MO_64) {
1308 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1309 ofs += 4;
1312 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1313 ofs += 4;
1315 retaddr = TCG_REG_EAX;
1316 tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
1317 tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
1318 } else {
1319 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1320 /* The second argument is already loaded with addrlo. */
1321 tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1322 tcg_target_call_iarg_regs[2], l->datalo_reg);
1323 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
1324 l->mem_index);
1326 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1327 retaddr = tcg_target_call_iarg_regs[4];
1328 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1329 } else {
1330 retaddr = TCG_REG_RAX;
1331 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1332 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
1336 /* "Tail call" to the helper, with the return address back inline. */
1337 tcg_out_push(s, retaddr);
1338 tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
1340 #elif defined(__x86_64__) && defined(__linux__)
1341 # include <asm/prctl.h>
1342 # include <sys/prctl.h>
1344 int arch_prctl(int code, unsigned long addr);
1346 static int guest_base_flags;
1347 static inline void setup_guest_base_seg(void)
1349 if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
1350 guest_base_flags = P_GS;
1353 #else
1354 # define guest_base_flags 0
1355 static inline void setup_guest_base_seg(void) { }
1356 #endif /* SOFTMMU */
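/* Once the %gs base has been pointed at GUEST_BASE above, guest_base_flags
   is P_GS and the user-mode loads/stores below simply gain a 0x65 segment
   override prefix in tcg_out_opc instead of an explicit add of GUEST_BASE. */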
1358 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1359 TCGReg base, intptr_t ofs, int seg,
1360 TCGMemOp memop)
1362 const TCGMemOp real_bswap = memop & MO_BSWAP;
1363 TCGMemOp bswap = real_bswap;
1364 int movop = OPC_MOVL_GvEv;
1366 if (have_movbe && real_bswap) {
1367 bswap = 0;
1368 movop = OPC_MOVBE_GyMy;
1371 switch (memop & MO_SSIZE) {
1372 case MO_UB:
1373 tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
1374 break;
1375 case MO_SB:
1376 tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
1377 break;
1378 case MO_UW:
1379 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1380 if (real_bswap) {
1381 tcg_out_rolw_8(s, datalo);
1383 break;
1384 case MO_SW:
1385 if (real_bswap) {
1386 if (have_movbe) {
1387 tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
1388 datalo, base, ofs);
1389 } else {
1390 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1391 tcg_out_rolw_8(s, datalo);
1393 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1394 } else {
1395 tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
1396 datalo, base, ofs);
1398 break;
1399 case MO_UL:
1400 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1401 if (bswap) {
1402 tcg_out_bswap32(s, datalo);
1404 break;
1405 #if TCG_TARGET_REG_BITS == 64
1406 case MO_SL:
1407 if (real_bswap) {
1408 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1409 if (bswap) {
1410 tcg_out_bswap32(s, datalo);
1412 tcg_out_ext32s(s, datalo, datalo);
1413 } else {
1414 tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
1416 break;
1417 #endif
1418 case MO_Q:
1419 if (TCG_TARGET_REG_BITS == 64) {
1420 tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
1421 if (bswap) {
1422 tcg_out_bswap64(s, datalo);
1424 } else {
1425 if (real_bswap) {
1426 int t = datalo;
1427 datalo = datahi;
1428 datahi = t;
1430 if (base != datalo) {
1431 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1432 tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
1433 } else {
1434 tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
1435 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1437 if (bswap) {
1438 tcg_out_bswap32(s, datalo);
1439 tcg_out_bswap32(s, datahi);
1442 break;
1443 default:
1444 tcg_abort();
1448 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1449 EAX. It will be useful once fixed-register globals are less
1450 common. */
1451 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1453 TCGReg datalo, datahi, addrlo;
1454 TCGReg addrhi __attribute__((unused));
1455 TCGMemOp opc;
1456 #if defined(CONFIG_SOFTMMU)
1457 int mem_index;
1458 TCGMemOp s_bits;
1459 uint8_t *label_ptr[2];
1460 #endif
1462 datalo = *args++;
1463 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1464 addrlo = *args++;
1465 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1466 opc = *args++;
1468 #if defined(CONFIG_SOFTMMU)
1469 mem_index = *args++;
1470 s_bits = opc & MO_SIZE;
1472 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
1473 label_ptr, offsetof(CPUTLBEntry, addr_read));
1475 /* TLB Hit. */
1476 tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1478 /* Record the current context of a load into ldst label */
1479 add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
1480 mem_index, s->code_ptr, label_ptr);
1481 #else
1483 int32_t offset = GUEST_BASE;
1484 TCGReg base = addrlo;
1485 int seg = 0;
1487 /* ??? We assume all operations have left us with register contents
1488 that are zero extended. So far this appears to be true. If we
1489 want to enforce this, we can either do an explicit zero-extension
1490 here, or (if GUEST_BASE == 0, or a segment register is in use)
1491 use the ADDR32 prefix. For now, do nothing. */
1492 if (GUEST_BASE && guest_base_flags) {
1493 seg = guest_base_flags;
1494 offset = 0;
1495 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1496 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1497 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1498 base = TCG_REG_L1;
1499 offset = 0;
1502 tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
1504 #endif
1507 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1508 TCGReg base, intptr_t ofs, int seg,
1509 TCGMemOp memop)
1511 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1512 we could perform the bswap twice to restore the original value
1513 instead of moving to the scratch. But as it is, the L constraint
1514 means that TCG_REG_L0 is definitely free here. */
1515 const TCGReg scratch = TCG_REG_L0;
1516 const TCGMemOp real_bswap = memop & MO_BSWAP;
1517 TCGMemOp bswap = real_bswap;
1518 int movop = OPC_MOVL_EvGv;
1520 if (have_movbe && real_bswap) {
1521 bswap = 0;
1522 movop = OPC_MOVBE_MyGy;
1525 switch (memop & MO_SIZE) {
1526 case MO_8:
1527 /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1528 Use the scratch register if necessary. */
1529 if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
1530 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1531 datalo = scratch;
1533 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1534 datalo, base, ofs);
1535 break;
1536 case MO_16:
1537 if (bswap) {
1538 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1539 tcg_out_rolw_8(s, scratch);
1540 datalo = scratch;
1542 tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
1543 break;
1544 case MO_32:
1545 if (bswap) {
1546 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1547 tcg_out_bswap32(s, scratch);
1548 datalo = scratch;
1550 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1551 break;
1552 case MO_64:
1553 if (TCG_TARGET_REG_BITS == 64) {
1554 if (bswap) {
1555 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1556 tcg_out_bswap64(s, scratch);
1557 datalo = scratch;
1559 tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
1560 } else if (bswap) {
1561 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1562 tcg_out_bswap32(s, scratch);
1563 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
1564 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1565 tcg_out_bswap32(s, scratch);
1566 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
1567 } else {
1568 if (real_bswap) {
1569 int t = datalo;
1570 datalo = datahi;
1571 datahi = t;
1573 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1574 tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
1576 break;
1577 default:
1578 tcg_abort();
1582 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1584 TCGReg datalo, datahi, addrlo;
1585 TCGReg addrhi __attribute__((unused));
1586 TCGMemOp opc;
1587 #if defined(CONFIG_SOFTMMU)
1588 int mem_index;
1589 TCGMemOp s_bits;
1590 uint8_t *label_ptr[2];
1591 #endif
1593 datalo = *args++;
1594 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1595 addrlo = *args++;
1596 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1597 opc = *args++;
1599 #if defined(CONFIG_SOFTMMU)
1600 mem_index = *args++;
1601 s_bits = opc & MO_SIZE;
1603 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
1604 label_ptr, offsetof(CPUTLBEntry, addr_write));
1606 /* TLB Hit. */
1607 tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1609 /* Record the current context of a store into ldst label */
1610 add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
1611 mem_index, s->code_ptr, label_ptr);
1612 #else
1614 int32_t offset = GUEST_BASE;
1615 TCGReg base = addrlo;
1616 int seg = 0;
1618 /* ??? We assume all operations have left us with register contents
1619 that are zero extended. So far this appears to be true. If we
1620 want to enforce this, we can either do an explicit zero-extension
1621 here, or (if GUEST_BASE == 0, or a segment register is in use)
1622 use the ADDR32 prefix. For now, do nothing. */
1623 if (GUEST_BASE && guest_base_flags) {
1624 seg = guest_base_flags;
1625 offset = 0;
1626 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1627 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1628 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1629 base = TCG_REG_L1;
1630 offset = 0;
1633 tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
1635 #endif
1638 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1639 const TCGArg *args, const int *const_args)
1641 int c, rexw = 0;
1643 #if TCG_TARGET_REG_BITS == 64
1644 # define OP_32_64(x) \
1645 case glue(glue(INDEX_op_, x), _i64): \
1646 rexw = P_REXW; /* FALLTHRU */ \
1647 case glue(glue(INDEX_op_, x), _i32)
1648 #else
1649 # define OP_32_64(x) \
1650 case glue(glue(INDEX_op_, x), _i32)
1651 #endif
1653 switch(opc) {
1654 case INDEX_op_exit_tb:
1655 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1656 tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
1657 break;
1658 case INDEX_op_goto_tb:
1659 if (s->tb_jmp_offset) {
1660 /* direct jump method */
1661 tcg_out8(s, OPC_JMP_long); /* jmp im */
1662 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1663 tcg_out32(s, 0);
1664 } else {
1665 /* indirect jump method */
1666 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1667 (intptr_t)(s->tb_next + args[0]));
1669 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1670 break;
1671 case INDEX_op_call:
1672 if (const_args[0]) {
1673 tcg_out_calli(s, args[0]);
1674 } else {
1675 /* call *reg */
1676 tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
1678 break;
1679 case INDEX_op_br:
1680 tcg_out_jxx(s, JCC_JMP, args[0], 0);
1681 break;
1682 case INDEX_op_movi_i32:
1683 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1684 break;
1685 OP_32_64(ld8u):
1686 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1687 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1688 break;
1689 OP_32_64(ld8s):
1690 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1691 break;
1692 OP_32_64(ld16u):
1693 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1694 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1695 break;
1696 OP_32_64(ld16s):
1697 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1698 break;
1699 #if TCG_TARGET_REG_BITS == 64
1700 case INDEX_op_ld32u_i64:
1701 #endif
1702 case INDEX_op_ld_i32:
1703 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1704 break;
1706 OP_32_64(st8):
1707 if (const_args[0]) {
1708 tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
1709 0, args[1], args[2]);
1710 tcg_out8(s, args[0]);
1711 } else {
1712 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1713 args[0], args[1], args[2]);
1715 break;
1716 OP_32_64(st16):
1717 if (const_args[0]) {
1718 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
1719 0, args[1], args[2]);
1720 tcg_out16(s, args[0]);
1721 } else {
1722 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1723 args[0], args[1], args[2]);
1725 break;
1726 #if TCG_TARGET_REG_BITS == 64
1727 case INDEX_op_st32_i64:
1728 #endif
1729 case INDEX_op_st_i32:
1730 if (const_args[0]) {
1731 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
1732 tcg_out32(s, args[0]);
1733 } else {
1734 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1736 break;
1738 OP_32_64(add):
1739 /* For 3-operand addition, use LEA. */
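/* e.g. when the destination differs from both sources, "add d, a, b"
   becomes "lea (a,b), d", and "add d, a, $imm" becomes "lea imm(a), d";
   LEA leaves the flags untouched. */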
1740 if (args[0] != args[1]) {
1741 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1743 if (const_args[2]) {
1744 c3 = a2, a2 = -1;
1745 } else if (a0 == a2) {
1746 /* Watch out for dest = src + dest, since we've removed
1747 the matching constraint on the add. */
1748 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1749 break;
1752 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1753 break;
1755 c = ARITH_ADD;
1756 goto gen_arith;
1757 OP_32_64(sub):
1758 c = ARITH_SUB;
1759 goto gen_arith;
1760 OP_32_64(and):
1761 c = ARITH_AND;
1762 goto gen_arith;
1763 OP_32_64(or):
1764 c = ARITH_OR;
1765 goto gen_arith;
1766 OP_32_64(xor):
1767 c = ARITH_XOR;
1768 goto gen_arith;
1769 gen_arith:
1770 if (const_args[2]) {
1771 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1772 } else {
1773 tgen_arithr(s, c + rexw, args[0], args[2]);
1775 break;
1777 OP_32_64(mul):
1778 if (const_args[2]) {
1779 int32_t val;
1780 val = args[2];
1781 if (val == (int8_t)val) {
1782 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1783 tcg_out8(s, val);
1784 } else {
1785 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1786 tcg_out32(s, val);
1788 } else {
1789 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1791 break;
1793 OP_32_64(div2):
1794 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1795 break;
1796 OP_32_64(divu2):
1797 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1798 break;
1800 OP_32_64(shl):
1801 c = SHIFT_SHL;
1802 goto gen_shift;
1803 OP_32_64(shr):
1804 c = SHIFT_SHR;
1805 goto gen_shift;
1806 OP_32_64(sar):
1807 c = SHIFT_SAR;
1808 goto gen_shift;
1809 OP_32_64(rotl):
1810 c = SHIFT_ROL;
1811 goto gen_shift;
1812 OP_32_64(rotr):
1813 c = SHIFT_ROR;
1814 goto gen_shift;
1815 gen_shift:
1816 if (const_args[2]) {
1817 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1818 } else {
1819 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1821 break;
1823 case INDEX_op_brcond_i32:
1824 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1825 args[3], 0);
1826 break;
1827 case INDEX_op_setcond_i32:
1828 tcg_out_setcond32(s, args[3], args[0], args[1],
1829 args[2], const_args[2]);
1830 break;
1831 case INDEX_op_movcond_i32:
1832 tcg_out_movcond32(s, args[5], args[0], args[1],
1833 args[2], const_args[2], args[3]);
1834 break;
1836 OP_32_64(bswap16):
1837 tcg_out_rolw_8(s, args[0]);
1838 break;
1839 OP_32_64(bswap32):
1840 tcg_out_bswap32(s, args[0]);
1841 break;
1843 OP_32_64(neg):
1844 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1845 break;
1846 OP_32_64(not):
1847 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1848 break;
1850 OP_32_64(ext8s):
1851 tcg_out_ext8s(s, args[0], args[1], rexw);
1852 break;
1853 OP_32_64(ext16s):
1854 tcg_out_ext16s(s, args[0], args[1], rexw);
1855 break;
1856 OP_32_64(ext8u):
1857 tcg_out_ext8u(s, args[0], args[1]);
1858 break;
1859 OP_32_64(ext16u):
1860 tcg_out_ext16u(s, args[0], args[1]);
1861 break;
1863 case INDEX_op_qemu_ld_i32:
1864 tcg_out_qemu_ld(s, args, 0);
1865 break;
1866 case INDEX_op_qemu_ld_i64:
1867 tcg_out_qemu_ld(s, args, 1);
1868 break;
1869 case INDEX_op_qemu_st_i32:
1870 tcg_out_qemu_st(s, args, 0);
1871 break;
1872 case INDEX_op_qemu_st_i64:
1873 tcg_out_qemu_st(s, args, 1);
1874 break;
1876 OP_32_64(mulu2):
1877 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
1878 break;
1879 OP_32_64(muls2):
1880 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
1881 break;
1882 OP_32_64(add2):
1883 if (const_args[4]) {
1884 tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
1885 } else {
1886 tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
1888 if (const_args[5]) {
1889 tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
1890 } else {
1891 tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
1893 break;
1894 OP_32_64(sub2):
1895 if (const_args[4]) {
1896 tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
1897 } else {
1898 tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
1900 if (const_args[5]) {
1901 tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
1902 } else {
1903 tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
1905 break;
1907 #if TCG_TARGET_REG_BITS == 32
1908 case INDEX_op_brcond2_i32:
1909 tcg_out_brcond2(s, args, const_args, 0);
1910 break;
1911 case INDEX_op_setcond2_i32:
1912 tcg_out_setcond2(s, args, const_args);
1913 break;
1914 #else /* TCG_TARGET_REG_BITS == 64 */
1915 case INDEX_op_movi_i64:
1916 tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1917 break;
1918 case INDEX_op_ld32s_i64:
1919 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
1920 break;
1921 case INDEX_op_ld_i64:
1922 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1923 break;
1924 case INDEX_op_st_i64:
1925 if (const_args[0]) {
1926 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
1927 0, args[1], args[2]);
1928 tcg_out32(s, args[0]);
1929 } else {
1930 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1932 break;
1934 case INDEX_op_brcond_i64:
1935 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
1936 args[3], 0);
1937 break;
1938 case INDEX_op_setcond_i64:
1939 tcg_out_setcond64(s, args[3], args[0], args[1],
1940 args[2], const_args[2]);
1941 break;
1942 case INDEX_op_movcond_i64:
1943 tcg_out_movcond64(s, args[5], args[0], args[1],
1944 args[2], const_args[2], args[3]);
1945 break;
1947 case INDEX_op_bswap64_i64:
1948 tcg_out_bswap64(s, args[0]);
1949 break;
1950 case INDEX_op_ext32u_i64:
1951 tcg_out_ext32u(s, args[0], args[1]);
1952 break;
1953 case INDEX_op_ext32s_i64:
1954 tcg_out_ext32s(s, args[0], args[1]);
1955 break;
1956 #endif
1958 OP_32_64(deposit):
1959 if (args[3] == 0 && args[4] == 8) {
1960 /* load bits 0..7 */
1961 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
1962 args[2], args[0]);
1963 } else if (args[3] == 8 && args[4] == 8) {
1964 /* load bits 8..15 */
1965 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
1966 } else if (args[3] == 0 && args[4] == 16) {
1967 /* load bits 0..15 */
1968 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
1969 } else {
1970 tcg_abort();
1972 break;
1974 default:
1975 tcg_abort();
1978 #undef OP_32_64
1981 static const TCGTargetOpDef x86_op_defs[] = {
1982 { INDEX_op_exit_tb, { } },
1983 { INDEX_op_goto_tb, { } },
1984 { INDEX_op_call, { "ri" } },
1985 { INDEX_op_br, { } },
1986 { INDEX_op_mov_i32, { "r", "r" } },
1987 { INDEX_op_movi_i32, { "r" } },
1988 { INDEX_op_ld8u_i32, { "r", "r" } },
1989 { INDEX_op_ld8s_i32, { "r", "r" } },
1990 { INDEX_op_ld16u_i32, { "r", "r" } },
1991 { INDEX_op_ld16s_i32, { "r", "r" } },
1992 { INDEX_op_ld_i32, { "r", "r" } },
1993 { INDEX_op_st8_i32, { "qi", "r" } },
1994 { INDEX_op_st16_i32, { "ri", "r" } },
1995 { INDEX_op_st_i32, { "ri", "r" } },
1997 { INDEX_op_add_i32, { "r", "r", "ri" } },
1998 { INDEX_op_sub_i32, { "r", "0", "ri" } },
1999 { INDEX_op_mul_i32, { "r", "0", "ri" } },
2000 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
2001 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
2002 { INDEX_op_and_i32, { "r", "0", "ri" } },
2003 { INDEX_op_or_i32, { "r", "0", "ri" } },
2004 { INDEX_op_xor_i32, { "r", "0", "ri" } },
2006 { INDEX_op_shl_i32, { "r", "0", "ci" } },
2007 { INDEX_op_shr_i32, { "r", "0", "ci" } },
2008 { INDEX_op_sar_i32, { "r", "0", "ci" } },
2009 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
2010 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
2012 { INDEX_op_brcond_i32, { "r", "ri" } },
2014 { INDEX_op_bswap16_i32, { "r", "0" } },
2015 { INDEX_op_bswap32_i32, { "r", "0" } },
2017 { INDEX_op_neg_i32, { "r", "0" } },
2019 { INDEX_op_not_i32, { "r", "0" } },
2021 { INDEX_op_ext8s_i32, { "r", "q" } },
2022 { INDEX_op_ext16s_i32, { "r", "r" } },
2023 { INDEX_op_ext8u_i32, { "r", "q" } },
2024 { INDEX_op_ext16u_i32, { "r", "r" } },
2026 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
2028 { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
2029 { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
2031 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
2032 { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
2033 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2034 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2036 #if TCG_TARGET_REG_BITS == 32
2037 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
2038 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2039 #else
2040 { INDEX_op_mov_i64, { "r", "r" } },
2041 { INDEX_op_movi_i64, { "r" } },
2042 { INDEX_op_ld8u_i64, { "r", "r" } },
2043 { INDEX_op_ld8s_i64, { "r", "r" } },
2044 { INDEX_op_ld16u_i64, { "r", "r" } },
2045 { INDEX_op_ld16s_i64, { "r", "r" } },
2046 { INDEX_op_ld32u_i64, { "r", "r" } },
2047 { INDEX_op_ld32s_i64, { "r", "r" } },
2048 { INDEX_op_ld_i64, { "r", "r" } },
2049 { INDEX_op_st8_i64, { "ri", "r" } },
2050 { INDEX_op_st16_i64, { "ri", "r" } },
2051 { INDEX_op_st32_i64, { "ri", "r" } },
2052 { INDEX_op_st_i64, { "re", "r" } },
2054 { INDEX_op_add_i64, { "r", "r", "re" } },
2055 { INDEX_op_mul_i64, { "r", "0", "re" } },
2056 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
2057 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
2058 { INDEX_op_sub_i64, { "r", "0", "re" } },
2059 { INDEX_op_and_i64, { "r", "0", "reZ" } },
2060 { INDEX_op_or_i64, { "r", "0", "re" } },
2061 { INDEX_op_xor_i64, { "r", "0", "re" } },
2063 { INDEX_op_shl_i64, { "r", "0", "ci" } },
2064 { INDEX_op_shr_i64, { "r", "0", "ci" } },
2065 { INDEX_op_sar_i64, { "r", "0", "ci" } },
2066 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
2067 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
2069 { INDEX_op_brcond_i64, { "r", "re" } },
2070 { INDEX_op_setcond_i64, { "r", "r", "re" } },
2072 { INDEX_op_bswap16_i64, { "r", "0" } },
2073 { INDEX_op_bswap32_i64, { "r", "0" } },
2074 { INDEX_op_bswap64_i64, { "r", "0" } },
2075 { INDEX_op_neg_i64, { "r", "0" } },
2076 { INDEX_op_not_i64, { "r", "0" } },
2078 { INDEX_op_ext8s_i64, { "r", "r" } },
2079 { INDEX_op_ext16s_i64, { "r", "r" } },
2080 { INDEX_op_ext32s_i64, { "r", "r" } },
2081 { INDEX_op_ext8u_i64, { "r", "r" } },
2082 { INDEX_op_ext16u_i64, { "r", "r" } },
2083 { INDEX_op_ext32u_i64, { "r", "r" } },
2085 { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
2086 { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
2088 { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
2089 { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
2090 { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
2091 { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
2092 #endif
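/* The number of address operands for qemu_ld/st depends on whether a guest
   virtual address fits in one host register: a single "L" register in the
   first two configurations, a register pair when TARGET_LONG_BITS exceeds
   the 32-bit host register width. */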
2094 #if TCG_TARGET_REG_BITS == 64
2095 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2096 { INDEX_op_qemu_st_i32, { "L", "L" } },
2097 { INDEX_op_qemu_ld_i64, { "r", "L" } },
2098 { INDEX_op_qemu_st_i64, { "L", "L" } },
2099 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2100 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2101 { INDEX_op_qemu_st_i32, { "L", "L" } },
2102 { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
2103 { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
2104 #else
2105 { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
2106 { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
2107 { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
2108 { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
2109 #endif
2110 { -1 },
2111 };
2113 static int tcg_target_callee_save_regs[] = {
2114 #if TCG_TARGET_REG_BITS == 64
2115 TCG_REG_RBP,
2116 TCG_REG_RBX,
2117 #if defined(_WIN64)
2118 TCG_REG_RDI,
2119 TCG_REG_RSI,
2120 #endif
2121 TCG_REG_R12,
2122 TCG_REG_R13,
2123 TCG_REG_R14, /* Currently used for the global env. */
2124 TCG_REG_R15,
2125 #else
2126 TCG_REG_EBP, /* Currently used for the global env. */
2127 TCG_REG_EBX,
2128 TCG_REG_ESI,
2129 TCG_REG_EDI,
2130 #endif
2131 };
2133 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2134 and tcg_register_jit. */
2136 #define PUSH_SIZE \
2137 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2138 * (TCG_TARGET_REG_BITS / 8))
2140 #define FRAME_SIZE \
2141 ((PUSH_SIZE \
2142 + TCG_STATIC_CALL_ARGS_SIZE \
2143 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2144 + TCG_TARGET_STACK_ALIGN - 1) \
2145 & ~(TCG_TARGET_STACK_ALIGN - 1))
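/* For example, on a 64-bit non-Windows host six callee-saved registers are
   pushed, so PUSH_SIZE = (1 + 6) * 8 = 56 bytes including the return
   address; FRAME_SIZE then adds the static call-argument area and the TCG
   temp buffer and rounds up to TCG_TARGET_STACK_ALIGN. */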
2147 /* Generate global QEMU prologue and epilogue code */
2148 static void tcg_target_qemu_prologue(TCGContext *s)
2149 {
2150 int i, stack_addend;
2152 /* TB prologue */
2154 /* Reserve some stack space, also for TCG temps. */
2155 stack_addend = FRAME_SIZE - PUSH_SIZE;
2156 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2157 CPU_TEMP_BUF_NLONGS * sizeof(long));
2159 /* Save all callee saved registers. */
2160 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2161 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2162 }
2164 #if TCG_TARGET_REG_BITS == 32
2165 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2166 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2167 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2168 /* jmp *tb. */
2169 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2170 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2171 + stack_addend);
2172 #else
2173 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2174 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2175 /* jmp *tb. */
2176 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2177 #endif
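/* The indirect jump above enters the translation block whose address was
   passed as the second C-level argument (on the stack for i386, in a
   register for x86_64).  exit_tb jumps back to tb_ret_addr, so the code
   below is the matching epilogue. */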
2179 /* TB epilogue */
2180 tb_ret_addr = s->code_ptr;
2182 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2184 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2185 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2186 }
2187 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2189 #if !defined(CONFIG_SOFTMMU)
2190 /* Try to set up a segment register to point to GUEST_BASE. */
2191 if (GUEST_BASE) {
2192 setup_guest_base_seg();
2193 }
2194 #endif
2195 }
2197 static void tcg_target_init(TCGContext *s)
2198 {
2199 #if !(defined(have_cmov) && defined(have_movbe))
2200 {
2201 unsigned a, b, c, d;
2202 int ret = __get_cpuid(1, &a, &b, &c, &d);
2204 # ifndef have_cmov
2205 /* For 32-bit, 99% certainty that we're running on hardware that
2206 supports cmov, but we still need to check. In case cmov is not
2207 available, we'll use a small forward branch. */
2208 have_cmov = ret && (d & bit_CMOV);
2209 # endif
2211 # ifndef have_movbe
2212 /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
2213 need to probe for it. */
2214 have_movbe = ret && (c & bit_MOVBE);
2215 # endif
2216 }
2217 #endif
2219 if (TCG_TARGET_REG_BITS == 64) {
2220 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2221 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2222 } else {
2223 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2224 }
2226 tcg_regset_clear(tcg_target_call_clobber_regs);
2227 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2228 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2229 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2230 if (TCG_TARGET_REG_BITS == 64) {
2231 #if !defined(_WIN64)
2232 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2233 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2234 #endif
2235 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2236 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2237 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2238 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2239 }
2241 tcg_regset_clear(s->reserved_regs);
2242 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2244 tcg_add_target_add_op_defs(x86_op_defs);
2245 }
2247 typedef struct {
2248 DebugFrameCIE cie;
2249 DebugFrameFDEHeader fde;
2250 uint8_t fde_def_cfa[4];
2251 uint8_t fde_reg_ofs[14];
2252 } DebugFrame;
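/* A minimal DWARF CFI description (one CIE plus one FDE) of the prologue
   above, used to let debuggers unwind through generated code.  data_align
   is an sleb128-encoded -8 (0x78) on x86_64 and -4 (0x7c) on i386, i.e. the
   size of each pushed register. */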
2254 /* We're expecting a 2 byte uleb128 encoded value. */
2255 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
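/* fde_def_cfa encodes FRAME_SIZE as a two-byte uleb128: the low seven bits
   with the continuation bit set, then the remaining bits, which is why
   FRAME_SIZE must stay below 1 << 14. */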
2257 #if !defined(__ELF__)
2258 /* Host machine without ELF. */
2259 #elif TCG_TARGET_REG_BITS == 64
2260 #define ELF_HOST_MACHINE EM_X86_64
2261 static DebugFrame debug_frame = {
2262 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2263 .cie.id = -1,
2264 .cie.version = 1,
2265 .cie.code_align = 1,
2266 .cie.data_align = 0x78, /* sleb128 -8 */
2267 .cie.return_column = 16,
2269 /* Total FDE size does not include the "len" member. */
2270 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2272 .fde_def_cfa = {
2273 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2274 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2275 (FRAME_SIZE >> 7)
2276 },
2277 .fde_reg_ofs = {
2278 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2279 /* The following ordering must match tcg_target_callee_save_regs. */
2280 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2281 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2282 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2283 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2284 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2285 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2286 }
2287 };
2288 #else
2289 #define ELF_HOST_MACHINE EM_386
2290 static DebugFrame debug_frame = {
2291 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2292 .cie.id = -1,
2293 .cie.version = 1,
2294 .cie.code_align = 1,
2295 .cie.data_align = 0x7c, /* sleb128 -4 */
2296 .cie.return_column = 8,
2298 /* Total FDE size does not include the "len" member. */
2299 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2301 .fde_def_cfa = {
2302 12, 4, /* DW_CFA_def_cfa %esp, ... */
2303 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2304 (FRAME_SIZE >> 7)
2305 },
2306 .fde_reg_ofs = {
2307 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2308 /* The following ordering must match tcg_target_callee_save_regs. */
2309 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2310 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2311 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2312 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2313 }
2314 };
2315 #endif
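/* When ELF_HOST_MACHINE is defined, hand the code buffer and the frame
   description above to the common JIT registration code so that debuggers
   (e.g. GDB's JIT interface) can see and unwind the generated code. */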
2317 #if defined(ELF_HOST_MACHINE)
2318 void tcg_register_jit(void *buf, size_t buf_size)
2319 {
2320 debug_frame.fde.func_start = (uintptr_t)buf;
2321 debug_frame.fde.func_len = buf_size;
2323 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2324 }
2325 #endif