tcg/i386/tcg-target.c
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "qemu/osdep.h"
26 #include "tcg-be-ldst.h"
28 #ifndef NDEBUG
29 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
30 #if TCG_TARGET_REG_BITS == 64
31 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
32 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
33 #else
34 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
35 #endif
37 #endif
39 static const int tcg_target_reg_alloc_order[] = {
40 #if TCG_TARGET_REG_BITS == 64
41 TCG_REG_RBP,
42 TCG_REG_RBX,
43 TCG_REG_R12,
44 TCG_REG_R13,
45 TCG_REG_R14,
46 TCG_REG_R15,
47 TCG_REG_R10,
48 TCG_REG_R11,
49 TCG_REG_R9,
50 TCG_REG_R8,
51 TCG_REG_RCX,
52 TCG_REG_RDX,
53 TCG_REG_RSI,
54 TCG_REG_RDI,
55 TCG_REG_RAX,
56 #else
57 TCG_REG_EBX,
58 TCG_REG_ESI,
59 TCG_REG_EDI,
60 TCG_REG_EBP,
61 TCG_REG_ECX,
62 TCG_REG_EDX,
63 TCG_REG_EAX,
64 #endif
67 static const int tcg_target_call_iarg_regs[] = {
68 #if TCG_TARGET_REG_BITS == 64
69 #if defined(_WIN64)
70 TCG_REG_RCX,
71 TCG_REG_RDX,
72 #else
73 TCG_REG_RDI,
74 TCG_REG_RSI,
75 TCG_REG_RDX,
76 TCG_REG_RCX,
77 #endif
78 TCG_REG_R8,
79 TCG_REG_R9,
80 #else
81 /* 32 bit mode uses stack based calling convention (GCC default). */
82 #endif
85 static const int tcg_target_call_oarg_regs[] = {
86 TCG_REG_EAX,
87 #if TCG_TARGET_REG_BITS == 32
88 TCG_REG_EDX
89 #endif
92 /* Constants we accept. */
93 #define TCG_CT_CONST_S32 0x100
94 #define TCG_CT_CONST_U32 0x200
95 #define TCG_CT_CONST_I32 0x400
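/* These match the 'e', 'Z' and 'I' constraint letters parsed below:
   S32 accepts any value that sign-extends from 32 bits, U32 any value
   that zero-extends from 32 bits, and I32 any value whose bitwise
   complement fits in a sign-extended 32-bit immediate (used, e.g.,
   when an andc with a constant is emitted as AND with ~constant). */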
97 /* Registers used with L constraint, which are the first argument
98 registers on x86_64, and two random call-clobbered registers on
99 i386. */
100 #if TCG_TARGET_REG_BITS == 64
101 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
102 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
103 #else
104 # define TCG_REG_L0 TCG_REG_EAX
105 # define TCG_REG_L1 TCG_REG_EDX
106 #endif
108 /* The host compiler should supply <cpuid.h> to enable runtime features
109 detection, as we're not going to go so far as our own inline assembly.
110 If not available, default values will be assumed. */
111 #if defined(CONFIG_CPUID_H)
112 #include <cpuid.h>
113 #endif
115 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
116 is available. */
117 #if TCG_TARGET_REG_BITS == 64
118 # define have_cmov 1
119 #elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
120 static bool have_cmov;
121 #else
122 # define have_cmov 0
123 #endif
125 /* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
126 going to attempt to determine at runtime whether movbe is available. */
127 #if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
128 static bool have_movbe;
129 #else
130 # define have_movbe 0
131 #endif
133 /* We need this symbol in tcg-target.h, and we can't properly conditionalize
134 it there. Therefore we always define the variable. */
135 bool have_bmi1;
137 #if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
138 static bool have_bmi2;
139 #else
140 # define have_bmi2 0
141 #endif
143 static tcg_insn_unit *tb_ret_addr;
145 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
146 intptr_t value, intptr_t addend)
148 value += addend;
149 switch(type) {
150 case R_386_PC32:
151 value -= (uintptr_t)code_ptr;
152 if (value != (int32_t)value) {
153 tcg_abort();
155 tcg_patch32(code_ptr, value);
156 break;
157 case R_386_PC8:
158 value -= (uintptr_t)code_ptr;
159 if (value != (int8_t)value) {
160 tcg_abort();
162 tcg_patch8(code_ptr, value);
163 break;
164 default:
165 tcg_abort();
169 /* parse target specific constraints */
170 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
172 const char *ct_str;
174 ct_str = *pct_str;
175 switch(ct_str[0]) {
176 case 'a':
177 ct->ct |= TCG_CT_REG;
178 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
179 break;
180 case 'b':
181 ct->ct |= TCG_CT_REG;
182 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
183 break;
184 case 'c':
185 case_c:
186 ct->ct |= TCG_CT_REG;
187 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
188 break;
189 case 'd':
190 ct->ct |= TCG_CT_REG;
191 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
192 break;
193 case 'S':
194 ct->ct |= TCG_CT_REG;
195 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
196 break;
197 case 'D':
198 ct->ct |= TCG_CT_REG;
199 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
200 break;
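    /* 'q' selects a register that can be used as a byte operand: any
       register in 64-bit mode, but only %eax/%ecx/%edx/%ebx (mask 0xf)
       in 32-bit mode.  'Q' below always restricts to those four. */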
201 case 'q':
202 ct->ct |= TCG_CT_REG;
203 if (TCG_TARGET_REG_BITS == 64) {
204 tcg_regset_set32(ct->u.regs, 0, 0xffff);
205 } else {
206 tcg_regset_set32(ct->u.regs, 0, 0xf);
208 break;
209 case 'Q':
210 ct->ct |= TCG_CT_REG;
211 tcg_regset_set32(ct->u.regs, 0, 0xf);
212 break;
213 case 'r':
214 case_r:
215 ct->ct |= TCG_CT_REG;
216 if (TCG_TARGET_REG_BITS == 64) {
217 tcg_regset_set32(ct->u.regs, 0, 0xffff);
218 } else {
219 tcg_regset_set32(ct->u.regs, 0, 0xff);
221 break;
222 case 'C':
223 /* With SHRX et al, we need not use ECX as shift count register. */
224 if (have_bmi2) {
225 goto case_r;
226 } else {
227 goto case_c;
230 /* qemu_ld/st address constraint */
231 case 'L':
232 ct->ct |= TCG_CT_REG;
233 if (TCG_TARGET_REG_BITS == 64) {
234 tcg_regset_set32(ct->u.regs, 0, 0xffff);
235 } else {
236 tcg_regset_set32(ct->u.regs, 0, 0xff);
238 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
239 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
240 break;
242 case 'e':
243 ct->ct |= TCG_CT_CONST_S32;
244 break;
245 case 'Z':
246 ct->ct |= TCG_CT_CONST_U32;
247 break;
248 case 'I':
249 ct->ct |= TCG_CT_CONST_I32;
250 break;
252 default:
253 return -1;
255 ct_str++;
256 *pct_str = ct_str;
257 return 0;
260 /* test if a constant matches the constraint */
261 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
262 const TCGArgConstraint *arg_ct)
264 int ct = arg_ct->ct;
265 if (ct & TCG_CT_CONST) {
266 return 1;
268 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
269 return 1;
271 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
272 return 1;
274 if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
275 return 1;
277 return 0;
280 #if TCG_TARGET_REG_BITS == 64
281 # define LOWREGMASK(x) ((x) & 7)
282 #else
283 # define LOWREGMASK(x) (x)
284 #endif
286 #define P_EXT 0x100 /* 0x0f opcode prefix */
287 #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
288 #define P_DATA16 0x400 /* 0x66 opcode prefix */
289 #if TCG_TARGET_REG_BITS == 64
290 # define P_ADDR32 0x800 /* 0x67 opcode prefix */
291 # define P_REXW 0x1000 /* Set REX.W = 1 */
292 # define P_REXB_R 0x2000 /* REG field as byte register */
293 # define P_REXB_RM 0x4000 /* R/M field as byte register */
294 # define P_GS 0x8000 /* gs segment override */
295 #else
296 # define P_ADDR32 0
297 # define P_REXW 0
298 # define P_REXB_R 0
299 # define P_REXB_RM 0
300 # define P_GS 0
301 #endif
302 #define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
303 #define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
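/* For illustration: an opcode constant below bundles the raw opcode
   byte with the prefixes that tcg_out_opc must emit in front of it.
   E.g. OPC_MOVZBL is (0xb6 | P_EXT), so issuing it produces the
   two-byte sequence 0f b6, and OPC_BSWAP with P_REXW added yields the
   REX.W 0f c8+r form used for a 64-bit byte swap. */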
305 #define OPC_ARITH_EvIz (0x81)
306 #define OPC_ARITH_EvIb (0x83)
307 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
308 #define OPC_ANDN (0xf2 | P_EXT38)
309 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
310 #define OPC_BSWAP (0xc8 | P_EXT)
311 #define OPC_CALL_Jz (0xe8)
312 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
313 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
314 #define OPC_DEC_r32 (0x48)
315 #define OPC_IMUL_GvEv (0xaf | P_EXT)
316 #define OPC_IMUL_GvEvIb (0x6b)
317 #define OPC_IMUL_GvEvIz (0x69)
318 #define OPC_INC_r32 (0x40)
319 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
320 #define OPC_JCC_short (0x70) /* ... plus condition code */
321 #define OPC_JMP_long (0xe9)
322 #define OPC_JMP_short (0xeb)
323 #define OPC_LEA (0x8d)
324 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
325 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
326 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
327 #define OPC_MOVB_EvIz (0xc6)
328 #define OPC_MOVL_EvIz (0xc7)
329 #define OPC_MOVL_Iv (0xb8)
330 #define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
331 #define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
332 #define OPC_MOVSBL (0xbe | P_EXT)
333 #define OPC_MOVSWL (0xbf | P_EXT)
334 #define OPC_MOVSLQ (0x63 | P_REXW)
335 #define OPC_MOVZBL (0xb6 | P_EXT)
336 #define OPC_MOVZWL (0xb7 | P_EXT)
337 #define OPC_POP_r32 (0x58)
338 #define OPC_PUSH_r32 (0x50)
339 #define OPC_PUSH_Iv (0x68)
340 #define OPC_PUSH_Ib (0x6a)
341 #define OPC_RET (0xc3)
342 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
343 #define OPC_SHIFT_1 (0xd1)
344 #define OPC_SHIFT_Ib (0xc1)
345 #define OPC_SHIFT_cl (0xd3)
346 #define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
347 #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
348 #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
349 #define OPC_TESTL (0x85)
350 #define OPC_XCHG_ax_r32 (0x90)
352 #define OPC_GRP3_Ev (0xf7)
353 #define OPC_GRP5 (0xff)
355 /* Group 1 opcode extensions for 0x80-0x83.
356 These are also used as modifiers for OPC_ARITH. */
357 #define ARITH_ADD 0
358 #define ARITH_OR 1
359 #define ARITH_ADC 2
360 #define ARITH_SBB 3
361 #define ARITH_AND 4
362 #define ARITH_SUB 5
363 #define ARITH_XOR 6
364 #define ARITH_CMP 7
366 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
367 #define SHIFT_ROL 0
368 #define SHIFT_ROR 1
369 #define SHIFT_SHL 4
370 #define SHIFT_SHR 5
371 #define SHIFT_SAR 7
373 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
374 #define EXT3_NOT 2
375 #define EXT3_NEG 3
376 #define EXT3_MUL 4
377 #define EXT3_IMUL 5
378 #define EXT3_DIV 6
379 #define EXT3_IDIV 7
381 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
382 #define EXT5_INC_Ev 0
383 #define EXT5_DEC_Ev 1
384 #define EXT5_CALLN_Ev 2
385 #define EXT5_JMPN_Ev 4
387 /* Condition codes to be added to OPC_JCC_{long,short}. */
388 #define JCC_JMP (-1)
389 #define JCC_JO 0x0
390 #define JCC_JNO 0x1
391 #define JCC_JB 0x2
392 #define JCC_JAE 0x3
393 #define JCC_JE 0x4
394 #define JCC_JNE 0x5
395 #define JCC_JBE 0x6
396 #define JCC_JA 0x7
397 #define JCC_JS 0x8
398 #define JCC_JNS 0x9
399 #define JCC_JP 0xa
400 #define JCC_JNP 0xb
401 #define JCC_JL 0xc
402 #define JCC_JGE 0xd
403 #define JCC_JLE 0xe
404 #define JCC_JG 0xf
406 static const uint8_t tcg_cond_to_jcc[] = {
407 [TCG_COND_EQ] = JCC_JE,
408 [TCG_COND_NE] = JCC_JNE,
409 [TCG_COND_LT] = JCC_JL,
410 [TCG_COND_GE] = JCC_JGE,
411 [TCG_COND_LE] = JCC_JLE,
412 [TCG_COND_GT] = JCC_JG,
413 [TCG_COND_LTU] = JCC_JB,
414 [TCG_COND_GEU] = JCC_JAE,
415 [TCG_COND_LEU] = JCC_JBE,
416 [TCG_COND_GTU] = JCC_JA,
419 #if TCG_TARGET_REG_BITS == 64
420 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
422 int rex;
424 if (opc & P_GS) {
425 tcg_out8(s, 0x65);
427 if (opc & P_DATA16) {
428 /* We should never be asking for both 16 and 64-bit operation. */
429 assert((opc & P_REXW) == 0);
430 tcg_out8(s, 0x66);
432 if (opc & P_ADDR32) {
433 tcg_out8(s, 0x67);
436 rex = 0;
437 rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
438 rex |= (r & 8) >> 1; /* REX.R */
439 rex |= (x & 8) >> 2; /* REX.X */
440 rex |= (rm & 8) >> 3; /* REX.B */
442 /* P_REXB_{R,RM} indicates that the given register is the low byte.
443 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
444 as otherwise the encoding indicates %[abcd]h. Note that the values
445 that are ORed in merely indicate that the REX byte must be present;
446 those bits get discarded in output. */
447 rex |= opc & (r >= 4 ? P_REXB_R : 0);
448 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
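    /* For illustration: "movzbl %bl, %eax" needs no REX (0f b6 c3), but
       "movzbl %sil, %eax" must carry an empty REX prefix (40 0f b6 c6);
       without it the same ModRM byte would name %dh instead of %sil. */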
450 if (rex) {
451 tcg_out8(s, (uint8_t)(rex | 0x40));
454 if (opc & (P_EXT | P_EXT38)) {
455 tcg_out8(s, 0x0f);
456 if (opc & P_EXT38) {
457 tcg_out8(s, 0x38);
461 tcg_out8(s, opc);
463 #else
464 static void tcg_out_opc(TCGContext *s, int opc)
466 if (opc & P_DATA16) {
467 tcg_out8(s, 0x66);
469 if (opc & (P_EXT | P_EXT38)) {
470 tcg_out8(s, 0x0f);
471 if (opc & P_EXT38) {
472 tcg_out8(s, 0x38);
475 tcg_out8(s, opc);
477 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
478 the 32-bit compilation paths. This method works with all versions of gcc,
479 whereas relying on optimization may not be able to exclude them. */
480 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
481 #endif
483 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
485 tcg_out_opc(s, opc, r, rm, 0);
486 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
489 static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
491 int tmp;
493 if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
494 /* Three byte VEX prefix. */
495 tcg_out8(s, 0xc4);
497 /* VEX.m-mmmm */
498 if (opc & P_EXT38) {
499 tmp = 2;
500 } else if (opc & P_EXT) {
501 tmp = 1;
502 } else {
503 tcg_abort();
505 tmp |= 0x40; /* VEX.X */
506 tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */
507 tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */
508 tcg_out8(s, tmp);
510 tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */
511 } else {
512 /* Two byte VEX prefix. */
513 tcg_out8(s, 0xc5);
515 tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
517 /* VEX.pp */
518 if (opc & P_DATA16) {
519 tmp |= 1; /* 0x66 */
520 } else if (opc & P_SIMDF3) {
521 tmp |= 2; /* 0xf3 */
522 } else if (opc & P_SIMDF2) {
523 tmp |= 3; /* 0xf2 */
525 tmp |= (~v & 15) << 3; /* VEX.vvvv */
526 tcg_out8(s, tmp);
527 tcg_out8(s, opc);
528 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
531 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
532 Either RM or INDEX (or both) may be omitted by passing a negative value.  In 64-bit
533 mode for absolute addresses, ~RM is the size of the immediate operand
534 that will follow the instruction. */
536 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
537 int index, int shift, intptr_t offset)
539 int mod, len;
541 if (index < 0 && rm < 0) {
542 if (TCG_TARGET_REG_BITS == 64) {
543 /* Try for a rip-relative addressing mode. This has replaced
544 the 32-bit-mode absolute addressing encoding. */
545 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
546 intptr_t disp = offset - pc;
547 if (disp == (int32_t)disp) {
548 tcg_out_opc(s, opc, r, 0, 0);
549 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
550 tcg_out32(s, disp);
551 return;
554 /* Try for an absolute address encoding. This requires the
555 use of the MODRM+SIB encoding and is therefore larger than
556 rip-relative addressing. */
557 if (offset == (int32_t)offset) {
558 tcg_out_opc(s, opc, r, 0, 0);
559 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
560 tcg_out8(s, (4 << 3) | 5);
561 tcg_out32(s, offset);
562 return;
565 /* ??? The memory isn't directly addressable. */
566 tcg_abort();
567 } else {
568 /* Absolute address. */
569 tcg_out_opc(s, opc, r, 0, 0);
570 tcg_out8(s, (r << 3) | 5);
571 tcg_out32(s, offset);
572 return;
576 /* Find the length of the immediate addend. Note that the encoding
577 that would be used for (%ebp) indicates absolute addressing. */
578 if (rm < 0) {
579 mod = 0, len = 4, rm = 5;
580 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
581 mod = 0, len = 0;
582 } else if (offset == (int8_t)offset) {
583 mod = 0x40, len = 1;
584 } else {
585 mod = 0x80, len = 4;
588 /* Use a single byte MODRM format if possible. Note that the encoding
589 that would be used for %esp is the escape to the two byte form. */
590 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
591 /* Single byte MODRM format. */
592 tcg_out_opc(s, opc, r, rm, 0);
593 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
594 } else {
595 /* Two byte MODRM+SIB format. */
597 /* Note that the encoding that would place %esp into the index
598 field indicates no index register. In 64-bit mode, the REX.X
599 bit counts, so %r12 can be used as the index. */
600 if (index < 0) {
601 index = 4;
602 } else {
603 assert(index != TCG_REG_ESP);
606 tcg_out_opc(s, opc, r, rm, index);
607 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
608 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
611 if (len == 1) {
612 tcg_out8(s, offset);
613 } else if (len == 4) {
614 tcg_out32(s, offset);
618 /* A simplification of the above with no index or shift. */
619 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
620 int rm, intptr_t offset)
622 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
625 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
626 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
628 /* Propagate an opcode prefix, such as P_REXW. */
629 int ext = subop & ~0x7;
630 subop &= 0x7;
632 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
635 static inline void tcg_out_mov(TCGContext *s, TCGType type,
636 TCGReg ret, TCGReg arg)
638 if (arg != ret) {
639 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
640 tcg_out_modrm(s, opc, ret, arg);
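/* A sketch of the size ladder used by tcg_out_movi below: xor r,r for
   zero, a 5-byte mov $imm32 when the value zero-extends, a 7-byte
   REX.W mov $simm32 when it sign-extends, a 7-byte rip-relative lea
   when the value is close to the code buffer, and finally the 10-byte
   movabs. */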
644 static void tcg_out_movi(TCGContext *s, TCGType type,
645 TCGReg ret, tcg_target_long arg)
647 tcg_target_long diff;
649 if (arg == 0) {
650 tgen_arithr(s, ARITH_XOR, ret, ret);
651 return;
653 if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
654 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
655 tcg_out32(s, arg);
656 return;
658 if (arg == (int32_t)arg) {
659 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
660 tcg_out32(s, arg);
661 return;
664 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
665 diff = arg - ((uintptr_t)s->code_ptr + 7);
666 if (diff == (int32_t)diff) {
667 tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
668 tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
669 tcg_out32(s, diff);
670 return;
673 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
674 tcg_out64(s, arg);
677 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
679 if (val == (int8_t)val) {
680 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
681 tcg_out8(s, val);
682 } else if (val == (int32_t)val) {
683 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
684 tcg_out32(s, val);
685 } else {
686 tcg_abort();
690 static inline void tcg_out_push(TCGContext *s, int reg)
692 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
695 static inline void tcg_out_pop(TCGContext *s, int reg)
697 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
700 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
701 TCGReg arg1, intptr_t arg2)
703 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
704 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
707 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
708 TCGReg arg1, intptr_t arg2)
710 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
711 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
714 static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
715 tcg_target_long ofs, tcg_target_long val)
717 int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
718 tcg_out_modrm_offset(s, opc, 0, base, ofs);
719 tcg_out32(s, val);
722 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
724 /* Propagate an opcode prefix, such as P_DATA16. */
725 int ext = subopc & ~0x7;
726 subopc &= 0x7;
728 if (count == 1) {
729 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
730 } else {
731 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
732 tcg_out8(s, count);
736 static inline void tcg_out_bswap32(TCGContext *s, int reg)
738 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
741 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
743 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
746 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
748 /* movzbl */
749 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
750 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
753 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
755 /* movsbl */
756 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
757 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
760 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
762 /* movzwl */
763 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
766 static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
768 /* movsw[lq] */
769 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
772 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
774 /* 32-bit mov zero extends. */
775 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
778 static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
780 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
783 static inline void tcg_out_bswap64(TCGContext *s, int reg)
785 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
788 static void tgen_arithi(TCGContext *s, int c, int r0,
789 tcg_target_long val, int cf)
791 int rexw = 0;
793 if (TCG_TARGET_REG_BITS == 64) {
794 rexw = c & -8;
795 c &= 7;
798 /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
799 partial flags update stalls on Pentium4 and are not recommended
800 by current Intel optimization manuals. */
801 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
802 int is_inc = (c == ARITH_ADD) ^ (val < 0);
803 if (TCG_TARGET_REG_BITS == 64) {
804 /* The single-byte increment encodings are re-tasked as the
805 REX prefixes. Use the MODRM encoding. */
806 tcg_out_modrm(s, OPC_GRP5 + rexw,
807 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
808 } else {
809 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
811 return;
814 if (c == ARITH_AND) {
815 if (TCG_TARGET_REG_BITS == 64) {
816 if (val == 0xffffffffu) {
817 tcg_out_ext32u(s, r0, r0);
818 return;
820 if (val == (uint32_t)val) {
821 /* AND with no high bits set can use a 32-bit operation. */
822 rexw = 0;
825 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
826 tcg_out_ext8u(s, r0, r0);
827 return;
829 if (val == 0xffffu) {
830 tcg_out_ext16u(s, r0, r0);
831 return;
835 if (val == (int8_t)val) {
836 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
837 tcg_out8(s, val);
838 return;
840 if (rexw == 0 || val == (int32_t)val) {
841 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
842 tcg_out32(s, val);
843 return;
846 tcg_abort();
849 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
851 if (val != 0) {
852 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
856 /* Use SMALL != 0 to force a short forward branch. */
857 static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
859 int32_t val, val1;
861 if (l->has_value) {
862 val = tcg_pcrel_diff(s, l->u.value_ptr);
863 val1 = val - 2;
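        /* The displacement is relative to the end of the branch
           instruction, so subtract its length: 2 bytes for the short
           forms here, 5 (jmp rel32) or 6 (0f 8x jcc rel32) below. */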
864 if ((int8_t)val1 == val1) {
865 if (opc == -1) {
866 tcg_out8(s, OPC_JMP_short);
867 } else {
868 tcg_out8(s, OPC_JCC_short + opc);
870 tcg_out8(s, val1);
871 } else {
872 if (small) {
873 tcg_abort();
875 if (opc == -1) {
876 tcg_out8(s, OPC_JMP_long);
877 tcg_out32(s, val - 5);
878 } else {
879 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
880 tcg_out32(s, val - 6);
883 } else if (small) {
884 if (opc == -1) {
885 tcg_out8(s, OPC_JMP_short);
886 } else {
887 tcg_out8(s, OPC_JCC_short + opc);
889 tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
890 s->code_ptr += 1;
891 } else {
892 if (opc == -1) {
893 tcg_out8(s, OPC_JMP_long);
894 } else {
895 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
897 tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
898 s->code_ptr += 4;
902 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
903 int const_arg2, int rexw)
905 if (const_arg2) {
906 if (arg2 == 0) {
907 /* test r, r */
908 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
909 } else {
910 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
912 } else {
913 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
917 static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
918 TCGArg arg1, TCGArg arg2, int const_arg2,
919 TCGLabel *label, int small)
921 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
922 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
925 #if TCG_TARGET_REG_BITS == 64
926 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
927 TCGArg arg1, TCGArg arg2, int const_arg2,
928 TCGLabel *label, int small)
930 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
931 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
933 #else
934 /* XXX: we implement it at the target level to avoid having to
935 handle cross-basic-block temporaries */
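/* Double-word comparison sketch: the high halves are compared first and
   decide the result unless they are equal, in which case the low halves
   are compared unsigned.  EQ/NE need only the two equality tests shown
   in the first two cases. */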
936 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
937 const int *const_args, int small)
939 TCGLabel *label_next = gen_new_label();
940 TCGLabel *label_this = arg_label(args[5]);
942 switch(args[4]) {
943 case TCG_COND_EQ:
944 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
945 label_next, 1);
946 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
947 label_this, small);
948 break;
949 case TCG_COND_NE:
950 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
951 label_this, small);
952 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
953 label_this, small);
954 break;
955 case TCG_COND_LT:
956 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
957 label_this, small);
958 tcg_out_jxx(s, JCC_JNE, label_next, 1);
959 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
960 label_this, small);
961 break;
962 case TCG_COND_LE:
963 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
964 label_this, small);
965 tcg_out_jxx(s, JCC_JNE, label_next, 1);
966 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
967 label_this, small);
968 break;
969 case TCG_COND_GT:
970 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
971 label_this, small);
972 tcg_out_jxx(s, JCC_JNE, label_next, 1);
973 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
974 label_this, small);
975 break;
976 case TCG_COND_GE:
977 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
978 label_this, small);
979 tcg_out_jxx(s, JCC_JNE, label_next, 1);
980 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
981 label_this, small);
982 break;
983 case TCG_COND_LTU:
984 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
985 label_this, small);
986 tcg_out_jxx(s, JCC_JNE, label_next, 1);
987 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
988 label_this, small);
989 break;
990 case TCG_COND_LEU:
991 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
992 label_this, small);
993 tcg_out_jxx(s, JCC_JNE, label_next, 1);
994 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
995 label_this, small);
996 break;
997 case TCG_COND_GTU:
998 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
999 label_this, small);
1000 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1001 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
1002 label_this, small);
1003 break;
1004 case TCG_COND_GEU:
1005 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
1006 label_this, small);
1007 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1008 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
1009 label_this, small);
1010 break;
1011 default:
1012 tcg_abort();
1014 tcg_out_label(s, label_next, s->code_ptr);
1016 #endif
1018 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
1019 TCGArg arg1, TCGArg arg2, int const_arg2)
1021 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
1022 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1023 tcg_out_ext8u(s, dest, dest);
1026 #if TCG_TARGET_REG_BITS == 64
1027 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1028 TCGArg arg1, TCGArg arg2, int const_arg2)
1030 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
1031 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1032 tcg_out_ext8u(s, dest, dest);
1034 #else
1035 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1036 const int *const_args)
1038 TCGArg new_args[6];
1039 TCGLabel *label_true, *label_over;
1041 memcpy(new_args, args+1, 5*sizeof(TCGArg));
1043 if (args[0] == args[1] || args[0] == args[2]
1044 || (!const_args[3] && args[0] == args[3])
1045 || (!const_args[4] && args[0] == args[4])) {
1046 /* When the destination overlaps with one of the argument
1047 registers, don't do anything tricky. */
1048 label_true = gen_new_label();
1049 label_over = gen_new_label();
1051 new_args[5] = label_arg(label_true);
1052 tcg_out_brcond2(s, new_args, const_args+1, 1);
1054 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1055 tcg_out_jxx(s, JCC_JMP, label_over, 1);
1056 tcg_out_label(s, label_true, s->code_ptr);
1058 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
1059 tcg_out_label(s, label_over, s->code_ptr);
1060 } else {
1061 /* When the destination does not overlap one of the arguments,
1062 clear the destination first, jump if cond false, and emit an
1063 increment in the true case. This results in smaller code. */
1065 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1067 label_over = gen_new_label();
1068 new_args[4] = tcg_invert_cond(new_args[4]);
1069 new_args[5] = label_arg(label_over);
1070 tcg_out_brcond2(s, new_args, const_args+1, 1);
1072 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
1073 tcg_out_label(s, label_over, s->code_ptr);
1076 #endif
1078 static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
1079 TCGArg c1, TCGArg c2, int const_c2,
1080 TCGArg v1)
1082 tcg_out_cmp(s, c1, c2, const_c2, 0);
1083 if (have_cmov) {
1084 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
1085 } else {
1086 TCGLabel *over = gen_new_label();
1087 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
1088 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
1089 tcg_out_label(s, over, s->code_ptr);
1093 #if TCG_TARGET_REG_BITS == 64
1094 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1095 TCGArg c1, TCGArg c2, int const_c2,
1096 TCGArg v1)
1098 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
1099 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
1101 #endif
1103 static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
1105 intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
1107 if (disp == (int32_t)disp) {
1108 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1109 tcg_out32(s, disp);
1110 } else {
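        /* Only reachable on x86-64 hosts; on a 32-bit host every
           displacement fits in rel32.  %r10 is a safe scratch since it
           is call-clobbered and carries no argument in either the SysV
           or the Win64 calling convention. */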
1111 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
1112 tcg_out_modrm(s, OPC_GRP5,
1113 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
1117 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
1119 tcg_out_branch(s, 1, dest);
1122 static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
1124 tcg_out_branch(s, 0, dest);
1127 #if defined(CONFIG_SOFTMMU)
1128 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1129 * int mmu_idx, uintptr_t ra)
1131 static void * const qemu_ld_helpers[16] = {
1132 [MO_UB] = helper_ret_ldub_mmu,
1133 [MO_LEUW] = helper_le_lduw_mmu,
1134 [MO_LEUL] = helper_le_ldul_mmu,
1135 [MO_LEQ] = helper_le_ldq_mmu,
1136 [MO_BEUW] = helper_be_lduw_mmu,
1137 [MO_BEUL] = helper_be_ldul_mmu,
1138 [MO_BEQ] = helper_be_ldq_mmu,
1141 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1142 * uintxx_t val, int mmu_idx, uintptr_t ra)
1144 static void * const qemu_st_helpers[16] = {
1145 [MO_UB] = helper_ret_stb_mmu,
1146 [MO_LEUW] = helper_le_stw_mmu,
1147 [MO_LEUL] = helper_le_stl_mmu,
1148 [MO_LEQ] = helper_le_stq_mmu,
1149 [MO_BEUW] = helper_be_stw_mmu,
1150 [MO_BEUL] = helper_be_stl_mmu,
1151 [MO_BEQ] = helper_be_stq_mmu,
1154 /* Perform the TLB load and compare.
1156 Inputs:
1157 ADDRLO and ADDRHI contain the low and high part of the address.
1159 MEM_INDEX and OPC are the memory context and memop of the access.
1161 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1162 This should be offsetof addr_read or addr_write.
1164 Outputs:
1165 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1166 positions of the displacements of forward jumps to the TLB miss case.
1168 Second argument register is loaded with the low part of the address.
1169 In the TLB hit case, it has been adjusted as indicated by the TLB
1170 and so is a host address. In the TLB miss case, it continues to
1171 hold a guest address.
1173 First argument register is clobbered. */
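/* Rough sketch of the TLB entry address computed below (assuming the
   usual CPUArchState layout):

     entry = env + offsetof(CPUArchState, tlb_table[mem_index][0]) + which
             + ((addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS))
                & ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS));

   The loaded tag is compared against the guest address masked to its
   page; when alignment is enforced the low bits are kept in the mask so
   that a misaligned address fails the compare, otherwise the page of the
   access's last byte is used so that page-crossing accesses fail. */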
1175 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1176 int mem_index, TCGMemOp opc,
1177 tcg_insn_unit **label_ptr, int which)
1179 const TCGReg r0 = TCG_REG_L0;
1180 const TCGReg r1 = TCG_REG_L1;
1181 TCGType ttype = TCG_TYPE_I32;
1182 TCGType tlbtype = TCG_TYPE_I32;
1183 int trexw = 0, hrexw = 0, tlbrexw = 0;
1184 int s_mask = (1 << (opc & MO_SIZE)) - 1;
1185 bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0;
1187 if (TCG_TARGET_REG_BITS == 64) {
1188 if (TARGET_LONG_BITS == 64) {
1189 ttype = TCG_TYPE_I64;
1190 trexw = P_REXW;
1192 if (TCG_TYPE_PTR == TCG_TYPE_I64) {
1193 hrexw = P_REXW;
1194 if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
1195 tlbtype = TCG_TYPE_I64;
1196 tlbrexw = P_REXW;
1201 tcg_out_mov(s, tlbtype, r0, addrlo);
1202 if (aligned) {
1203 tcg_out_mov(s, ttype, r1, addrlo);
1204 } else {
1205 /* For an unaligned access, check that we don't cross a page boundary
1206 by using the page address of the access's last byte. */
1207 tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask);
1210 tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
1211 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1213 tgen_arithi(s, ARITH_AND + trexw, r1,
1214 TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0);
1215 tgen_arithi(s, ARITH_AND + tlbrexw, r0,
1216 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1218 tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
1219 offsetof(CPUArchState, tlb_table[mem_index][0])
1220 + which);
1222 /* cmp 0(r0), r1 */
1223 tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
1225 /* Prepare for both the fast path add of the tlb addend, and the slow
1226 path function argument setup. There are two cases worth noting:
1227 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1228 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1229 copies the entire guest address for the slow path, while truncation
1230 for the 32-bit host happens with the fastpath ADDL below. */
1231 tcg_out_mov(s, ttype, r1, addrlo);
1233 /* jne slow_path */
1234 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1235 label_ptr[0] = s->code_ptr;
1236 s->code_ptr += 4;
1238 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1239 /* cmp 4(r0), addrhi */
1240 tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
1242 /* jne slow_path */
1243 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1244 label_ptr[1] = s->code_ptr;
1245 s->code_ptr += 4;
1248 /* TLB Hit. */
1250 /* add addend(r0), r1 */
1251 tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
1252 offsetof(CPUTLBEntry, addend) - which);
1256 * Record the context of a call to the out of line helper code for the slow path
1257 * for a load or store, so that we can later generate the correct helper code
1259 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1260 TCGReg datalo, TCGReg datahi,
1261 TCGReg addrlo, TCGReg addrhi,
1262 tcg_insn_unit *raddr,
1263 tcg_insn_unit **label_ptr)
1265 TCGLabelQemuLdst *label = new_ldst_label(s);
1267 label->is_ld = is_ld;
1268 label->oi = oi;
1269 label->datalo_reg = datalo;
1270 label->datahi_reg = datahi;
1271 label->addrlo_reg = addrlo;
1272 label->addrhi_reg = addrhi;
1273 label->raddr = raddr;
1274 label->label_ptr[0] = label_ptr[0];
1275 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1276 label->label_ptr[1] = label_ptr[1];
1281 * Generate code for the slow path for a load at the end of block
1283 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1285 TCGMemOpIdx oi = l->oi;
1286 TCGMemOp opc = get_memop(oi);
1287 TCGReg data_reg;
1288 tcg_insn_unit **label_ptr = &l->label_ptr[0];
1290 /* resolve label address */
1291 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
1292 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1293 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
1296 if (TCG_TARGET_REG_BITS == 32) {
1297 int ofs = 0;
1299 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1300 ofs += 4;
1302 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1303 ofs += 4;
1305 if (TARGET_LONG_BITS == 64) {
1306 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1307 ofs += 4;
1310 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi);
1311 ofs += 4;
1313 tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
1314 } else {
1315 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1316 /* The second argument is already loaded with addrlo. */
1317 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
1318 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1319 (uintptr_t)l->raddr);
1322 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1324 data_reg = l->datalo_reg;
1325 switch (opc & MO_SSIZE) {
1326 case MO_SB:
1327 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1328 break;
1329 case MO_SW:
1330 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1331 break;
1332 #if TCG_TARGET_REG_BITS == 64
1333 case MO_SL:
1334 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1335 break;
1336 #endif
1337 case MO_UB:
1338 case MO_UW:
1339 /* Note that the helpers have zero-extended to tcg_target_long. */
1340 case MO_UL:
1341 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1342 break;
1343 case MO_Q:
1344 if (TCG_TARGET_REG_BITS == 64) {
1345 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1346 } else if (data_reg == TCG_REG_EDX) {
1347 /* xchg %edx, %eax */
1348 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1349 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1350 } else {
1351 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1352 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1354 break;
1355 default:
1356 tcg_abort();
1359 /* Jump back to the code corresponding to the IR op following the qemu_ld */
1360 tcg_out_jmp(s, l->raddr);
1364 * Generate code for the slow path for a store at the end of block
1366 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1368 TCGMemOpIdx oi = l->oi;
1369 TCGMemOp opc = get_memop(oi);
1370 TCGMemOp s_bits = opc & MO_SIZE;
1371 tcg_insn_unit **label_ptr = &l->label_ptr[0];
1372 TCGReg retaddr;
1374 /* resolve label address */
1375 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
1376 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1377 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
1380 if (TCG_TARGET_REG_BITS == 32) {
1381 int ofs = 0;
1383 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1384 ofs += 4;
1386 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1387 ofs += 4;
1389 if (TARGET_LONG_BITS == 64) {
1390 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1391 ofs += 4;
1394 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1395 ofs += 4;
1397 if (s_bits == MO_64) {
1398 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1399 ofs += 4;
1402 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi);
1403 ofs += 4;
1405 retaddr = TCG_REG_EAX;
1406 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1407 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
1408 } else {
1409 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1410 /* The second argument is already loaded with addrlo. */
1411 tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1412 tcg_target_call_iarg_regs[2], l->datalo_reg);
1413 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
1415 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1416 retaddr = tcg_target_call_iarg_regs[4];
1417 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1418 } else {
1419 retaddr = TCG_REG_RAX;
1420 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1421 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
1422 TCG_TARGET_CALL_STACK_OFFSET);
1426 /* "Tail call" to the helper, with the return address back inline. */
1427 tcg_out_push(s, retaddr);
1428 tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1430 #elif defined(__x86_64__) && defined(__linux__)
1431 # include <asm/prctl.h>
1432 # include <sys/prctl.h>
1434 int arch_prctl(int code, unsigned long addr);
1436 static int guest_base_flags;
1437 static inline void setup_guest_base_seg(void)
1439 if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
1440 guest_base_flags = P_GS;
1443 #else
1444 # define guest_base_flags 0
1445 static inline void setup_guest_base_seg(void) { }
1446 #endif /* SOFTMMU */
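/* In the user-mode case above, a successful arch_prctl(ARCH_SET_GS,
   guest_base) lets guest accesses carry a %gs segment override (P_GS),
   so the hardware adds guest_base for free and the fast path needs no
   extra addition or scratch register. */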
1448 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1449 TCGReg base, int index, intptr_t ofs,
1450 int seg, TCGMemOp memop)
1452 const TCGMemOp real_bswap = memop & MO_BSWAP;
1453 TCGMemOp bswap = real_bswap;
1454 int movop = OPC_MOVL_GvEv;
1456 if (have_movbe && real_bswap) {
1457 bswap = 0;
1458 movop = OPC_MOVBE_GyMy;
1461 switch (memop & MO_SSIZE) {
1462 case MO_UB:
1463 tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
1464 base, index, 0, ofs);
1465 break;
1466 case MO_SB:
1467 tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
1468 base, index, 0, ofs);
1469 break;
1470 case MO_UW:
1471 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1472 base, index, 0, ofs);
1473 if (real_bswap) {
1474 tcg_out_rolw_8(s, datalo);
1476 break;
1477 case MO_SW:
1478 if (real_bswap) {
1479 if (have_movbe) {
1480 tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
1481 datalo, base, index, 0, ofs);
1482 } else {
1483 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1484 base, index, 0, ofs);
1485 tcg_out_rolw_8(s, datalo);
1487 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1488 } else {
1489 tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
1490 datalo, base, index, 0, ofs);
1492 break;
1493 case MO_UL:
1494 tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
1495 if (bswap) {
1496 tcg_out_bswap32(s, datalo);
1498 break;
1499 #if TCG_TARGET_REG_BITS == 64
1500 case MO_SL:
1501 if (real_bswap) {
1502 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1503 base, index, 0, ofs);
1504 if (bswap) {
1505 tcg_out_bswap32(s, datalo);
1507 tcg_out_ext32s(s, datalo, datalo);
1508 } else {
1509 tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
1510 base, index, 0, ofs);
1512 break;
1513 #endif
1514 case MO_Q:
1515 if (TCG_TARGET_REG_BITS == 64) {
1516 tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
1517 base, index, 0, ofs);
1518 if (bswap) {
1519 tcg_out_bswap64(s, datalo);
1521 } else {
1522 if (real_bswap) {
1523 int t = datalo;
1524 datalo = datahi;
1525 datahi = t;
1527 if (base != datalo) {
1528 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1529 base, index, 0, ofs);
1530 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1531 base, index, 0, ofs + 4);
1532 } else {
1533 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1534 base, index, 0, ofs + 4);
1535 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1536 base, index, 0, ofs);
1538 if (bswap) {
1539 tcg_out_bswap32(s, datalo);
1540 tcg_out_bswap32(s, datahi);
1543 break;
1544 default:
1545 tcg_abort();
1549 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1550 EAX. It will be useful once fixed-register globals are less
1551 common. */
1552 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1554 TCGReg datalo, datahi, addrlo;
1555 TCGReg addrhi __attribute__((unused));
1556 TCGMemOpIdx oi;
1557 TCGMemOp opc;
1558 #if defined(CONFIG_SOFTMMU)
1559 int mem_index;
1560 tcg_insn_unit *label_ptr[2];
1561 #endif
1563 datalo = *args++;
1564 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1565 addrlo = *args++;
1566 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1567 oi = *args++;
1568 opc = get_memop(oi);
1570 #if defined(CONFIG_SOFTMMU)
1571 mem_index = get_mmuidx(oi);
1573 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1574 label_ptr, offsetof(CPUTLBEntry, addr_read));
1576 /* TLB Hit. */
1577 tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
1579 /* Record the current context of a load into ldst label */
1580 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1581 s->code_ptr, label_ptr);
1582 #else
1584 int32_t offset = guest_base;
1585 TCGReg base = addrlo;
1586 int index = -1;
1587 int seg = 0;
1589 /* For a 32-bit guest, the high 32 bits of the address register may
1590 contain garbage. They can be ignored via the ADDR32 prefix when
1591 we're not using a guest base, or when using segmentation; otherwise
1592 we need to zero-extend manually. */
1593 if (guest_base == 0 || guest_base_flags) {
1594 seg = guest_base_flags;
1595 offset = 0;
1596 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1597 seg |= P_ADDR32;
1599 } else if (TCG_TARGET_REG_BITS == 64) {
1600 if (TARGET_LONG_BITS == 32) {
1601 tcg_out_ext32u(s, TCG_REG_L0, base);
1602 base = TCG_REG_L0;
1604 if (offset != guest_base) {
1605 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
1606 index = TCG_REG_L1;
1607 offset = 0;
1611 tcg_out_qemu_ld_direct(s, datalo, datahi,
1612 base, index, offset, seg, opc);
1614 #endif
1617 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1618 TCGReg base, intptr_t ofs, int seg,
1619 TCGMemOp memop)
1621 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1622 we could perform the bswap twice to restore the original value
1623 instead of moving to the scratch. But as it is, the L constraint
1624 means that TCG_REG_L0 is definitely free here. */
1625 const TCGReg scratch = TCG_REG_L0;
1626 const TCGMemOp real_bswap = memop & MO_BSWAP;
1627 TCGMemOp bswap = real_bswap;
1628 int movop = OPC_MOVL_EvGv;
1630 if (have_movbe && real_bswap) {
1631 bswap = 0;
1632 movop = OPC_MOVBE_MyGy;
1635 switch (memop & MO_SIZE) {
1636 case MO_8:
1637 /* In 32-bit mode, 8-bit stores can only happen from %[abcd]l.
1638 Use the scratch register if necessary. */
1639 if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
1640 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1641 datalo = scratch;
1643 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1644 datalo, base, ofs);
1645 break;
1646 case MO_16:
1647 if (bswap) {
1648 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1649 tcg_out_rolw_8(s, scratch);
1650 datalo = scratch;
1652 tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
1653 break;
1654 case MO_32:
1655 if (bswap) {
1656 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1657 tcg_out_bswap32(s, scratch);
1658 datalo = scratch;
1660 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1661 break;
1662 case MO_64:
1663 if (TCG_TARGET_REG_BITS == 64) {
1664 if (bswap) {
1665 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1666 tcg_out_bswap64(s, scratch);
1667 datalo = scratch;
1669 tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
1670 } else if (bswap) {
1671 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1672 tcg_out_bswap32(s, scratch);
1673 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
1674 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1675 tcg_out_bswap32(s, scratch);
1676 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
1677 } else {
1678 if (real_bswap) {
1679 int t = datalo;
1680 datalo = datahi;
1681 datahi = t;
1683 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1684 tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
1686 break;
1687 default:
1688 tcg_abort();
1692 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1694 TCGReg datalo, datahi, addrlo;
1695 TCGReg addrhi __attribute__((unused));
1696 TCGMemOpIdx oi;
1697 TCGMemOp opc;
1698 #if defined(CONFIG_SOFTMMU)
1699 int mem_index;
1700 tcg_insn_unit *label_ptr[2];
1701 #endif
1703 datalo = *args++;
1704 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1705 addrlo = *args++;
1706 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1707 oi = *args++;
1708 opc = get_memop(oi);
1710 #if defined(CONFIG_SOFTMMU)
1711 mem_index = get_mmuidx(oi);
1713 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1714 label_ptr, offsetof(CPUTLBEntry, addr_write));
1716 /* TLB Hit. */
1717 tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1719 /* Record the current context of a store into ldst label */
1720 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1721 s->code_ptr, label_ptr);
1722 #else
1724 int32_t offset = guest_base;
1725 TCGReg base = addrlo;
1726 int seg = 0;
1728 /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */
1729 if (guest_base == 0 || guest_base_flags) {
1730 seg = guest_base_flags;
1731 offset = 0;
1732 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1733 seg |= P_ADDR32;
1735 } else if (TCG_TARGET_REG_BITS == 64) {
1736 /* ??? Note that we can't use the same SIB addressing scheme
1737 as for loads, since we require L0 free for bswap. */
1738 if (offset != guest_base) {
1739 if (TARGET_LONG_BITS == 32) {
1740 tcg_out_ext32u(s, TCG_REG_L0, base);
1741 base = TCG_REG_L0;
1743 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
1744 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1745 base = TCG_REG_L1;
1746 offset = 0;
1747 } else if (TARGET_LONG_BITS == 32) {
1748 tcg_out_ext32u(s, TCG_REG_L1, base);
1749 base = TCG_REG_L1;
1753 tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
1755 #endif
1758 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1759 const TCGArg *args, const int *const_args)
1761 int c, vexop, rexw = 0;
1763 #if TCG_TARGET_REG_BITS == 64
1764 # define OP_32_64(x) \
1765 case glue(glue(INDEX_op_, x), _i64): \
1766 rexw = P_REXW; /* FALLTHRU */ \
1767 case glue(glue(INDEX_op_, x), _i32)
1768 #else
1769 # define OP_32_64(x) \
1770 case glue(glue(INDEX_op_, x), _i32)
1771 #endif
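/* For illustration, on a 64-bit host OP_32_64(add) expands to
       case INDEX_op_add_i64: rexw = P_REXW;    (falls through)
       case INDEX_op_add_i32:
   so each OP_32_64 label handles both operand widths, with rexw
   selecting the REX.W form for the 64-bit variant. */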
1773 switch(opc) {
1774 case INDEX_op_exit_tb:
1775 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1776 tcg_out_jmp(s, tb_ret_addr);
1777 break;
1778 case INDEX_op_goto_tb:
1779 if (s->tb_jmp_offset) {
1780 /* direct jump method */
1781 tcg_out8(s, OPC_JMP_long); /* jmp im */
1782 s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
1783 tcg_out32(s, 0);
1784 } else {
1785 /* indirect jump method */
1786 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1787 (intptr_t)(s->tb_next + args[0]));
1789 s->tb_next_offset[args[0]] = tcg_current_code_size(s);
1790 break;
1791 case INDEX_op_br:
1792 tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
1793 break;
1794 OP_32_64(ld8u):
1795 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1796 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1797 break;
1798 OP_32_64(ld8s):
1799 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1800 break;
1801 OP_32_64(ld16u):
1802 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1803 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1804 break;
1805 OP_32_64(ld16s):
1806 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1807 break;
1808 #if TCG_TARGET_REG_BITS == 64
1809 case INDEX_op_ld32u_i64:
1810 #endif
1811 case INDEX_op_ld_i32:
1812 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1813 break;
1815 OP_32_64(st8):
1816 if (const_args[0]) {
1817 tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
1818 0, args[1], args[2]);
1819 tcg_out8(s, args[0]);
1820 } else {
1821 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1822 args[0], args[1], args[2]);
1824 break;
1825 OP_32_64(st16):
1826 if (const_args[0]) {
1827 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
1828 0, args[1], args[2]);
1829 tcg_out16(s, args[0]);
1830 } else {
1831 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1832 args[0], args[1], args[2]);
1834 break;
1835 #if TCG_TARGET_REG_BITS == 64
1836 case INDEX_op_st32_i64:
1837 #endif
1838 case INDEX_op_st_i32:
1839 if (const_args[0]) {
1840 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
1841 tcg_out32(s, args[0]);
1842 } else {
1843 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1845 break;
1847 OP_32_64(add):
1848 /* For 3-operand addition, use LEA. */
1849 if (args[0] != args[1]) {
1850 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1852 if (const_args[2]) {
1853 c3 = a2, a2 = -1;
1854 } else if (a0 == a2) {
1855 /* Watch out for dest = src + dest, since we've removed
1856 the matching constraint on the add. */
1857 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1858 break;
1861 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1862 break;
1864 c = ARITH_ADD;
1865 goto gen_arith;
1866 OP_32_64(sub):
1867 c = ARITH_SUB;
1868 goto gen_arith;
1869 OP_32_64(and):
1870 c = ARITH_AND;
1871 goto gen_arith;
1872 OP_32_64(or):
1873 c = ARITH_OR;
1874 goto gen_arith;
1875 OP_32_64(xor):
1876 c = ARITH_XOR;
1877 goto gen_arith;
1878 gen_arith:
1879 if (const_args[2]) {
1880 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1881 } else {
1882 tgen_arithr(s, c + rexw, args[0], args[2]);
1884 break;
1886 OP_32_64(andc):
1887 if (const_args[2]) {
1888 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
1889 args[0], args[1]);
1890 tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
1891 } else {
1892 tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
1894 break;
1896 OP_32_64(mul):
1897 if (const_args[2]) {
1898 int32_t val;
1899 val = args[2];
1900 if (val == (int8_t)val) {
1901 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1902 tcg_out8(s, val);
1903 } else {
1904 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1905 tcg_out32(s, val);
1907 } else {
1908 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1910 break;
1912 OP_32_64(div2):
1913 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1914 break;
1915 OP_32_64(divu2):
1916 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1917 break;
1919 OP_32_64(shl):
1920 c = SHIFT_SHL;
1921 vexop = OPC_SHLX;
1922 goto gen_shift_maybe_vex;
1923 OP_32_64(shr):
1924 c = SHIFT_SHR;
1925 vexop = OPC_SHRX;
1926 goto gen_shift_maybe_vex;
1927 OP_32_64(sar):
1928 c = SHIFT_SAR;
1929 vexop = OPC_SARX;
1930 goto gen_shift_maybe_vex;
1931 OP_32_64(rotl):
1932 c = SHIFT_ROL;
1933 goto gen_shift;
1934 OP_32_64(rotr):
1935 c = SHIFT_ROR;
1936 goto gen_shift;
1937 gen_shift_maybe_vex:
1938 if (have_bmi2 && !const_args[2]) {
1939 tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
1940 break;
1942 /* FALLTHRU */
1943 gen_shift:
1944 if (const_args[2]) {
1945 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1946 } else {
1947 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1949 break;
1951 case INDEX_op_brcond_i32:
1952 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1953 arg_label(args[3]), 0);
1954 break;
1955 case INDEX_op_setcond_i32:
1956 tcg_out_setcond32(s, args[3], args[0], args[1],
1957 args[2], const_args[2]);
1958 break;
1959 case INDEX_op_movcond_i32:
1960 tcg_out_movcond32(s, args[5], args[0], args[1],
1961 args[2], const_args[2], args[3]);
1962 break;
1964 OP_32_64(bswap16):
1965 tcg_out_rolw_8(s, args[0]);
1966 break;
1967 OP_32_64(bswap32):
1968 tcg_out_bswap32(s, args[0]);
1969 break;
1971 OP_32_64(neg):
1972 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1973 break;
1974 OP_32_64(not):
1975 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1976 break;
1978 OP_32_64(ext8s):
1979 tcg_out_ext8s(s, args[0], args[1], rexw);
1980 break;
1981 OP_32_64(ext16s):
1982 tcg_out_ext16s(s, args[0], args[1], rexw);
1983 break;
1984 OP_32_64(ext8u):
1985 tcg_out_ext8u(s, args[0], args[1]);
1986 break;
1987 OP_32_64(ext16u):
1988 tcg_out_ext16u(s, args[0], args[1]);
1989 break;
1991 case INDEX_op_qemu_ld_i32:
1992 tcg_out_qemu_ld(s, args, 0);
1993 break;
1994 case INDEX_op_qemu_ld_i64:
1995 tcg_out_qemu_ld(s, args, 1);
1996 break;
1997 case INDEX_op_qemu_st_i32:
1998 tcg_out_qemu_st(s, args, 0);
1999 break;
2000 case INDEX_op_qemu_st_i64:
2001 tcg_out_qemu_st(s, args, 1);
2002 break;
2004 OP_32_64(mulu2):
2005 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
2006 break;
2007 OP_32_64(muls2):
2008 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
2009 break;
2010 OP_32_64(add2):
2011 if (const_args[4]) {
2012 tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
2013 } else {
2014 tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
2015 }
2016 if (const_args[5]) {
2017 tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
2018 } else {
2019 tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
2020 }
2021 break;
2022 OP_32_64(sub2):
2023 if (const_args[4]) {
2024 tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
2025 } else {
2026 tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
2027 }
2028 if (const_args[5]) {
2029 tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
2030 } else {
2031 tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
2032 }
2033 break;
2035 #if TCG_TARGET_REG_BITS == 32
2036 case INDEX_op_brcond2_i32:
2037 tcg_out_brcond2(s, args, const_args, 0);
2038 break;
2039 case INDEX_op_setcond2_i32:
2040 tcg_out_setcond2(s, args, const_args);
2041 break;
2042 #else /* TCG_TARGET_REG_BITS == 64 */
2043 case INDEX_op_ld32s_i64:
2044 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
2045 break;
2046 case INDEX_op_ld_i64:
2047 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2048 break;
2049 case INDEX_op_st_i64:
2050 if (const_args[0]) {
2051 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
2052 0, args[1], args[2]);
2053 tcg_out32(s, args[0]);
2054 } else {
2055 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2056 }
2057 break;
2059 case INDEX_op_brcond_i64:
2060 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
2061 arg_label(args[3]), 0);
2062 break;
2063 case INDEX_op_setcond_i64:
2064 tcg_out_setcond64(s, args[3], args[0], args[1],
2065 args[2], const_args[2]);
2066 break;
2067 case INDEX_op_movcond_i64:
2068 tcg_out_movcond64(s, args[5], args[0], args[1],
2069 args[2], const_args[2], args[3]);
2070 break;
2072 case INDEX_op_bswap64_i64:
2073 tcg_out_bswap64(s, args[0]);
2074 break;
2075 case INDEX_op_extu_i32_i64:
2076 case INDEX_op_ext32u_i64:
2077 tcg_out_ext32u(s, args[0], args[1]);
2078 break;
2079 case INDEX_op_ext_i32_i64:
2080 case INDEX_op_ext32s_i64:
2081 tcg_out_ext32s(s, args[0], args[1]);
2082 break;
2083 #endif
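/* The deposit cases below cover only the insertions x86 can do with a plain
   partial-register store: an 8-bit field at bit 0 is a byte store into the low
   byte of the destination; an 8-bit field at bit 8 goes into the legacy
   high-byte register (without a REX prefix, ModRM register codes 4..7 name
   AH/CH/DH/BH, hence args[0] + 4); and a 16-bit field at bit 0 is a
   0x66-prefixed 16-bit store.  The "Q" constraint and the deposit-validity
   check in tcg-target.h are expected to filter out every other offset/length
   combination, so the final else should be unreachable. */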
2085 OP_32_64(deposit):
2086 if (args[3] == 0 && args[4] == 8) {
2087 /* load bits 0..7 */
2088 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
2089 args[2], args[0]);
2090 } else if (args[3] == 8 && args[4] == 8) {
2091 /* load bits 8..15 */
2092 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
2093 } else if (args[3] == 0 && args[4] == 16) {
2094 /* load bits 0..15 */
2095 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
2096 } else {
2097 tcg_abort();
2098 }
2099 break;
2101 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2102 case INDEX_op_mov_i64:
2103 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2104 case INDEX_op_movi_i64:
2105 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2106 default:
2107 tcg_abort();
2108 }
2110 #undef OP_32_64
2111 }
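/* A rough legend for the constraint strings below, as parsed by
   target_parse_constraint() earlier in this file: 'r' is any general register,
   'q' a byte-addressable register, 'a' and 'd' force EAX and EDX (for the
   divide and widening-multiply instructions), a digit ties an output to the
   register of that input operand, 'i' accepts any immediate, 'e' a
   sign-extended 32-bit immediate, 'Z' a zero-extended 32-bit immediate, and
   'L' any register except the TCG_REG_L0/L1 scratch registers used by the
   qemu_ld/st slow path. */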
2113 static const TCGTargetOpDef x86_op_defs[] = {
2114 { INDEX_op_exit_tb, { } },
2115 { INDEX_op_goto_tb, { } },
2116 { INDEX_op_br, { } },
2117 { INDEX_op_ld8u_i32, { "r", "r" } },
2118 { INDEX_op_ld8s_i32, { "r", "r" } },
2119 { INDEX_op_ld16u_i32, { "r", "r" } },
2120 { INDEX_op_ld16s_i32, { "r", "r" } },
2121 { INDEX_op_ld_i32, { "r", "r" } },
2122 { INDEX_op_st8_i32, { "qi", "r" } },
2123 { INDEX_op_st16_i32, { "ri", "r" } },
2124 { INDEX_op_st_i32, { "ri", "r" } },
2126 { INDEX_op_add_i32, { "r", "r", "ri" } },
2127 { INDEX_op_sub_i32, { "r", "0", "ri" } },
2128 { INDEX_op_mul_i32, { "r", "0", "ri" } },
2129 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
2130 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
2131 { INDEX_op_and_i32, { "r", "0", "ri" } },
2132 { INDEX_op_or_i32, { "r", "0", "ri" } },
2133 { INDEX_op_xor_i32, { "r", "0", "ri" } },
2134 { INDEX_op_andc_i32, { "r", "r", "ri" } },
2136 { INDEX_op_shl_i32, { "r", "0", "Ci" } },
2137 { INDEX_op_shr_i32, { "r", "0", "Ci" } },
2138 { INDEX_op_sar_i32, { "r", "0", "Ci" } },
2139 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
2140 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
2142 { INDEX_op_brcond_i32, { "r", "ri" } },
2144 { INDEX_op_bswap16_i32, { "r", "0" } },
2145 { INDEX_op_bswap32_i32, { "r", "0" } },
2147 { INDEX_op_neg_i32, { "r", "0" } },
2149 { INDEX_op_not_i32, { "r", "0" } },
2151 { INDEX_op_ext8s_i32, { "r", "q" } },
2152 { INDEX_op_ext16s_i32, { "r", "r" } },
2153 { INDEX_op_ext8u_i32, { "r", "q" } },
2154 { INDEX_op_ext16u_i32, { "r", "r" } },
2156 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
2158 { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
2159 { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
2161 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
2162 { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
2163 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2164 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2166 #if TCG_TARGET_REG_BITS == 32
2167 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
2168 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2169 #else
2170 { INDEX_op_ld8u_i64, { "r", "r" } },
2171 { INDEX_op_ld8s_i64, { "r", "r" } },
2172 { INDEX_op_ld16u_i64, { "r", "r" } },
2173 { INDEX_op_ld16s_i64, { "r", "r" } },
2174 { INDEX_op_ld32u_i64, { "r", "r" } },
2175 { INDEX_op_ld32s_i64, { "r", "r" } },
2176 { INDEX_op_ld_i64, { "r", "r" } },
2177 { INDEX_op_st8_i64, { "ri", "r" } },
2178 { INDEX_op_st16_i64, { "ri", "r" } },
2179 { INDEX_op_st32_i64, { "ri", "r" } },
2180 { INDEX_op_st_i64, { "re", "r" } },
2182 { INDEX_op_add_i64, { "r", "r", "re" } },
2183 { INDEX_op_mul_i64, { "r", "0", "re" } },
2184 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
2185 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
2186 { INDEX_op_sub_i64, { "r", "0", "re" } },
2187 { INDEX_op_and_i64, { "r", "0", "reZ" } },
2188 { INDEX_op_or_i64, { "r", "0", "re" } },
2189 { INDEX_op_xor_i64, { "r", "0", "re" } },
2190 { INDEX_op_andc_i64, { "r", "r", "rI" } },
2192 { INDEX_op_shl_i64, { "r", "0", "Ci" } },
2193 { INDEX_op_shr_i64, { "r", "0", "Ci" } },
2194 { INDEX_op_sar_i64, { "r", "0", "Ci" } },
2195 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
2196 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
2198 { INDEX_op_brcond_i64, { "r", "re" } },
2199 { INDEX_op_setcond_i64, { "r", "r", "re" } },
2201 { INDEX_op_bswap16_i64, { "r", "0" } },
2202 { INDEX_op_bswap32_i64, { "r", "0" } },
2203 { INDEX_op_bswap64_i64, { "r", "0" } },
2204 { INDEX_op_neg_i64, { "r", "0" } },
2205 { INDEX_op_not_i64, { "r", "0" } },
2207 { INDEX_op_ext8s_i64, { "r", "r" } },
2208 { INDEX_op_ext16s_i64, { "r", "r" } },
2209 { INDEX_op_ext32s_i64, { "r", "r" } },
2210 { INDEX_op_ext8u_i64, { "r", "r" } },
2211 { INDEX_op_ext16u_i64, { "r", "r" } },
2212 { INDEX_op_ext32u_i64, { "r", "r" } },
2214 { INDEX_op_ext_i32_i64, { "r", "r" } },
2215 { INDEX_op_extu_i32_i64, { "r", "r" } },
2217 { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
2218 { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
2220 { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
2221 { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
2222 { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
2223 { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
2224 #endif
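/* The number of qemu_ld/st operands depends on how many host registers a
   guest address and a 64-bit value need: on a 64-bit host both fit in one
   register; on a 32-bit host a 32-bit guest address still fits in one register
   but i64 data takes a register pair; and a 64-bit guest address on a 32-bit
   host takes a pair as well, which is where the four-operand forms come from.
   The 'L' constraint keeps these operands out of the registers the softmmu
   slow path uses as scratch. */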
2226 #if TCG_TARGET_REG_BITS == 64
2227 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2228 { INDEX_op_qemu_st_i32, { "L", "L" } },
2229 { INDEX_op_qemu_ld_i64, { "r", "L" } },
2230 { INDEX_op_qemu_st_i64, { "L", "L" } },
2231 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2232 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2233 { INDEX_op_qemu_st_i32, { "L", "L" } },
2234 { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
2235 { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
2236 #else
2237 { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
2238 { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
2239 { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
2240 { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
2241 #endif
2242 { -1 },
2243 };
2245 static int tcg_target_callee_save_regs[] = {
2246 #if TCG_TARGET_REG_BITS == 64
2247 TCG_REG_RBP,
2248 TCG_REG_RBX,
2249 #if defined(_WIN64)
2250 TCG_REG_RDI,
2251 TCG_REG_RSI,
2252 #endif
2253 TCG_REG_R12,
2254 TCG_REG_R13,
2255 TCG_REG_R14, /* Currently used for the global env. */
2256 TCG_REG_R15,
2257 #else
2258 TCG_REG_EBP, /* Currently used for the global env. */
2259 TCG_REG_EBX,
2260 TCG_REG_ESI,
2261 TCG_REG_EDI,
2262 #endif
2263 };
2265 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2266 and tcg_register_jit. */
2268 #define PUSH_SIZE \
2269 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2270 * (TCG_TARGET_REG_BITS / 8))
2272 #define FRAME_SIZE \
2273 ((PUSH_SIZE \
2274 + TCG_STATIC_CALL_ARGS_SIZE \
2275 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2276 + TCG_TARGET_STACK_ALIGN - 1) \
2277 & ~(TCG_TARGET_STACK_ALIGN - 1))
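/* As a worked example (assuming the usual values TCG_STATIC_CALL_ARGS_SIZE ==
   128, CPU_TEMP_BUF_NLONGS == 128 and a 16-byte TCG_TARGET_STACK_ALIGN): a
   64-bit SysV host pushes the six callee-saved registers listed above, so with
   the return address PUSH_SIZE is 7 * 8 = 56 bytes, and FRAME_SIZE rounds
   56 + 128 + 128 * 8 = 1208 up to 1216.  The prologue then only has to
   subtract FRAME_SIZE - PUSH_SIZE from the stack pointer. */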
2279 /* Generate global QEMU prologue and epilogue code */
2280 static void tcg_target_qemu_prologue(TCGContext *s)
2281 {
2282 int i, stack_addend;
2284 /* TB prologue */
2286 /* Reserve some stack space, also for TCG temps. */
2287 stack_addend = FRAME_SIZE - PUSH_SIZE;
2288 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2289 CPU_TEMP_BUF_NLONGS * sizeof(long));
2291 /* Save all callee saved registers. */
2292 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2293 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2294 }
2296 #if TCG_TARGET_REG_BITS == 32
2297 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2298 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2299 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2300 /* jmp *tb. */
2301 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2302 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2303 + stack_addend);
2304 #else
2305 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2306 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2307 /* jmp *tb. */
2308 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2309 #endif
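/* The prologue above is entered like an ordinary C function: the first
   argument is the CPUArchState pointer and the second is the translated block
   to execute (on 32-bit hosts both come from the stack, hence the ESP-relative
   load and indirect jump).  It copies env into TCG_AREG0 and tail-jumps into
   the generated code; tb_ret_addr, recorded below, is where the generated
   exit_tb code jumps to reach this epilogue. */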
2311 /* TB epilogue */
2312 tb_ret_addr = s->code_ptr;
2314 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2316 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2317 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2318 }
2319 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2321 #if !defined(CONFIG_SOFTMMU)
2322 /* Try to set up a segment register to point to guest_base. */
2323 if (guest_base) {
2324 setup_guest_base_seg();
2325 }
2326 #endif
2327 }
2329 static void tcg_target_init(TCGContext *s)
2330 {
2331 #ifdef CONFIG_CPUID_H
2332 unsigned a, b, c, d;
2333 int max = __get_cpuid_max(0, 0);
2335 if (max >= 1) {
2336 __cpuid(1, a, b, c, d);
2337 #ifndef have_cmov
2338 /* For 32-bit, 99% certainty that we're running on hardware that
2339 supports cmov, but we still need to check. In case cmov is not
2340 available, we'll use a small forward branch. */
2341 have_cmov = (d & bit_CMOV) != 0;
2342 #endif
2343 #ifndef have_movbe
2344 /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
2345 need to probe for it. */
2346 have_movbe = (c & bit_MOVBE) != 0;
2347 #endif
2348 }
2350 if (max >= 7) {
2351 /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
2352 __cpuid_count(7, 0, a, b, c, d);
2353 #ifdef bit_BMI
2354 have_bmi1 = (b & bit_BMI) != 0;
2355 #endif
2356 #ifndef have_bmi2
2357 have_bmi2 = (b & bit_BMI2) != 0;
2358 #endif
2359 }
2360 #endif
2362 if (TCG_TARGET_REG_BITS == 64) {
2363 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2364 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2365 } else {
2366 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2367 }
2369 tcg_regset_clear(tcg_target_call_clobber_regs);
2370 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2371 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2372 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2373 if (TCG_TARGET_REG_BITS == 64) {
2374 #if !defined(_WIN64)
2375 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2376 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2377 #endif
2378 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2379 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2380 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2381 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2382 }
2384 tcg_regset_clear(s->reserved_regs);
2385 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2387 tcg_add_target_add_op_defs(x86_op_defs);
2388 }
2390 typedef struct {
2391 DebugFrameHeader h;
2392 uint8_t fde_def_cfa[4];
2393 uint8_t fde_reg_ofs[14];
2394 } DebugFrame;
2396 /* We're expecting a 2 byte uleb128 encoded value. */
2397 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
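/* The assertion holds because the frame size is emitted below as exactly two
   uleb128 bytes: (FRAME_SIZE & 0x7f) | 0x80 carries the low seven bits with
   the continuation bit set, and FRAME_SIZE >> 7 carries the rest, which limits
   the encodable value to 14 bits. */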
2399 #if !defined(__ELF__)
2400 /* Host machine without ELF. */
2401 #elif TCG_TARGET_REG_BITS == 64
2402 #define ELF_HOST_MACHINE EM_X86_64
2403 static const DebugFrame debug_frame = {
2404 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2405 .h.cie.id = -1,
2406 .h.cie.version = 1,
2407 .h.cie.code_align = 1,
2408 .h.cie.data_align = 0x78, /* sleb128 -8 */
2409 .h.cie.return_column = 16,
2411 /* Total FDE size does not include the "len" member. */
2412 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2414 .fde_def_cfa = {
2415 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2416 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2417 (FRAME_SIZE >> 7)
2418 },
2419 .fde_reg_ofs = {
2420 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2421 /* The following ordering must match tcg_target_callee_save_regs. */
2422 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2423 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2424 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2425 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2426 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2427 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2428 }
2429 };
2430 #else
2431 #define ELF_HOST_MACHINE EM_386
2432 static const DebugFrame debug_frame = {
2433 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2434 .h.cie.id = -1,
2435 .h.cie.version = 1,
2436 .h.cie.code_align = 1,
2437 .h.cie.data_align = 0x7c, /* sleb128 -4 */
2438 .h.cie.return_column = 8,
2440 /* Total FDE size does not include the "len" member. */
2441 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2443 .fde_def_cfa = {
2444 12, 4, /* DW_CFA_def_cfa %esp, ... */
2445 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2446 (FRAME_SIZE >> 7)
2447 },
2448 .fde_reg_ofs = {
2449 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2450 /* The following ordering must match tcg_target_callee_save_regs. */
2451 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2452 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2453 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2454 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2455 }
2456 };
2457 #endif
2459 #if defined(ELF_HOST_MACHINE)
2460 void tcg_register_jit(void *buf, size_t buf_size)
2461 {
2462 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2463 }
2464 #endif