[qemu/ar7.git] tcg/i386/tcg-target.inc.c
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
25 #include "tcg-pool.inc.c"
27 #ifdef CONFIG_DEBUG_TCG
28 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
29 #if TCG_TARGET_REG_BITS == 64
30 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
31 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
32 #else
33 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
34 #endif
35 };
36 #endif
38 static const int tcg_target_reg_alloc_order[] = {
39 #if TCG_TARGET_REG_BITS == 64
40 TCG_REG_RBP,
41 TCG_REG_RBX,
42 TCG_REG_R12,
43 TCG_REG_R13,
44 TCG_REG_R14,
45 TCG_REG_R15,
46 TCG_REG_R10,
47 TCG_REG_R11,
48 TCG_REG_R9,
49 TCG_REG_R8,
50 TCG_REG_RCX,
51 TCG_REG_RDX,
52 TCG_REG_RSI,
53 TCG_REG_RDI,
54 TCG_REG_RAX,
55 #else
56 TCG_REG_EBX,
57 TCG_REG_ESI,
58 TCG_REG_EDI,
59 TCG_REG_EBP,
60 TCG_REG_ECX,
61 TCG_REG_EDX,
62 TCG_REG_EAX,
63 #endif
64 };
66 static const int tcg_target_call_iarg_regs[] = {
67 #if TCG_TARGET_REG_BITS == 64
68 #if defined(_WIN64)
69 TCG_REG_RCX,
70 TCG_REG_RDX,
71 #else
72 TCG_REG_RDI,
73 TCG_REG_RSI,
74 TCG_REG_RDX,
75 TCG_REG_RCX,
76 #endif
77 TCG_REG_R8,
78 TCG_REG_R9,
79 #else
80 /* 32 bit mode uses stack based calling convention (GCC default). */
81 #endif
82 };
84 static const int tcg_target_call_oarg_regs[] = {
85 TCG_REG_EAX,
86 #if TCG_TARGET_REG_BITS == 32
87 TCG_REG_EDX
88 #endif
89 };
91 /* Constants we accept. */
92 #define TCG_CT_CONST_S32 0x100
93 #define TCG_CT_CONST_U32 0x200
94 #define TCG_CT_CONST_I32 0x400
95 #define TCG_CT_CONST_WSZ 0x800
97 /* Registers used with L constraint, which are the first argument
98 registers on x86_64, and two arbitrary call-clobbered registers on
99 i386. */
100 #if TCG_TARGET_REG_BITS == 64
101 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
102 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
103 #else
104 # define TCG_REG_L0 TCG_REG_EAX
105 # define TCG_REG_L1 TCG_REG_EDX
106 #endif
108 /* The host compiler should supply <cpuid.h> to enable runtime feature
109 detection, since we do not go as far as writing our own inline assembly.
110 If it is not available, default values will be assumed. */
111 #if defined(CONFIG_CPUID_H)
112 #include "qemu/cpuid.h"
113 #endif
115 /* For 64-bit, we always know that CMOV is available. */
116 #if TCG_TARGET_REG_BITS == 64
117 # define have_cmov 1
118 #elif defined(CONFIG_CPUID_H)
119 static bool have_cmov;
120 #else
121 # define have_cmov 0
122 #endif
124 /* We need these symbols in tcg-target.h, and we can't properly conditionalize
125 them there. Therefore we always define the variables. */
126 bool have_bmi1;
127 bool have_popcnt;
129 #ifdef CONFIG_CPUID_H
130 static bool have_movbe;
131 static bool have_bmi2;
132 static bool have_lzcnt;
133 #else
134 # define have_movbe 0
135 # define have_bmi2 0
136 # define have_lzcnt 0
137 #endif
139 static tcg_insn_unit *tb_ret_addr;
141 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
142 intptr_t value, intptr_t addend)
144 value += addend;
145 switch(type) {
146 case R_386_PC32:
147 value -= (uintptr_t)code_ptr;
148 if (value != (int32_t)value) {
149 tcg_abort();
151 tcg_patch32(code_ptr, value);
152 break;
153 case R_386_PC8:
154 value -= (uintptr_t)code_ptr;
155 if (value != (int8_t)value) {
156 tcg_abort();
158 tcg_patch8(code_ptr, value);
159 break;
160 default:
161 tcg_abort();
165 /* parse target specific constraints */
166 static const char *target_parse_constraint(TCGArgConstraint *ct,
167 const char *ct_str, TCGType type)
169 switch(*ct_str++) {
170 case 'a':
171 ct->ct |= TCG_CT_REG;
172 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
173 break;
174 case 'b':
175 ct->ct |= TCG_CT_REG;
176 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
177 break;
178 case 'c':
179 ct->ct |= TCG_CT_REG;
180 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
181 break;
182 case 'd':
183 ct->ct |= TCG_CT_REG;
184 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
185 break;
186 case 'S':
187 ct->ct |= TCG_CT_REG;
188 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
189 break;
190 case 'D':
191 ct->ct |= TCG_CT_REG;
192 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
193 break;
194 case 'q':
195 ct->ct |= TCG_CT_REG;
196 ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
197 break;
198 case 'Q':
199 ct->ct |= TCG_CT_REG;
200 ct->u.regs = 0xf;
201 break;
202 case 'r':
203 ct->ct |= TCG_CT_REG;
204 ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
205 break;
206 case 'W':
207 /* With TZCNT/LZCNT, we can have operand-size as an input. */
208 ct->ct |= TCG_CT_CONST_WSZ;
209 break;
211 /* qemu_ld/st address constraint */
212 case 'L':
213 ct->ct |= TCG_CT_REG;
214 ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
215 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
216 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
217 break;
219 case 'e':
220 ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_S32);
221 break;
222 case 'Z':
223 ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_U32);
224 break;
225 case 'I':
226 ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_I32);
227 break;
229 default:
230 return NULL;
232 return ct_str;
235 /* test if a constant matches the constraint */
236 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
237 const TCGArgConstraint *arg_ct)
239 int ct = arg_ct->ct;
240 if (ct & TCG_CT_CONST) {
241 return 1;
243 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
244 return 1;
246 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
247 return 1;
249 if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
250 return 1;
252 if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
253 return 1;
255 return 0;
258 #if TCG_TARGET_REG_BITS == 64
259 # define LOWREGMASK(x) ((x) & 7)
260 #else
261 # define LOWREGMASK(x) (x)
262 #endif
264 #define P_EXT 0x100 /* 0x0f opcode prefix */
265 #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
266 #define P_DATA16 0x400 /* 0x66 opcode prefix */
267 #if TCG_TARGET_REG_BITS == 64
268 # define P_ADDR32 0x800 /* 0x67 opcode prefix */
269 # define P_REXW 0x1000 /* Set REX.W = 1 */
270 # define P_REXB_R 0x2000 /* REG field as byte register */
271 # define P_REXB_RM 0x4000 /* R/M field as byte register */
272 # define P_GS 0x8000 /* gs segment override */
273 #else
274 # define P_ADDR32 0
275 # define P_REXW 0
276 # define P_REXB_R 0
277 # define P_REXB_RM 0
278 # define P_GS 0
279 #endif
280 #define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
281 #define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
283 #define OPC_ARITH_EvIz (0x81)
284 #define OPC_ARITH_EvIb (0x83)
285 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
286 #define OPC_ANDN (0xf2 | P_EXT38)
287 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
288 #define OPC_BSF (0xbc | P_EXT)
289 #define OPC_BSR (0xbd | P_EXT)
290 #define OPC_BSWAP (0xc8 | P_EXT)
291 #define OPC_CALL_Jz (0xe8)
292 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
293 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
294 #define OPC_DEC_r32 (0x48)
295 #define OPC_IMUL_GvEv (0xaf | P_EXT)
296 #define OPC_IMUL_GvEvIb (0x6b)
297 #define OPC_IMUL_GvEvIz (0x69)
298 #define OPC_INC_r32 (0x40)
299 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
300 #define OPC_JCC_short (0x70) /* ... plus condition code */
301 #define OPC_JMP_long (0xe9)
302 #define OPC_JMP_short (0xeb)
303 #define OPC_LEA (0x8d)
304 #define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3)
305 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
306 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
307 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
308 #define OPC_MOVB_EvIz (0xc6)
309 #define OPC_MOVL_EvIz (0xc7)
310 #define OPC_MOVL_Iv (0xb8)
311 #define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
312 #define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
313 #define OPC_MOVSBL (0xbe | P_EXT)
314 #define OPC_MOVSWL (0xbf | P_EXT)
315 #define OPC_MOVSLQ (0x63 | P_REXW)
316 #define OPC_MOVZBL (0xb6 | P_EXT)
317 #define OPC_MOVZWL (0xb7 | P_EXT)
318 #define OPC_POP_r32 (0x58)
319 #define OPC_POPCNT (0xb8 | P_EXT | P_SIMDF3)
320 #define OPC_PUSH_r32 (0x50)
321 #define OPC_PUSH_Iv (0x68)
322 #define OPC_PUSH_Ib (0x6a)
323 #define OPC_RET (0xc3)
324 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
325 #define OPC_SHIFT_1 (0xd1)
326 #define OPC_SHIFT_Ib (0xc1)
327 #define OPC_SHIFT_cl (0xd3)
328 #define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
329 #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
330 #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
331 #define OPC_TESTL (0x85)
332 #define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3)
333 #define OPC_XCHG_ax_r32 (0x90)
335 #define OPC_GRP3_Ev (0xf7)
336 #define OPC_GRP5 (0xff)
338 /* Group 1 opcode extensions for 0x80-0x83.
339 These are also used as modifiers for OPC_ARITH. */
340 #define ARITH_ADD 0
341 #define ARITH_OR 1
342 #define ARITH_ADC 2
343 #define ARITH_SBB 3
344 #define ARITH_AND 4
345 #define ARITH_SUB 5
346 #define ARITH_XOR 6
347 #define ARITH_CMP 7
349 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
350 #define SHIFT_ROL 0
351 #define SHIFT_ROR 1
352 #define SHIFT_SHL 4
353 #define SHIFT_SHR 5
354 #define SHIFT_SAR 7
356 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
357 #define EXT3_NOT 2
358 #define EXT3_NEG 3
359 #define EXT3_MUL 4
360 #define EXT3_IMUL 5
361 #define EXT3_DIV 6
362 #define EXT3_IDIV 7
364 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
365 #define EXT5_INC_Ev 0
366 #define EXT5_DEC_Ev 1
367 #define EXT5_CALLN_Ev 2
368 #define EXT5_JMPN_Ev 4
370 /* Condition codes to be added to OPC_JCC_{long,short}. */
371 #define JCC_JMP (-1)
372 #define JCC_JO 0x0
373 #define JCC_JNO 0x1
374 #define JCC_JB 0x2
375 #define JCC_JAE 0x3
376 #define JCC_JE 0x4
377 #define JCC_JNE 0x5
378 #define JCC_JBE 0x6
379 #define JCC_JA 0x7
380 #define JCC_JS 0x8
381 #define JCC_JNS 0x9
382 #define JCC_JP 0xa
383 #define JCC_JNP 0xb
384 #define JCC_JL 0xc
385 #define JCC_JGE 0xd
386 #define JCC_JLE 0xe
387 #define JCC_JG 0xf
389 static const uint8_t tcg_cond_to_jcc[] = {
390 [TCG_COND_EQ] = JCC_JE,
391 [TCG_COND_NE] = JCC_JNE,
392 [TCG_COND_LT] = JCC_JL,
393 [TCG_COND_GE] = JCC_JGE,
394 [TCG_COND_LE] = JCC_JLE,
395 [TCG_COND_GT] = JCC_JG,
396 [TCG_COND_LTU] = JCC_JB,
397 [TCG_COND_GEU] = JCC_JAE,
398 [TCG_COND_LEU] = JCC_JBE,
399 [TCG_COND_GTU] = JCC_JA,
400 };
402 #if TCG_TARGET_REG_BITS == 64
403 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
405 int rex;
407 if (opc & P_GS) {
408 tcg_out8(s, 0x65);
410 if (opc & P_DATA16) {
411 /* We should never be asking for both 16 and 64-bit operation. */
412 tcg_debug_assert((opc & P_REXW) == 0);
413 tcg_out8(s, 0x66);
415 if (opc & P_ADDR32) {
416 tcg_out8(s, 0x67);
418 if (opc & P_SIMDF3) {
419 tcg_out8(s, 0xf3);
420 } else if (opc & P_SIMDF2) {
421 tcg_out8(s, 0xf2);
424 rex = 0;
425 rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
426 rex |= (r & 8) >> 1; /* REX.R */
427 rex |= (x & 8) >> 2; /* REX.X */
428 rex |= (rm & 8) >> 3; /* REX.B */
430 /* P_REXB_{R,RM} indicates that the given register is the low byte.
431 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
432 as otherwise the encoding indicates %[abcd]h. Note that the values
433 that are ORed in merely indicate that the REX byte must be present;
434 those bits get discarded in output. */
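/* Worked example (illustrative): tcg_out_modrm(s, OPC_MOVL_GvEv + P_REXW,
   TCG_REG_RAX, TCG_REG_R8) reaches here with r = 0 and rm = 8, so REX.W
   and REX.B are set; the bytes emitted are 49 8b c0, i.e. movq %r8, %rax. */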
435 rex |= opc & (r >= 4 ? P_REXB_R : 0);
436 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
438 if (rex) {
439 tcg_out8(s, (uint8_t)(rex | 0x40));
442 if (opc & (P_EXT | P_EXT38)) {
443 tcg_out8(s, 0x0f);
444 if (opc & P_EXT38) {
445 tcg_out8(s, 0x38);
449 tcg_out8(s, opc);
451 #else
452 static void tcg_out_opc(TCGContext *s, int opc)
454 if (opc & P_DATA16) {
455 tcg_out8(s, 0x66);
457 if (opc & P_SIMDF3) {
458 tcg_out8(s, 0xf3);
459 } else if (opc & P_SIMDF2) {
460 tcg_out8(s, 0xf2);
462 if (opc & (P_EXT | P_EXT38)) {
463 tcg_out8(s, 0x0f);
464 if (opc & P_EXT38) {
465 tcg_out8(s, 0x38);
468 tcg_out8(s, opc);
470 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
471 the 32-bit compilation paths. This method works with all versions of gcc,
472 whereas relying on optimization may not be able to exclude them. */
473 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
474 #endif
476 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
478 tcg_out_opc(s, opc, r, rm, 0);
479 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
482 static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
484 int tmp;
486 if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
487 /* Three byte VEX prefix. */
488 tcg_out8(s, 0xc4);
490 /* VEX.m-mmmm */
491 if (opc & P_EXT38) {
492 tmp = 2;
493 } else if (opc & P_EXT) {
494 tmp = 1;
495 } else {
496 tcg_abort();
498 tmp |= 0x40; /* VEX.X */
499 tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */
500 tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */
501 tcg_out8(s, tmp);
503 tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */
504 } else {
505 /* Two byte VEX prefix. */
506 tcg_out8(s, 0xc5);
508 tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
510 /* VEX.pp */
511 if (opc & P_DATA16) {
512 tmp |= 1; /* 0x66 */
513 } else if (opc & P_SIMDF3) {
514 tmp |= 2; /* 0xf3 */
515 } else if (opc & P_SIMDF2) {
516 tmp |= 3; /* 0xf2 */
518 tmp |= (~v & 15) << 3; /* VEX.vvvv */
519 tcg_out8(s, tmp);
520 tcg_out8(s, opc);
521 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
524 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
525 We handle either RM or INDEX being absent by passing a negative value.
526 In 64-bit mode, for absolute addresses, ~RM is the size of the immediate
527 operand that will follow the instruction. */
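/* Worked example (illustrative): tcg_out_modrm_offset(s, OPC_MOVL_GvEv,
   TCG_REG_EAX, TCG_REG_EBX, 8) takes the single-byte ModRM form with a
   disp8 and emits 8b 43 08, i.e. movl 8(%ebx), %eax (%rbx on a 64-bit host). */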
529 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
530 int index, int shift, intptr_t offset)
532 int mod, len;
534 if (index < 0 && rm < 0) {
535 if (TCG_TARGET_REG_BITS == 64) {
536 /* Try for a rip-relative addressing mode. This has replaced
537 the 32-bit-mode absolute addressing encoding. */
538 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
539 intptr_t disp = offset - pc;
540 if (disp == (int32_t)disp) {
541 tcg_out_opc(s, opc, r, 0, 0);
542 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
543 tcg_out32(s, disp);
544 return;
547 /* Try for an absolute address encoding. This requires the
548 use of the MODRM+SIB encoding and is therefore larger than
549 rip-relative addressing. */
550 if (offset == (int32_t)offset) {
551 tcg_out_opc(s, opc, r, 0, 0);
552 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
553 tcg_out8(s, (4 << 3) | 5);
554 tcg_out32(s, offset);
555 return;
558 /* ??? The memory isn't directly addressable. */
559 tcg_abort();
560 } else {
561 /* Absolute address. */
562 tcg_out_opc(s, opc, r, 0, 0);
563 tcg_out8(s, (r << 3) | 5);
564 tcg_out32(s, offset);
565 return;
569 /* Find the length of the immediate addend. Note that the encoding
570 that would be used for (%ebp) indicates absolute addressing. */
571 if (rm < 0) {
572 mod = 0, len = 4, rm = 5;
573 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
574 mod = 0, len = 0;
575 } else if (offset == (int8_t)offset) {
576 mod = 0x40, len = 1;
577 } else {
578 mod = 0x80, len = 4;
581 /* Use a single byte MODRM format if possible. Note that the encoding
582 that would be used for %esp is the escape to the two byte form. */
583 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
584 /* Single byte MODRM format. */
585 tcg_out_opc(s, opc, r, rm, 0);
586 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
587 } else {
588 /* Two byte MODRM+SIB format. */
590 /* Note that the encoding that would place %esp into the index
591 field indicates no index register. In 64-bit mode, the REX.X
592 bit counts, so %r12 can be used as the index. */
593 if (index < 0) {
594 index = 4;
595 } else {
596 tcg_debug_assert(index != TCG_REG_ESP);
599 tcg_out_opc(s, opc, r, rm, index);
600 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
601 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
604 if (len == 1) {
605 tcg_out8(s, offset);
606 } else if (len == 4) {
607 tcg_out32(s, offset);
611 /* A simplification of the above with no index or shift. */
612 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
613 int rm, intptr_t offset)
615 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
618 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
619 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
621 /* Propagate an opcode prefix, such as P_REXW. */
622 int ext = subop & ~0x7;
623 subop &= 0x7;
625 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
628 static inline void tcg_out_mov(TCGContext *s, TCGType type,
629 TCGReg ret, TCGReg arg)
631 if (arg != ret) {
632 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
633 tcg_out_modrm(s, opc, ret, arg);
637 static void tcg_out_movi(TCGContext *s, TCGType type,
638 TCGReg ret, tcg_target_long arg)
640 tcg_target_long diff;
642 if (arg == 0) {
643 tgen_arithr(s, ARITH_XOR, ret, ret);
644 return;
646 if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
647 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
648 tcg_out32(s, arg);
649 return;
651 if (arg == (int32_t)arg) {
652 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
653 tcg_out32(s, arg);
654 return;
657 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
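/* (The 7 below is the length of the REX+LEA instruction; RIP-relative
   displacements are measured from the end of the instruction.) */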
658 diff = arg - ((uintptr_t)s->code_ptr + 7);
659 if (diff == (int32_t)diff) {
660 tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
661 tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
662 tcg_out32(s, diff);
663 return;
666 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
667 tcg_out64(s, arg);
670 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
672 if (val == (int8_t)val) {
673 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
674 tcg_out8(s, val);
675 } else if (val == (int32_t)val) {
676 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
677 tcg_out32(s, val);
678 } else {
679 tcg_abort();
683 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
685 /* Given the strength of x86 memory ordering, we only need to care about
686 store-load ordering. Experimentally, "lock orl $0,0(%esp)" is
687 faster than "mfence", so don't bother with the SSE insn. */
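/* (For reference, the bytes emitted here are f0 83 0c 24 00,
   i.e. lock orl $0,(%esp).) */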
688 if (a0 & TCG_MO_ST_LD) {
689 tcg_out8(s, 0xf0);
690 tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
691 tcg_out8(s, 0);
695 static inline void tcg_out_push(TCGContext *s, int reg)
697 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
700 static inline void tcg_out_pop(TCGContext *s, int reg)
702 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
705 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
706 TCGReg arg1, intptr_t arg2)
708 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
709 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
712 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
713 TCGReg arg1, intptr_t arg2)
715 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
716 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
719 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
720 TCGReg base, intptr_t ofs)
722 int rexw = 0;
723 if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
724 if (val != (int32_t)val) {
725 return false;
727 rexw = P_REXW;
729 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
730 tcg_out32(s, val);
731 return true;
734 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
736 /* Propagate an opcode prefix, such as P_DATA16. */
737 int ext = subopc & ~0x7;
738 subopc &= 0x7;
740 if (count == 1) {
741 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
742 } else {
743 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
744 tcg_out8(s, count);
748 static inline void tcg_out_bswap32(TCGContext *s, int reg)
750 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
753 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
755 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
758 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
760 /* movzbl */
761 tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
762 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
765 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
767 /* movsbl */
768 tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
769 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
772 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
774 /* movzwl */
775 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
778 static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
780 /* movsw[lq] */
781 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
784 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
786 /* 32-bit mov zero extends. */
787 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
790 static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
792 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
795 static inline void tcg_out_bswap64(TCGContext *s, int reg)
797 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
800 static void tgen_arithi(TCGContext *s, int c, int r0,
801 tcg_target_long val, int cf)
803 int rexw = 0;
805 if (TCG_TARGET_REG_BITS == 64) {
806 rexw = c & -8;
807 c &= 7;
810 /* ??? While INC/DEC are 2 bytes shorter than ADD/SUB $1, they also induce
811 partial-flags-update stalls on Pentium 4 and are not recommended
812 by current Intel optimization manuals. */
813 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
814 int is_inc = (c == ARITH_ADD) ^ (val < 0);
815 if (TCG_TARGET_REG_BITS == 64) {
816 /* The single-byte increment encodings are re-tasked as the
817 REX prefixes. Use the MODRM encoding. */
818 tcg_out_modrm(s, OPC_GRP5 + rexw,
819 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
820 } else {
821 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
823 return;
826 if (c == ARITH_AND) {
827 if (TCG_TARGET_REG_BITS == 64) {
828 if (val == 0xffffffffu) {
829 tcg_out_ext32u(s, r0, r0);
830 return;
832 if (val == (uint32_t)val) {
833 /* AND with no high bits set can use a 32-bit operation. */
834 rexw = 0;
837 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
838 tcg_out_ext8u(s, r0, r0);
839 return;
841 if (val == 0xffffu) {
842 tcg_out_ext16u(s, r0, r0);
843 return;
847 if (val == (int8_t)val) {
848 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
849 tcg_out8(s, val);
850 return;
852 if (rexw == 0 || val == (int32_t)val) {
853 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
854 tcg_out32(s, val);
855 return;
858 tcg_abort();
861 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
863 if (val != 0) {
864 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
868 /* Use SMALL != 0 to force a short forward branch. */
869 static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
871 int32_t val, val1;
873 if (l->has_value) {
874 val = tcg_pcrel_diff(s, l->u.value_ptr);
875 val1 = val - 2;
876 if ((int8_t)val1 == val1) {
877 if (opc == -1) {
878 tcg_out8(s, OPC_JMP_short);
879 } else {
880 tcg_out8(s, OPC_JCC_short + opc);
882 tcg_out8(s, val1);
883 } else {
884 if (small) {
885 tcg_abort();
887 if (opc == -1) {
888 tcg_out8(s, OPC_JMP_long);
889 tcg_out32(s, val - 5);
890 } else {
891 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
892 tcg_out32(s, val - 6);
895 } else if (small) {
896 if (opc == -1) {
897 tcg_out8(s, OPC_JMP_short);
898 } else {
899 tcg_out8(s, OPC_JCC_short + opc);
901 tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
902 s->code_ptr += 1;
903 } else {
904 if (opc == -1) {
905 tcg_out8(s, OPC_JMP_long);
906 } else {
907 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
909 tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
910 s->code_ptr += 4;
914 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
915 int const_arg2, int rexw)
917 if (const_arg2) {
918 if (arg2 == 0) {
919 /* test r, r */
920 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
921 } else {
922 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
924 } else {
925 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
929 static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
930 TCGArg arg1, TCGArg arg2, int const_arg2,
931 TCGLabel *label, int small)
933 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
934 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
937 #if TCG_TARGET_REG_BITS == 64
938 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
939 TCGArg arg1, TCGArg arg2, int const_arg2,
940 TCGLabel *label, int small)
942 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
943 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
945 #else
946 /* XXX: we implement it at the target level to avoid having to
947 handle temporaries that live across basic blocks. */
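/* Sketch of the decomposition (illustrative), e.g. for TCG_COND_LT:
   branch to the target if hi1 <s hi2; if the high words differ at all
   (JNE, flags still live from that compare) the result is already known
   to be false, so skip to label_next; only when the high words are equal
   is the result decided by an unsigned comparison of the low words. */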
948 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
949 const int *const_args, int small)
951 TCGLabel *label_next = gen_new_label();
952 TCGLabel *label_this = arg_label(args[5]);
954 switch(args[4]) {
955 case TCG_COND_EQ:
956 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
957 label_next, 1);
958 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
959 label_this, small);
960 break;
961 case TCG_COND_NE:
962 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
963 label_this, small);
964 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
965 label_this, small);
966 break;
967 case TCG_COND_LT:
968 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
969 label_this, small);
970 tcg_out_jxx(s, JCC_JNE, label_next, 1);
971 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
972 label_this, small);
973 break;
974 case TCG_COND_LE:
975 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
976 label_this, small);
977 tcg_out_jxx(s, JCC_JNE, label_next, 1);
978 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
979 label_this, small);
980 break;
981 case TCG_COND_GT:
982 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
983 label_this, small);
984 tcg_out_jxx(s, JCC_JNE, label_next, 1);
985 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
986 label_this, small);
987 break;
988 case TCG_COND_GE:
989 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
990 label_this, small);
991 tcg_out_jxx(s, JCC_JNE, label_next, 1);
992 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
993 label_this, small);
994 break;
995 case TCG_COND_LTU:
996 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
997 label_this, small);
998 tcg_out_jxx(s, JCC_JNE, label_next, 1);
999 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
1000 label_this, small);
1001 break;
1002 case TCG_COND_LEU:
1003 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
1004 label_this, small);
1005 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1006 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
1007 label_this, small);
1008 break;
1009 case TCG_COND_GTU:
1010 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
1011 label_this, small);
1012 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1013 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
1014 label_this, small);
1015 break;
1016 case TCG_COND_GEU:
1017 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
1018 label_this, small);
1019 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1020 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
1021 label_this, small);
1022 break;
1023 default:
1024 tcg_abort();
1026 tcg_out_label(s, label_next, s->code_ptr);
1028 #endif
1030 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
1031 TCGArg arg1, TCGArg arg2, int const_arg2)
1033 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
1034 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1035 tcg_out_ext8u(s, dest, dest);
1038 #if TCG_TARGET_REG_BITS == 64
1039 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1040 TCGArg arg1, TCGArg arg2, int const_arg2)
1042 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
1043 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1044 tcg_out_ext8u(s, dest, dest);
1046 #else
1047 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1048 const int *const_args)
1050 TCGArg new_args[6];
1051 TCGLabel *label_true, *label_over;
1053 memcpy(new_args, args+1, 5*sizeof(TCGArg));
1055 if (args[0] == args[1] || args[0] == args[2]
1056 || (!const_args[3] && args[0] == args[3])
1057 || (!const_args[4] && args[0] == args[4])) {
1058 /* When the destination overlaps with one of the argument
1059 registers, don't do anything tricky. */
1060 label_true = gen_new_label();
1061 label_over = gen_new_label();
1063 new_args[5] = label_arg(label_true);
1064 tcg_out_brcond2(s, new_args, const_args+1, 1);
1066 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1067 tcg_out_jxx(s, JCC_JMP, label_over, 1);
1068 tcg_out_label(s, label_true, s->code_ptr);
1070 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
1071 tcg_out_label(s, label_over, s->code_ptr);
1072 } else {
1073 /* When the destination does not overlap one of the arguments,
1074 clear the destination first, jump if cond false, and emit an
1075 increment in the true case. This results in smaller code. */
1077 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1079 label_over = gen_new_label();
1080 new_args[4] = tcg_invert_cond(new_args[4]);
1081 new_args[5] = label_arg(label_over);
1082 tcg_out_brcond2(s, new_args, const_args+1, 1);
1084 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
1085 tcg_out_label(s, label_over, s->code_ptr);
1088 #endif
1090 static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
1091 TCGReg dest, TCGReg v1)
1093 if (have_cmov) {
1094 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
1095 } else {
1096 TCGLabel *over = gen_new_label();
1097 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
1098 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
1099 tcg_out_label(s, over, s->code_ptr);
1103 static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest,
1104 TCGReg c1, TCGArg c2, int const_c2,
1105 TCGReg v1)
1107 tcg_out_cmp(s, c1, c2, const_c2, 0);
1108 tcg_out_cmov(s, cond, 0, dest, v1);
1111 #if TCG_TARGET_REG_BITS == 64
1112 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest,
1113 TCGReg c1, TCGArg c2, int const_c2,
1114 TCGReg v1)
1116 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
1117 tcg_out_cmov(s, cond, P_REXW, dest, v1);
1119 #endif
1121 static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
1122 TCGArg arg2, bool const_a2)
1124 if (have_bmi1) {
1125 tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1);
1126 if (const_a2) {
1127 tcg_debug_assert(arg2 == (rexw ? 64 : 32));
1128 } else {
1129 tcg_debug_assert(dest != arg2);
1130 tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
1132 } else {
1133 tcg_debug_assert(dest != arg2);
1134 tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1);
1135 tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
1139 static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
1140 TCGArg arg2, bool const_a2)
1142 if (have_lzcnt) {
1143 tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1);
1144 if (const_a2) {
1145 tcg_debug_assert(arg2 == (rexw ? 64 : 32));
1146 } else {
1147 tcg_debug_assert(dest != arg2);
1148 tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
1150 } else {
1151 tcg_debug_assert(!const_a2);
1152 tcg_debug_assert(dest != arg1);
1153 tcg_debug_assert(dest != arg2);
1155 /* Recall that the output of BSR is the index not the count. */
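/* Note: for 0 <= i <= 63 (or 31), i ^ 63 == 63 - i, so the XOR below
   converts BSR's bit index into the leading-zero count. */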
1156 tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1);
1157 tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);
1159 /* Since the XOR has clobbered the flags set by BSR, we have to re-test. */
1160 tcg_out_cmp(s, arg1, 0, 1, rexw);
1161 tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
1165 static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
1167 intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
1169 if (disp == (int32_t)disp) {
1170 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1171 tcg_out32(s, disp);
1172 } else {
1173 /* rip-relative addressing into the constant pool.
1174 This is 6 + 8 = 14 bytes, as compared to using an
1175 immediate load (10 + 6 = 16 bytes), plus we may
1176 be able to re-use the pool constant for more calls. */
1177 tcg_out_opc(s, OPC_GRP5, 0, 0, 0);
1178 tcg_out8(s, (call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev) << 3 | 5);
1179 new_pool_label(s, (uintptr_t)dest, R_386_PC32, s->code_ptr, -4);
1180 tcg_out32(s, 0);
1184 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
1186 tcg_out_branch(s, 1, dest);
1189 static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
1191 tcg_out_branch(s, 0, dest);
1194 static void tcg_out_nopn(TCGContext *s, int n)
1196 int i;
1197 /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
1198 * "xchg %eax,%eax", forming "xchg %ax,%ax". All cores accept the
1199 * duplicate prefix, and all of the interesting recent cores can
1200 * decode and discard the duplicates in a single cycle.
1201 */
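/* For example, tcg_out_nopn(s, 3) emits 0x66 0x66 0x90. */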
1202 tcg_debug_assert(n >= 1);
1203 for (i = 1; i < n; ++i) {
1204 tcg_out8(s, 0x66);
1206 tcg_out8(s, 0x90);
1209 #if defined(CONFIG_SOFTMMU)
1210 #include "tcg-ldst.inc.c"
1212 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1213 * int mmu_idx, uintptr_t ra)
1215 static void * const qemu_ld_helpers[16] = {
1216 [MO_UB] = helper_ret_ldub_mmu,
1217 [MO_LEUW] = helper_le_lduw_mmu,
1218 [MO_LEUL] = helper_le_ldul_mmu,
1219 [MO_LEQ] = helper_le_ldq_mmu,
1220 [MO_BEUW] = helper_be_lduw_mmu,
1221 [MO_BEUL] = helper_be_ldul_mmu,
1222 [MO_BEQ] = helper_be_ldq_mmu,
1223 };
1225 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1226 * uintxx_t val, int mmu_idx, uintptr_t ra)
1228 static void * const qemu_st_helpers[16] = {
1229 [MO_UB] = helper_ret_stb_mmu,
1230 [MO_LEUW] = helper_le_stw_mmu,
1231 [MO_LEUL] = helper_le_stl_mmu,
1232 [MO_LEQ] = helper_le_stq_mmu,
1233 [MO_BEUW] = helper_be_stw_mmu,
1234 [MO_BEUL] = helper_be_stl_mmu,
1235 [MO_BEQ] = helper_be_stq_mmu,
1236 };
1238 /* Perform the TLB load and compare.
1240 Inputs:
1241 ADDRLO and ADDRHI contain the low and high part of the address.
1243 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1245 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1246 This should be offsetof addr_read or addr_write.
1248 Outputs:
1249 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1250 positions of the displacements of forward jumps to the TLB miss case.
1252 Second argument register is loaded with the low part of the address.
1253 In the TLB hit case, it has been adjusted as indicated by the TLB
1254 and so is a host address. In the TLB miss case, it continues to
1255 hold a guest address.
1257 First argument register is clobbered. */
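/* Illustrative shape of the fast path emitted below (assuming a 64-bit
   host; r0/r1 stand for TCG_REG_L0/L1):
       mov    addrlo, r0
       lea    s_mask-a_mask(addrlo), r1      # or a plain mov if a_bits >= s_bits
       shr    $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), r0
       and    $(TARGET_PAGE_MASK | a_mask), r1
       and    $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), r0
       lea    tlb_table[mem_index]+which(env, r0), r0
       cmp    0(r0), r1
       mov    addrlo, r1
       jne    slow_path
       add    addend-which(r0), r1           # r1 is now the host address
*/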
1259 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1260 int mem_index, TCGMemOp opc,
1261 tcg_insn_unit **label_ptr, int which)
1263 const TCGReg r0 = TCG_REG_L0;
1264 const TCGReg r1 = TCG_REG_L1;
1265 TCGType ttype = TCG_TYPE_I32;
1266 TCGType tlbtype = TCG_TYPE_I32;
1267 int trexw = 0, hrexw = 0, tlbrexw = 0;
1268 unsigned a_bits = get_alignment_bits(opc);
1269 unsigned s_bits = opc & MO_SIZE;
1270 unsigned a_mask = (1 << a_bits) - 1;
1271 unsigned s_mask = (1 << s_bits) - 1;
1272 target_ulong tlb_mask;
1274 if (TCG_TARGET_REG_BITS == 64) {
1275 if (TARGET_LONG_BITS == 64) {
1276 ttype = TCG_TYPE_I64;
1277 trexw = P_REXW;
1279 if (TCG_TYPE_PTR == TCG_TYPE_I64) {
1280 hrexw = P_REXW;
1281 if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
1282 tlbtype = TCG_TYPE_I64;
1283 tlbrexw = P_REXW;
1288 tcg_out_mov(s, tlbtype, r0, addrlo);
1289 /* If the required alignment is at least as large as the access, simply
1290 copy the address and mask. For lesser alignments, check that we don't
1291 cross pages for the complete access. */
1292 if (a_bits >= s_bits) {
1293 tcg_out_mov(s, ttype, r1, addrlo);
1294 } else {
1295 tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
1297 tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
1299 tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
1300 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1302 tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
1303 tgen_arithi(s, ARITH_AND + tlbrexw, r0,
1304 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1306 tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
1307 offsetof(CPUArchState, tlb_table[mem_index][0])
1308 + which);
1310 /* cmp 0(r0), r1 */
1311 tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
1313 /* Prepare for both the fast path add of the tlb addend, and the slow
1314 path function argument setup. There are two cases worth noting:
1315 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1316 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1317 copies the entire guest address for the slow path, while truncation
1318 for the 32-bit host happens with the fastpath ADDL below. */
1319 tcg_out_mov(s, ttype, r1, addrlo);
1321 /* jne slow_path */
1322 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1323 label_ptr[0] = s->code_ptr;
1324 s->code_ptr += 4;
1326 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1327 /* cmp 4(r0), addrhi */
1328 tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
1330 /* jne slow_path */
1331 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1332 label_ptr[1] = s->code_ptr;
1333 s->code_ptr += 4;
1336 /* TLB Hit. */
1338 /* add addend(r0), r1 */
1339 tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
1340 offsetof(CPUTLBEntry, addend) - which);
1343 /*
1344 * Record the context of a call to the out-of-line helper code for the slow
1345 * path of a load or store, so that we can later generate the correct helper code.
1346 */
1347 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1348 TCGReg datalo, TCGReg datahi,
1349 TCGReg addrlo, TCGReg addrhi,
1350 tcg_insn_unit *raddr,
1351 tcg_insn_unit **label_ptr)
1353 TCGLabelQemuLdst *label = new_ldst_label(s);
1355 label->is_ld = is_ld;
1356 label->oi = oi;
1357 label->datalo_reg = datalo;
1358 label->datahi_reg = datahi;
1359 label->addrlo_reg = addrlo;
1360 label->addrhi_reg = addrhi;
1361 label->raddr = raddr;
1362 label->label_ptr[0] = label_ptr[0];
1363 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1364 label->label_ptr[1] = label_ptr[1];
1368 /*
1369 * Generate code for the slow path of a load at the end of the block.
1370 */
1371 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1373 TCGMemOpIdx oi = l->oi;
1374 TCGMemOp opc = get_memop(oi);
1375 TCGReg data_reg;
1376 tcg_insn_unit **label_ptr = &l->label_ptr[0];
1378 /* resolve label address */
1379 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
1380 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1381 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
1384 if (TCG_TARGET_REG_BITS == 32) {
1385 int ofs = 0;
1387 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1388 ofs += 4;
1390 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1391 ofs += 4;
1393 if (TARGET_LONG_BITS == 64) {
1394 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1395 ofs += 4;
1398 tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
1399 ofs += 4;
1401 tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
1402 } else {
1403 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1404 /* The second argument is already loaded with addrlo. */
1405 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
1406 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1407 (uintptr_t)l->raddr);
1410 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1412 data_reg = l->datalo_reg;
1413 switch (opc & MO_SSIZE) {
1414 case MO_SB:
1415 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1416 break;
1417 case MO_SW:
1418 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1419 break;
1420 #if TCG_TARGET_REG_BITS == 64
1421 case MO_SL:
1422 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1423 break;
1424 #endif
1425 case MO_UB:
1426 case MO_UW:
1427 /* Note that the helpers have zero-extended to tcg_target_long. */
1428 case MO_UL:
1429 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1430 break;
1431 case MO_Q:
1432 if (TCG_TARGET_REG_BITS == 64) {
1433 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1434 } else if (data_reg == TCG_REG_EDX) {
1435 /* xchg %edx, %eax */
1436 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1437 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1438 } else {
1439 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1440 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1442 break;
1443 default:
1444 tcg_abort();
1447 /* Jump back to the code corresponding to the next IR op after the qemu_ld. */
1448 tcg_out_jmp(s, l->raddr);
1451 /*
1452 * Generate code for the slow path of a store at the end of the block.
1453 */
1454 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1456 TCGMemOpIdx oi = l->oi;
1457 TCGMemOp opc = get_memop(oi);
1458 TCGMemOp s_bits = opc & MO_SIZE;
1459 tcg_insn_unit **label_ptr = &l->label_ptr[0];
1460 TCGReg retaddr;
1462 /* resolve label address */
1463 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
1464 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1465 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
1468 if (TCG_TARGET_REG_BITS == 32) {
1469 int ofs = 0;
1471 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1472 ofs += 4;
1474 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1475 ofs += 4;
1477 if (TARGET_LONG_BITS == 64) {
1478 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1479 ofs += 4;
1482 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1483 ofs += 4;
1485 if (s_bits == MO_64) {
1486 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1487 ofs += 4;
1490 tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
1491 ofs += 4;
1493 retaddr = TCG_REG_EAX;
1494 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1495 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
1496 } else {
1497 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1498 /* The second argument is already loaded with addrlo. */
1499 tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1500 tcg_target_call_iarg_regs[2], l->datalo_reg);
1501 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
1503 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1504 retaddr = tcg_target_call_iarg_regs[4];
1505 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1506 } else {
1507 retaddr = TCG_REG_RAX;
1508 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1509 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
1510 TCG_TARGET_CALL_STACK_OFFSET);
1514 /* "Tail call" to the helper, with the return address back inline. */
1515 tcg_out_push(s, retaddr);
1516 tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1518 #elif defined(__x86_64__) && defined(__linux__)
1519 # include <asm/prctl.h>
1520 # include <sys/prctl.h>
1522 int arch_prctl(int code, unsigned long addr);
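/* If the kernel lets us point the %gs base at guest_base (user-only,
   x86_64 Linux), guest accesses can use a gs segment override instead of
   adding guest_base explicitly; P_GS selects that prefix. */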
1524 static int guest_base_flags;
1525 static inline void setup_guest_base_seg(void)
1527 if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
1528 guest_base_flags = P_GS;
1531 #else
1532 # define guest_base_flags 0
1533 static inline void setup_guest_base_seg(void) { }
1534 #endif /* SOFTMMU */
1536 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1537 TCGReg base, int index, intptr_t ofs,
1538 int seg, TCGMemOp memop)
1540 const TCGMemOp real_bswap = memop & MO_BSWAP;
1541 TCGMemOp bswap = real_bswap;
1542 int movop = OPC_MOVL_GvEv;
1544 if (have_movbe && real_bswap) {
1545 bswap = 0;
1546 movop = OPC_MOVBE_GyMy;
1549 switch (memop & MO_SSIZE) {
1550 case MO_UB:
1551 tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
1552 base, index, 0, ofs);
1553 break;
1554 case MO_SB:
1555 tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
1556 base, index, 0, ofs);
1557 break;
1558 case MO_UW:
1559 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1560 base, index, 0, ofs);
1561 if (real_bswap) {
1562 tcg_out_rolw_8(s, datalo);
1564 break;
1565 case MO_SW:
1566 if (real_bswap) {
1567 if (have_movbe) {
1568 tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
1569 datalo, base, index, 0, ofs);
1570 } else {
1571 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1572 base, index, 0, ofs);
1573 tcg_out_rolw_8(s, datalo);
1575 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1576 } else {
1577 tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
1578 datalo, base, index, 0, ofs);
1580 break;
1581 case MO_UL:
1582 tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
1583 if (bswap) {
1584 tcg_out_bswap32(s, datalo);
1586 break;
1587 #if TCG_TARGET_REG_BITS == 64
1588 case MO_SL:
1589 if (real_bswap) {
1590 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1591 base, index, 0, ofs);
1592 if (bswap) {
1593 tcg_out_bswap32(s, datalo);
1595 tcg_out_ext32s(s, datalo, datalo);
1596 } else {
1597 tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
1598 base, index, 0, ofs);
1600 break;
1601 #endif
1602 case MO_Q:
1603 if (TCG_TARGET_REG_BITS == 64) {
1604 tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
1605 base, index, 0, ofs);
1606 if (bswap) {
1607 tcg_out_bswap64(s, datalo);
1609 } else {
1610 if (real_bswap) {
1611 int t = datalo;
1612 datalo = datahi;
1613 datahi = t;
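/* If the address base overlaps the low-part destination, load the high
   word first so the base is not clobbered before the second load. */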
1615 if (base != datalo) {
1616 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1617 base, index, 0, ofs);
1618 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1619 base, index, 0, ofs + 4);
1620 } else {
1621 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1622 base, index, 0, ofs + 4);
1623 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1624 base, index, 0, ofs);
1626 if (bswap) {
1627 tcg_out_bswap32(s, datalo);
1628 tcg_out_bswap32(s, datahi);
1631 break;
1632 default:
1633 tcg_abort();
1637 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1638 EAX. It will be useful once fixed-register globals are less
1639 common. */
1640 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1642 TCGReg datalo, datahi, addrlo;
1643 TCGReg addrhi __attribute__((unused));
1644 TCGMemOpIdx oi;
1645 TCGMemOp opc;
1646 #if defined(CONFIG_SOFTMMU)
1647 int mem_index;
1648 tcg_insn_unit *label_ptr[2];
1649 #endif
1651 datalo = *args++;
1652 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1653 addrlo = *args++;
1654 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1655 oi = *args++;
1656 opc = get_memop(oi);
1658 #if defined(CONFIG_SOFTMMU)
1659 mem_index = get_mmuidx(oi);
1661 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1662 label_ptr, offsetof(CPUTLBEntry, addr_read));
1664 /* TLB Hit. */
1665 tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
1667 /* Record the current context of a load into ldst label */
1668 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1669 s->code_ptr, label_ptr);
1670 #else
1672 int32_t offset = guest_base;
1673 TCGReg base = addrlo;
1674 int index = -1;
1675 int seg = 0;
1677 /* For a 32-bit guest, the high 32 bits of the address may contain
1678 garbage and must be ignored. We can do that with the ADDR32 prefix
1679 if we're not using a guest base, or when using segmentation.
1680 Otherwise we need to zero-extend manually. */
1681 if (guest_base == 0 || guest_base_flags) {
1682 seg = guest_base_flags;
1683 offset = 0;
1684 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1685 seg |= P_ADDR32;
1687 } else if (TCG_TARGET_REG_BITS == 64) {
1688 if (TARGET_LONG_BITS == 32) {
1689 tcg_out_ext32u(s, TCG_REG_L0, base);
1690 base = TCG_REG_L0;
1692 if (offset != guest_base) {
1693 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
1694 index = TCG_REG_L1;
1695 offset = 0;
1699 tcg_out_qemu_ld_direct(s, datalo, datahi,
1700 base, index, offset, seg, opc);
1702 #endif
1705 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1706 TCGReg base, intptr_t ofs, int seg,
1707 TCGMemOp memop)
1709 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1710 we could perform the bswap twice to restore the original value
1711 instead of moving to the scratch. But as it is, the L constraint
1712 means that TCG_REG_L0 is definitely free here. */
1713 const TCGReg scratch = TCG_REG_L0;
1714 const TCGMemOp real_bswap = memop & MO_BSWAP;
1715 TCGMemOp bswap = real_bswap;
1716 int movop = OPC_MOVL_EvGv;
1718 if (have_movbe && real_bswap) {
1719 bswap = 0;
1720 movop = OPC_MOVBE_MyGy;
1723 switch (memop & MO_SIZE) {
1724 case MO_8:
1725 /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1726 Use the scratch register if necessary. */
1727 if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
1728 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1729 datalo = scratch;
1731 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1732 datalo, base, ofs);
1733 break;
1734 case MO_16:
1735 if (bswap) {
1736 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1737 tcg_out_rolw_8(s, scratch);
1738 datalo = scratch;
1740 tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
1741 break;
1742 case MO_32:
1743 if (bswap) {
1744 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1745 tcg_out_bswap32(s, scratch);
1746 datalo = scratch;
1748 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1749 break;
1750 case MO_64:
1751 if (TCG_TARGET_REG_BITS == 64) {
1752 if (bswap) {
1753 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1754 tcg_out_bswap64(s, scratch);
1755 datalo = scratch;
1757 tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
1758 } else if (bswap) {
1759 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1760 tcg_out_bswap32(s, scratch);
1761 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
1762 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1763 tcg_out_bswap32(s, scratch);
1764 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
1765 } else {
1766 if (real_bswap) {
1767 int t = datalo;
1768 datalo = datahi;
1769 datahi = t;
1771 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1772 tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
1774 break;
1775 default:
1776 tcg_abort();
1780 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1782 TCGReg datalo, datahi, addrlo;
1783 TCGReg addrhi __attribute__((unused));
1784 TCGMemOpIdx oi;
1785 TCGMemOp opc;
1786 #if defined(CONFIG_SOFTMMU)
1787 int mem_index;
1788 tcg_insn_unit *label_ptr[2];
1789 #endif
1791 datalo = *args++;
1792 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1793 addrlo = *args++;
1794 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1795 oi = *args++;
1796 opc = get_memop(oi);
1798 #if defined(CONFIG_SOFTMMU)
1799 mem_index = get_mmuidx(oi);
1801 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1802 label_ptr, offsetof(CPUTLBEntry, addr_write));
1804 /* TLB Hit. */
1805 tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1807 /* Record the current context of a store into ldst label */
1808 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1809 s->code_ptr, label_ptr);
1810 #else
1812 int32_t offset = guest_base;
1813 TCGReg base = addrlo;
1814 int seg = 0;
1816 /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */
1817 if (guest_base == 0 || guest_base_flags) {
1818 seg = guest_base_flags;
1819 offset = 0;
1820 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1821 seg |= P_ADDR32;
1823 } else if (TCG_TARGET_REG_BITS == 64) {
1824 /* ??? Note that we can't use the same SIB addressing scheme
1825 as for loads, since we require L0 free for bswap. */
1826 if (offset != guest_base) {
1827 if (TARGET_LONG_BITS == 32) {
1828 tcg_out_ext32u(s, TCG_REG_L0, base);
1829 base = TCG_REG_L0;
1831 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
1832 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1833 base = TCG_REG_L1;
1834 offset = 0;
1835 } else if (TARGET_LONG_BITS == 32) {
1836 tcg_out_ext32u(s, TCG_REG_L1, base);
1837 base = TCG_REG_L1;
1841 tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
1843 #endif
1846 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1847 const TCGArg *args, const int *const_args)
1849 TCGArg a0, a1, a2;
1850 int c, const_a2, vexop, rexw = 0;
1852 #if TCG_TARGET_REG_BITS == 64
1853 # define OP_32_64(x) \
1854 case glue(glue(INDEX_op_, x), _i64): \
1855 rexw = P_REXW; /* FALLTHRU */ \
1856 case glue(glue(INDEX_op_, x), _i32)
1857 #else
1858 # define OP_32_64(x) \
1859 case glue(glue(INDEX_op_, x), _i32)
1860 #endif
1862 /* Hoist the loads of the most common arguments. */
1863 a0 = args[0];
1864 a1 = args[1];
1865 a2 = args[2];
1866 const_a2 = const_args[2];
1868 switch (opc) {
1869 case INDEX_op_exit_tb:
1870 /* Reuse the zeroing that exists for goto_ptr. */
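/* (s->code_gen_epilogue points at a stub that zeroes EAX before falling
   into the epilogue proper, presumably emitted by the prologue code
   elsewhere in this file, so exit_tb with a return value of 0 can simply
   jump there.) */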
1871 if (a0 == 0) {
1872 tcg_out_jmp(s, s->code_gen_epilogue);
1873 } else {
1874 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
1875 tcg_out_jmp(s, tb_ret_addr);
1877 break;
1878 case INDEX_op_goto_tb:
1879 if (s->tb_jmp_insn_offset) {
1880 /* direct jump method */
1881 int gap;
1882 /* jump displacement must be aligned for atomic patching;
1883 * see if we need to add extra nops before the jump.
1884 */
1885 gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
1886 if (gap != 1) {
1887 tcg_out_nopn(s, gap - 1);
1889 tcg_out8(s, OPC_JMP_long); /* jmp im */
1890 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1891 tcg_out32(s, 0);
1892 } else {
1893 /* indirect jump method */
1894 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1895 (intptr_t)(s->tb_jmp_target_addr + a0));
1897 s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
1898 break;
1899 case INDEX_op_goto_ptr:
1900 /* jmp to the given host address (could be epilogue) */
1901 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0);
1902 break;
1903 case INDEX_op_br:
1904 tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
1905 break;
1906 OP_32_64(ld8u):
1907 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1908 tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2);
1909 break;
1910 OP_32_64(ld8s):
1911 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2);
1912 break;
1913 OP_32_64(ld16u):
1914 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1915 tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2);
1916 break;
1917 OP_32_64(ld16s):
1918 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2);
1919 break;
1920 #if TCG_TARGET_REG_BITS == 64
1921 case INDEX_op_ld32u_i64:
1922 #endif
1923 case INDEX_op_ld_i32:
1924 tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2);
1925 break;
1927 OP_32_64(st8):
1928 if (const_args[0]) {
1929 tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2);
1930 tcg_out8(s, a0);
1931 } else {
1932 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2);
1934 break;
1935 OP_32_64(st16):
1936 if (const_args[0]) {
1937 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2);
1938 tcg_out16(s, a0);
1939 } else {
1940 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2);
1942 break;
1943 #if TCG_TARGET_REG_BITS == 64
1944 case INDEX_op_st32_i64:
1945 #endif
1946 case INDEX_op_st_i32:
1947 if (const_args[0]) {
1948 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2);
1949 tcg_out32(s, a0);
1950 } else {
1951 tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
1953 break;
1955 OP_32_64(add):
1956 /* For 3-operand addition, use LEA. */
1957 if (a0 != a1) {
1958 TCGArg c3 = 0;
1959 if (const_a2) {
1960 c3 = a2, a2 = -1;
1961 } else if (a0 == a2) {
1962 /* Watch out for dest = src + dest, since we've removed
1963 the matching constraint on the add. */
1964 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1965 break;
1968 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1969 break;
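/* a0 == a1: fall back to the ordinary two-operand ADD via gen_arith. */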
1971 c = ARITH_ADD;
1972 goto gen_arith;
1973 OP_32_64(sub):
1974 c = ARITH_SUB;
1975 goto gen_arith;
1976 OP_32_64(and):
1977 c = ARITH_AND;
1978 goto gen_arith;
1979 OP_32_64(or):
1980 c = ARITH_OR;
1981 goto gen_arith;
1982 OP_32_64(xor):
1983 c = ARITH_XOR;
1984 goto gen_arith;
1985 gen_arith:
1986 if (const_a2) {
1987 tgen_arithi(s, c + rexw, a0, a2, 0);
1988 } else {
1989 tgen_arithr(s, c + rexw, a0, a2);
1991 break;
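/* The register-register form of andc relies on the BMI1 ANDN instruction;
   TCG only emits andc here when the backend advertises it (tied to
   have_bmi1 in tcg-target.h).  A constant operand is instead folded into
   a plain AND with the inverted immediate. */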
1993 OP_32_64(andc):
1994 if (const_a2) {
1995 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
1996 tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0);
1997 } else {
1998 tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1);
2000 break;
2002 OP_32_64(mul):
2003 if (const_a2) {
2004 int32_t val;
2005 val = a2;
2006 if (val == (int8_t)val) {
2007 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0);
2008 tcg_out8(s, val);
2009 } else {
2010 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0);
2011 tcg_out32(s, val);
2013 } else {
2014 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2);
2016 break;
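/* The one-operand IDIV/DIV divide the double-word value in EDX:EAX and
   leave the quotient in EAX and the remainder in EDX, which is why the
   constraints below pin these operands to "a" and "d". */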
2018 OP_32_64(div2):
2019 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
2020 break;
2021 OP_32_64(divu2):
2022 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
2023 break;
2025 OP_32_64(shl):
2026 /* For small constant 3-operand shift, use LEA. */
2027 if (const_a2 && a0 != a1 && (a2 - 1) < 3) {
2028 if (a2 - 1 == 0) {
2029 /* shl $1,a1,a0 -> lea (a1,a1),a0 */
2030 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0);
2031 } else {
2032 /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */
2033 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0);
2035 break;
2037 c = SHIFT_SHL;
2038 vexop = OPC_SHLX;
2039 goto gen_shift_maybe_vex;
2040 OP_32_64(shr):
2041 c = SHIFT_SHR;
2042 vexop = OPC_SHRX;
2043 goto gen_shift_maybe_vex;
2044 OP_32_64(sar):
2045 c = SHIFT_SAR;
2046 vexop = OPC_SARX;
2047 goto gen_shift_maybe_vex;
2048 OP_32_64(rotl):
2049 c = SHIFT_ROL;
2050 goto gen_shift;
2051 OP_32_64(rotr):
2052 c = SHIFT_ROR;
2053 goto gen_shift;
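/* With BMI2, SHLX/SHRX/SARX take a variable shift count in any register,
   avoiding the fixed %cl of the legacy shift encodings; constant counts
   still use the immediate forms via gen_shift. */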
2054 gen_shift_maybe_vex:
2055 if (have_bmi2) {
2056 if (!const_a2) {
2057 tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1);
2058 break;
2060 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
2062 /* FALLTHRU */
2063 gen_shift:
2064 if (const_a2) {
2065 tcg_out_shifti(s, c + rexw, a0, a2);
2066 } else {
2067 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0);
2069 break;
2071 OP_32_64(ctz):
2072 tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]);
2073 break;
2074 OP_32_64(clz):
2075 tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]);
2076 break;
2077 OP_32_64(ctpop):
2078 tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1);
2079 break;
2081 case INDEX_op_brcond_i32:
2082 tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
2083 break;
2084 case INDEX_op_setcond_i32:
2085 tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2);
2086 break;
2087 case INDEX_op_movcond_i32:
2088 tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]);
2089 break;
2091 OP_32_64(bswap16):
2092 tcg_out_rolw_8(s, a0);
2093 break;
2094 OP_32_64(bswap32):
2095 tcg_out_bswap32(s, a0);
2096 break;
2098 OP_32_64(neg):
2099 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0);
2100 break;
2101 OP_32_64(not):
2102 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
2103 break;
2105 OP_32_64(ext8s):
2106 tcg_out_ext8s(s, a0, a1, rexw);
2107 break;
2108 OP_32_64(ext16s):
2109 tcg_out_ext16s(s, a0, a1, rexw);
2110 break;
2111 OP_32_64(ext8u):
2112 tcg_out_ext8u(s, a0, a1);
2113 break;
2114 OP_32_64(ext16u):
2115 tcg_out_ext16u(s, a0, a1);
2116 break;
2118 case INDEX_op_qemu_ld_i32:
2119 tcg_out_qemu_ld(s, args, 0);
2120 break;
2121 case INDEX_op_qemu_ld_i64:
2122 tcg_out_qemu_ld(s, args, 1);
2123 break;
2124 case INDEX_op_qemu_st_i32:
2125 tcg_out_qemu_st(s, args, 0);
2126 break;
2127 case INDEX_op_qemu_st_i64:
2128 tcg_out_qemu_st(s, args, 1);
2129 break;
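/* The one-operand MUL/IMUL forms multiply by EAX and leave the double-word
   product in EDX:EAX; the constraints pin the outputs accordingly. */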
2131 OP_32_64(mulu2):
2132 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
2133 break;
2134 OP_32_64(muls2):
2135 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
2136 break;
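/* Double-word add/sub: the low halves use ADD/SUB and the high halves use
   ADC/SBB so that the carry or borrow propagates. */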
2137 OP_32_64(add2):
2138 if (const_args[4]) {
2139 tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1);
2140 } else {
2141 tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]);
2143 if (const_args[5]) {
2144 tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1);
2145 } else {
2146 tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]);
2148 break;
2149 OP_32_64(sub2):
2150 if (const_args[4]) {
2151 tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1);
2152 } else {
2153 tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]);
2155 if (const_args[5]) {
2156 tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1);
2157 } else {
2158 tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]);
2160 break;
2162 #if TCG_TARGET_REG_BITS == 32
2163 case INDEX_op_brcond2_i32:
2164 tcg_out_brcond2(s, args, const_args, 0);
2165 break;
2166 case INDEX_op_setcond2_i32:
2167 tcg_out_setcond2(s, args, const_args);
2168 break;
2169 #else /* TCG_TARGET_REG_BITS == 64 */
2170 case INDEX_op_ld32s_i64:
2171 tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2);
2172 break;
2173 case INDEX_op_ld_i64:
2174 tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
2175 break;
2176 case INDEX_op_st_i64:
2177 if (const_args[0]) {
2178 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2);
2179 tcg_out32(s, a0);
2180 } else {
2181 tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
2183 break;
2185 case INDEX_op_brcond_i64:
2186 tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
2187 break;
2188 case INDEX_op_setcond_i64:
2189 tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2);
2190 break;
2191 case INDEX_op_movcond_i64:
2192 tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]);
2193 break;
2195 case INDEX_op_bswap64_i64:
2196 tcg_out_bswap64(s, a0);
2197 break;
2198 case INDEX_op_extu_i32_i64:
2199 case INDEX_op_ext32u_i64:
2200 tcg_out_ext32u(s, a0, a1);
2201 break;
2202 case INDEX_op_ext_i32_i64:
2203 case INDEX_op_ext32s_i64:
2204 tcg_out_ext32s(s, a0, a1);
2205 break;
2206 #endif
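/* Byte and word deposits are done with partial-register stores.  For the
   8..15 case, encoding register a0 + 4 in ModRM (without REX) selects the
   high-byte register (%ah, %ch, %dh, %bh), hence the "Q" constraint below
   restricting the operands to EAX..EBX. */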
2208 OP_32_64(deposit):
2209 if (args[3] == 0 && args[4] == 8) {
2210 /* load bits 0..7 */
2211 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
2212 } else if (args[3] == 8 && args[4] == 8) {
2213 /* load bits 8..15 */
2214 tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
2215 } else if (args[3] == 0 && args[4] == 16) {
2216 /* load bits 0..15 */
2217 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
2218 } else {
2219 tcg_abort();
2221 break;
2223 case INDEX_op_extract_i64:
2224 if (a2 + args[3] == 32) {
2225 /* This is a 32-bit zero-extending right shift. */
2226 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2227 tcg_out_shifti(s, SHIFT_SHR, a0, a2);
2228 break;
2230 /* FALLTHRU */
2231 case INDEX_op_extract_i32:
2232 /* On the off-chance that we can use the high-byte registers.
2233 Otherwise we emit the same ext16 + shift pattern that we
2234 would have gotten from the normal tcg-op.c expansion. */
2235 tcg_debug_assert(a2 == 8 && args[3] == 8);
2236 if (a1 < 4 && a0 < 8) {
2237 tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
2238 } else {
2239 tcg_out_ext16u(s, a0, a1);
2240 tcg_out_shifti(s, SHIFT_SHR, a0, 8);
2242 break;
2244 case INDEX_op_sextract_i32:
2245 /* We don't implement sextract_i64, as we cannot sign-extend to
2246 64-bits without using the REX prefix that explicitly excludes
2247 access to the high-byte registers. */
2248 tcg_debug_assert(a2 == 8 && args[3] == 8);
2249 if (a1 < 4 && a0 < 8) {
2250 tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
2251 } else {
2252 tcg_out_ext16s(s, a0, a1, 0);
2253 tcg_out_shifti(s, SHIFT_SAR, a0, 8);
2255 break;
2257 case INDEX_op_mb:
2258 tcg_out_mb(s, a0);
2259 break;
2260 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2261 case INDEX_op_mov_i64:
2262 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2263 case INDEX_op_movi_i64:
2264 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2265 default:
2266 tcg_abort();
2269 #undef OP_32_64
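/* Return the operand constraints for @op.  Roughly: "r" is any integer
   register, "q" a byte-addressable one, "Q" one with an addressable second
   byte (%ah style), "L" a register safe to use around a qemu_ld/st
   (excluding TCG_REG_L0/L1), "a"/"c"/"d" pin %eax/%ecx/%edx, "0"/"1" tie
   an input to an output, "&" marks an earlyclobber output, and
   "e"/"Z"/"I"/"W" accept sign-extended, zero-extended,
   complement-fits-in-32-bit and word-size constants respectively
   (see tcg_target_parse_constraint). */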
2272 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2274 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2275 static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
2276 static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
2277 static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
2278 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2279 static const TCGTargetOpDef r_q = { .args_ct_str = { "r", "q" } };
2280 static const TCGTargetOpDef r_re = { .args_ct_str = { "r", "re" } };
2281 static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } };
2282 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2283 static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
2284 static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
2285 static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
2286 static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
2287 static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
2288 static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
2289 static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
2290 static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
2291 static const TCGTargetOpDef r_r_L_L
2292 = { .args_ct_str = { "r", "r", "L", "L" } };
2293 static const TCGTargetOpDef L_L_L_L
2294 = { .args_ct_str = { "L", "L", "L", "L" } };
2296 switch (op) {
2297 case INDEX_op_goto_ptr:
2298 return &r;
2300 case INDEX_op_ld8u_i32:
2301 case INDEX_op_ld8u_i64:
2302 case INDEX_op_ld8s_i32:
2303 case INDEX_op_ld8s_i64:
2304 case INDEX_op_ld16u_i32:
2305 case INDEX_op_ld16u_i64:
2306 case INDEX_op_ld16s_i32:
2307 case INDEX_op_ld16s_i64:
2308 case INDEX_op_ld_i32:
2309 case INDEX_op_ld32u_i64:
2310 case INDEX_op_ld32s_i64:
2311 case INDEX_op_ld_i64:
2312 return &r_r;
2314 case INDEX_op_st8_i32:
2315 case INDEX_op_st8_i64:
2316 return &qi_r;
2317 case INDEX_op_st16_i32:
2318 case INDEX_op_st16_i64:
2319 case INDEX_op_st_i32:
2320 case INDEX_op_st32_i64:
2321 return &ri_r;
2322 case INDEX_op_st_i64:
2323 return &re_r;
2325 case INDEX_op_add_i32:
2326 case INDEX_op_add_i64:
2327 return &r_r_re;
2328 case INDEX_op_sub_i32:
2329 case INDEX_op_sub_i64:
2330 case INDEX_op_mul_i32:
2331 case INDEX_op_mul_i64:
2332 case INDEX_op_or_i32:
2333 case INDEX_op_or_i64:
2334 case INDEX_op_xor_i32:
2335 case INDEX_op_xor_i64:
2336 return &r_0_re;
2338 case INDEX_op_and_i32:
2339 case INDEX_op_and_i64:
2341 static const TCGTargetOpDef and
2342 = { .args_ct_str = { "r", "0", "reZ" } };
2343 return &and;
2345 break;
2346 case INDEX_op_andc_i32:
2347 case INDEX_op_andc_i64:
2349 static const TCGTargetOpDef andc
2350 = { .args_ct_str = { "r", "r", "rI" } };
2351 return &andc;
2353 break;
2355 case INDEX_op_shl_i32:
2356 case INDEX_op_shl_i64:
2357 case INDEX_op_shr_i32:
2358 case INDEX_op_shr_i64:
2359 case INDEX_op_sar_i32:
2360 case INDEX_op_sar_i64:
2361 return have_bmi2 ? &r_r_ri : &r_0_ci;
2362 case INDEX_op_rotl_i32:
2363 case INDEX_op_rotl_i64:
2364 case INDEX_op_rotr_i32:
2365 case INDEX_op_rotr_i64:
2366 return &r_0_ci;
2368 case INDEX_op_brcond_i32:
2369 case INDEX_op_brcond_i64:
2370 return &r_re;
2372 case INDEX_op_bswap16_i32:
2373 case INDEX_op_bswap16_i64:
2374 case INDEX_op_bswap32_i32:
2375 case INDEX_op_bswap32_i64:
2376 case INDEX_op_bswap64_i64:
2377 case INDEX_op_neg_i32:
2378 case INDEX_op_neg_i64:
2379 case INDEX_op_not_i32:
2380 case INDEX_op_not_i64:
2381 return &r_0;
2383 case INDEX_op_ext8s_i32:
2384 case INDEX_op_ext8s_i64:
2385 case INDEX_op_ext8u_i32:
2386 case INDEX_op_ext8u_i64:
2387 return &r_q;
2388 case INDEX_op_ext16s_i32:
2389 case INDEX_op_ext16s_i64:
2390 case INDEX_op_ext16u_i32:
2391 case INDEX_op_ext16u_i64:
2392 case INDEX_op_ext32s_i64:
2393 case INDEX_op_ext32u_i64:
2394 case INDEX_op_ext_i32_i64:
2395 case INDEX_op_extu_i32_i64:
2396 case INDEX_op_extract_i32:
2397 case INDEX_op_extract_i64:
2398 case INDEX_op_sextract_i32:
2399 case INDEX_op_ctpop_i32:
2400 case INDEX_op_ctpop_i64:
2401 return &r_r;
2403 case INDEX_op_deposit_i32:
2404 case INDEX_op_deposit_i64:
2406 static const TCGTargetOpDef dep
2407 = { .args_ct_str = { "Q", "0", "Q" } };
2408 return &dep;
2410 case INDEX_op_setcond_i32:
2411 case INDEX_op_setcond_i64:
2413 static const TCGTargetOpDef setc
2414 = { .args_ct_str = { "q", "r", "re" } };
2415 return &setc;
2417 case INDEX_op_movcond_i32:
2418 case INDEX_op_movcond_i64:
2420 static const TCGTargetOpDef movc
2421 = { .args_ct_str = { "r", "r", "re", "r", "0" } };
2422 return &movc;
2424 case INDEX_op_div2_i32:
2425 case INDEX_op_div2_i64:
2426 case INDEX_op_divu2_i32:
2427 case INDEX_op_divu2_i64:
2429 static const TCGTargetOpDef div2
2430 = { .args_ct_str = { "a", "d", "0", "1", "r" } };
2431 return &div2;
2433 case INDEX_op_mulu2_i32:
2434 case INDEX_op_mulu2_i64:
2435 case INDEX_op_muls2_i32:
2436 case INDEX_op_muls2_i64:
2438 static const TCGTargetOpDef mul2
2439 = { .args_ct_str = { "a", "d", "a", "r" } };
2440 return &mul2;
2442 case INDEX_op_add2_i32:
2443 case INDEX_op_add2_i64:
2444 case INDEX_op_sub2_i32:
2445 case INDEX_op_sub2_i64:
2447 static const TCGTargetOpDef arith2
2448 = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } };
2449 return &arith2;
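/* The third operand of ctz/clz supplies the result for a zero input.
   With BMI1 TZCNT (resp. LZCNT) that value may be the constant operand
   width ("W"), since the instruction already yields it; otherwise it must
   be a register so the zero case can be fixed up afterwards. */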
2451 case INDEX_op_ctz_i32:
2452 case INDEX_op_ctz_i64:
2454 static const TCGTargetOpDef ctz[2] = {
2455 { .args_ct_str = { "&r", "r", "r" } },
2456 { .args_ct_str = { "&r", "r", "rW" } },
2458 return &ctz[have_bmi1];
2460 case INDEX_op_clz_i32:
2461 case INDEX_op_clz_i64:
2463 static const TCGTargetOpDef clz[2] = {
2464 { .args_ct_str = { "&r", "r", "r" } },
2465 { .args_ct_str = { "&r", "r", "rW" } },
2467 return &clz[have_lzcnt];
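/* "L" is "r" minus the registers reserved for the softmmu TLB lookup and
   helper call (TCG_REG_L0/L1).  When the guest address is wider than the
   host registers, an extra operand carries its high half. */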
2470 case INDEX_op_qemu_ld_i32:
2471 return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
2472 case INDEX_op_qemu_st_i32:
2473 return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
2474 case INDEX_op_qemu_ld_i64:
2475 return (TCG_TARGET_REG_BITS == 64 ? &r_L
2476 : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
2477 : &r_r_L_L);
2478 case INDEX_op_qemu_st_i64:
2479 return (TCG_TARGET_REG_BITS == 64 ? &L_L
2480 : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
2481 : &L_L_L_L);
2483 case INDEX_op_brcond2_i32:
2485 static const TCGTargetOpDef b2
2486 = { .args_ct_str = { "r", "r", "ri", "ri" } };
2487 return &b2;
2489 case INDEX_op_setcond2_i32:
2491 static const TCGTargetOpDef s2
2492 = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
2493 return &s2;
2496 default:
2497 break;
2499 return NULL;
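/* Registers preserved across calls, in the order the prologue pushes them.
   The DWARF unwind information at the end of this file must describe them
   in the same order. */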
2502 static const int tcg_target_callee_save_regs[] = {
2503 #if TCG_TARGET_REG_BITS == 64
2504 TCG_REG_RBP,
2505 TCG_REG_RBX,
2506 #if defined(_WIN64)
2507 TCG_REG_RDI,
2508 TCG_REG_RSI,
2509 #endif
2510 TCG_REG_R12,
2511 TCG_REG_R13,
2512 TCG_REG_R14, /* Currently used for the global env. */
2513 TCG_REG_R15,
2518 #else
2519 TCG_REG_EBP, /* Currently used for the global env. */
2520 TCG_REG_EBX,
2521 TCG_REG_ESI,
2522 TCG_REG_EDI,
2523 #endif
2526 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2527 and tcg_register_jit. */
2529 #define PUSH_SIZE \
2530 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2531 * (TCG_TARGET_REG_BITS / 8))
2533 #define FRAME_SIZE \
2534 ((PUSH_SIZE \
2535 + TCG_STATIC_CALL_ARGS_SIZE \
2536 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2537 + TCG_TARGET_STACK_ALIGN - 1) \
2538 & ~(TCG_TARGET_STACK_ALIGN - 1))
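/* FRAME_SIZE covers the return address and the pushed callee-saved
   registers (PUSH_SIZE), the outgoing-argument area for helper calls
   (TCG_STATIC_CALL_ARGS_SIZE) and the spill buffer for TCG temporaries,
   rounded up to the required stack alignment. */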
2540 /* Generate global QEMU prologue and epilogue code */
2541 static void tcg_target_qemu_prologue(TCGContext *s)
2543 int i, stack_addend;
2545 /* TB prologue */
2547 /* Reserve some stack space, also for TCG temps. */
2548 stack_addend = FRAME_SIZE - PUSH_SIZE;
2549 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2550 CPU_TEMP_BUF_NLONGS * sizeof(long));
2552 /* Save all callee saved registers. */
2553 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2554 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2557 #if TCG_TARGET_REG_BITS == 32
2558 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2559 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2560 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2561 /* jmp *tb. */
2562 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2563 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2564 + stack_addend);
2565 #else
2566 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2567 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2568 /* jmp *tb. */
2569 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2570 #endif
2573 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2574 * and fall through to the rest of the epilogue.
2576 s->code_gen_epilogue = s->code_ptr;
2577 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);
2579 /* TB epilogue */
2580 tb_ret_addr = s->code_ptr;
2582 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2584 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2585 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2587 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2589 #if !defined(CONFIG_SOFTMMU)
2590 /* Try to set up a segment register to point to guest_base. */
2591 if (guest_base) {
2592 setup_guest_base_seg();
2594 #endif
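/* Pad unused space in the code buffer with single-byte NOPs (0x90). */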
2597 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2599 memset(p, 0x90, count);
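/* Probe the host CPU via CPUID (when <cpuid.h> is available) to set the
   have_* feature flags, then describe the register file, the
   call-clobbered set and the reserved registers to the register
   allocator. */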
2602 static void tcg_target_init(TCGContext *s)
2604 #ifdef CONFIG_CPUID_H
2605 unsigned a, b, c, d;
2606 int max = __get_cpuid_max(0, 0);
2608 if (max >= 1) {
2609 __cpuid(1, a, b, c, d);
2610 #ifndef have_cmov
2611 /* For 32-bit, 99% certainty that we're running on hardware that
2612 supports cmov, but we still need to check. In case cmov is not
2613 available, we'll use a small forward branch. */
2614 have_cmov = (d & bit_CMOV) != 0;
2615 #endif
2616 /* MOVBE is only available on some CPUs (e.g. Intel Atom and Haswell), so
2617 we need to probe for it. */
2618 have_movbe = (c & bit_MOVBE) != 0;
2619 have_popcnt = (c & bit_POPCNT) != 0;
2622 if (max >= 7) {
2623 /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
2624 __cpuid_count(7, 0, a, b, c, d);
2625 have_bmi1 = (b & bit_BMI) != 0;
2626 have_bmi2 = (b & bit_BMI2) != 0;
2629 max = __get_cpuid_max(0x80000000, 0);
2630 if (max >= 1) {
2631 __cpuid(0x80000001, a, b, c, d);
2632 /* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs. */
2633 have_lzcnt = (c & bit_LZCNT) != 0;
2635 #endif /* CONFIG_CPUID_H */
2637 if (TCG_TARGET_REG_BITS == 64) {
2638 tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
2639 tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
2640 } else {
2641 tcg_target_available_regs[TCG_TYPE_I32] = 0xff;
2644 tcg_target_call_clobber_regs = 0;
2645 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2646 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2647 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2648 if (TCG_TARGET_REG_BITS == 64) {
2649 #if !defined(_WIN64)
2650 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2651 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2652 #endif
2653 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2654 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2655 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2656 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2659 s->reserved_regs = 0;
2660 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2663 typedef struct {
2664 DebugFrameHeader h;
2665 uint8_t fde_def_cfa[4];
2666 uint8_t fde_reg_ofs[14];
2667 } DebugFrame;
2669 /* We're expecting a 2 byte uleb128 encoded value. */
2670 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2672 #if !defined(__ELF__)
2673 /* Host machine without ELF. */
2674 #elif TCG_TARGET_REG_BITS == 64
2675 #define ELF_HOST_MACHINE EM_X86_64
2676 static const DebugFrame debug_frame = {
2677 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2678 .h.cie.id = -1,
2679 .h.cie.version = 1,
2680 .h.cie.code_align = 1,
2681 .h.cie.data_align = 0x78, /* sleb128 -8 */
2682 .h.cie.return_column = 16,
2684 /* Total FDE size does not include the "len" member. */
2685 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2687 .fde_def_cfa = {
2688 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2689 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2690 (FRAME_SIZE >> 7)
2692 .fde_reg_ofs = {
2693 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2694 /* The following ordering must match tcg_target_callee_save_regs. */
2695 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2696 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2697 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2698 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2699 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2700 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2703 #else
2704 #define ELF_HOST_MACHINE EM_386
2705 static const DebugFrame debug_frame = {
2706 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2707 .h.cie.id = -1,
2708 .h.cie.version = 1,
2709 .h.cie.code_align = 1,
2710 .h.cie.data_align = 0x7c, /* sleb128 -4 */
2711 .h.cie.return_column = 8,
2713 /* Total FDE size does not include the "len" member. */
2714 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2716 .fde_def_cfa = {
2717 12, 4, /* DW_CFA_def_cfa %esp, ... */
2718 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2719 (FRAME_SIZE >> 7)
2721 .fde_reg_ofs = {
2722 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2723 /* The following ordering must match tcg_target_callee_save_regs. */
2724 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2725 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2726 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2727 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2730 #endif
2732 #if defined(ELF_HOST_MACHINE)
2733 void tcg_register_jit(void *buf, size_t buf_size)
2735 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2737 #endif