/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "tcg-pool.inc.c"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *code_ptr = deposit32(*code_ptr, 0, 26, offset);
        return true;
    }
    return false;
}

static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    if (offset == sextract64(offset, 0, 19)) {
        *code_ptr = deposit32(*code_ptr, 5, 19, offset);
        return true;
    }
    return false;
}

static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'r': /* general registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffffu;
        break;
    case 'w': /* advsimd registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffff00000000ull;
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffffu;
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 for helper args; better to avoid using them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'O': /* vector orr/bic immediate */
        ct->ct |= TCG_CT_CONST_ORRI;
        break;
    case 'N': /* vector orr/bic immediate, inverted */
        ct->ct |= TCG_CT_CONST_ANDI;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
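
/*
 * For example, 0x123 and 0x123000 are both encodable arithmetic immediates
 * (12 bits, optionally shifted left by 12), while 0x123456 is not and must
 * first be materialized in a register.
 */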

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the form

           0....01....1
           0..01..10..0

       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
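
/*
 * Example: for 0x0ff0 the msb is clear; adding the lowest set bit (0x10)
 * yields 0x1000, a power of two, so the constant is accepted.  A value
 * like 0x0f0f is rejected here because it relies on the replication this
 * simplified view ignores.
 */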

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
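
/*
 * E.g. 0x00ff00ff: masking out the byte at bits [23:16] leaves 0x000000ff,
 * which MOVI can load directly; the masked byte is then merged back with
 * ORR (cmode 4, imm8 0xff), giving the full constant in two insns.
 */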

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fall through */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP       = 0x0e000400,
    I3605_INS       = 0x4e001c00,
    I3605_UMOV      = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e20a800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
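
/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
 * tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm), so pairing an
 * opcode with the wrong format emitter fails to compile.
 */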

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP). */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
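
/*
 * E.g. an AND with 0xff on a 64-bit operand is emitted with r = 0, c = 7,
 * i.e. immr = 0 and imms = 7: eight consecutive ones, rotated by zero.
 */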

static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg rd, tcg_target_long v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (v64 == dup_const(MO_8, v64)) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (v64 == dup_const(MO_16, v64)) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (v64 == dup_const(MO_32, v64)) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(rd >= 32);
        tcg_out_dupi_vec(s, type, rd, value);
        return;

    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        tcg_target_long disp = value - (intptr_t)s->code_ptr;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
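
/*
 * Example for tcg_out_movi above: 0x12345678 matches none of the shortcuts
 * and is emitted as MOVZ rd, #0x5678 followed by MOVK rd, #0x1234, LSL #16.
 */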

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
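
/*
 * E.g. a 32-bit load at offset 0x1234 uses the scaled form (0x1234 >> 2
 * fits in 12 bits), offset -16 uses the unscaled signed 9-bit form, and
 * an offset such as 0x123456 falls through to the register-offset form.
 */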

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg  rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_addr;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        offset = (addr >> 12) - (jmp_addr >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    pair = (uint64_t)i2 << 32 | i1;
    atomic_set((uint64_t *)jmp_addr, pair);
    flush_icache_range(jmp_addr, jmp_addr + 8);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = l->u.value_ptr - s->code_ptr;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}

static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                                   TCGReg rh, TCGReg al, TCGReg ah,
                                   tcg_target_long bl, tcg_target_long bh,
                                   bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        insn = I3401_ADDSI;
        if ((bl < 0) ^ sub) {
            insn = I3401_SUBSI;
            bl = -bl;
        }
        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}

#ifdef CONFIG_SOFTMMU
#include "tcg-ldst.inc.c"

/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi,
 *                                     uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target);
    tcg_debug_assert(offset == sextract64(offset, 0, 21));
    tcg_out_insn(s, 3406, ADR, rd, offset);
}

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);
    MemOp size = opc & MO_SIZE;

    if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
        return false;
    }

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    if (opc & MO_SIGN) {
        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);
    MemOp size = opc & MO_SIZE;

    if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
        return false;
    }

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->type = ext;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

/* We expect to use a 7-bit scaled negative offset from ENV.  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);

/* These offsets are built into the LDP below.  */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path.  Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg x3;
    TCGType mask_type;
    uint64_t compare_mask;

    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
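
    /*
     * The single AND (shifted register, LSR) above both shifts the guest
     * address right by TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS and applies
     * the mask just loaded into X0, leaving the byte offset of the TLB
     * entry within the table.
     */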

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison.  */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path.  */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}

#endif /* CONFIG_SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    const MemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, addr_r, otype, off_r);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev16(s, data_r, data_r);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
            tcg_out_rev16(s, data_r, data_r);
            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                           data_r, addr_r, otype, off_r);
        }
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev32(s, data_r, data_r);
        }
        break;
    case MO_SL:
        if (bswap) {
            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
            tcg_out_rev32(s, data_r, data_r);
            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
        }
        break;
    case MO_Q:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev64(s, data_r, data_r);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    const MemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
        break;
    case MO_16:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev16(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
        break;
    case MO_32:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev32(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
        break;
    case MO_64:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev64(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi, TCGType ext)
{
    MemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi)
{
    MemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, memop, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
                        data_reg, addr_reg, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    if (USE_GUEST_BASE) {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}

static tcg_insn_unit *tb_ret_addr;

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_exit_tb:
        /* Reuse the zeroing that exists for goto_ptr.  */
        if (a0 == 0) {
            tcg_out_goto_long(s, s->code_gen_epilogue);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
            tcg_out_goto_long(s, tb_ret_addr);
        }
        break;

    case INDEX_op_goto_tb:
        if (s->tb_jmp_insn_offset != NULL) {
            /* TCG_TARGET_HAS_direct_jump */
            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
               write can be used to patch the target address.  */
            if ((uintptr_t)s->code_ptr & 7) {
                tcg_out32(s, NOP);
            }
            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
            /* actual branch destination will be patched by
               tb_target_set_jmp_target later.  */
            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
        } else {
            /* !TCG_TARGET_HAS_direct_jump */
            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
        }
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
        set_jmp_reset_offset(s, a0);
        break;

    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;
2066 case INDEX_op_div_i64
:
2067 case INDEX_op_div_i32
:
2068 tcg_out_insn(s
, 3508, SDIV
, ext
, a0
, a1
, a2
);
2070 case INDEX_op_divu_i64
:
2071 case INDEX_op_divu_i32
:
2072 tcg_out_insn(s
, 3508, UDIV
, ext
, a0
, a1
, a2
);
2075 case INDEX_op_rem_i64
:
2076 case INDEX_op_rem_i32
:
2077 tcg_out_insn(s
, 3508, SDIV
, ext
, TCG_REG_TMP
, a1
, a2
);
2078 tcg_out_insn(s
, 3509, MSUB
, ext
, a0
, TCG_REG_TMP
, a2
, a1
);
2080 case INDEX_op_remu_i64
:
2081 case INDEX_op_remu_i32
:
2082 tcg_out_insn(s
, 3508, UDIV
, ext
, TCG_REG_TMP
, a1
, a2
);
2083 tcg_out_insn(s
, 3509, MSUB
, ext
, a0
, TCG_REG_TMP
, a2
, a1
);
2086 case INDEX_op_shl_i64
:
2087 case INDEX_op_shl_i32
:
2089 tcg_out_shl(s
, ext
, a0
, a1
, a2
);
2091 tcg_out_insn(s
, 3508, LSLV
, ext
, a0
, a1
, a2
);
2095 case INDEX_op_shr_i64
:
2096 case INDEX_op_shr_i32
:
2098 tcg_out_shr(s
, ext
, a0
, a1
, a2
);
2100 tcg_out_insn(s
, 3508, LSRV
, ext
, a0
, a1
, a2
);
2104 case INDEX_op_sar_i64
:
2105 case INDEX_op_sar_i32
:
2107 tcg_out_sar(s
, ext
, a0
, a1
, a2
);
2109 tcg_out_insn(s
, 3508, ASRV
, ext
, a0
, a1
, a2
);
2113 case INDEX_op_rotr_i64
:
2114 case INDEX_op_rotr_i32
:
2116 tcg_out_rotr(s
, ext
, a0
, a1
, a2
);
2118 tcg_out_insn(s
, 3508, RORV
, ext
, a0
, a1
, a2
);
    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

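    /* tcg_out_cltz handles both counts: the final argument selects
       count-trailing, presumably implemented by bit-reversing the
       input before the CLZ. */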
    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev64(s, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap32_i32:
        tcg_out_rev32(s, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev16(s, a0, a1);
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

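    /* Double-word arithmetic: tcg_out_addsub2 emits a flag-setting
       ADDS/SUBS for the low half and an ADC/SBC for the high half. */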
    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_movi_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        g_assert_not_reached();
    }

#undef REG0
}

static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg *args, const int *const_args)
{
    static const AArch64Insn cmp_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp0_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_sub_vec:
        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        break;
    case INDEX_op_abs_vec:
        tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        break;
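    /* For the logical ops, a constant operand that fits the AdvSIMD
       modified-immediate format (is_shimm1632) is applied with the
       immediate form (BIC/ORR); otherwise it is first materialized into
       the destination with MOVI/MVNI and the register form is used. */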
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_sssub_vec:
        tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_usadd_vec:
        tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_ussub_vec:
        tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
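    /* Format 3614 encodes the element size together with the shift count
       in the immh:immb field: left shifts store esize + shift, right
       shifts store 2 * esize - shift, where esize is 8 << vece. */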
    case INDEX_op_shli_vec:
        tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        break;
    case INDEX_op_shri_vec:
        tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        break;
    case INDEX_op_sari_vec:
        tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        break;
    case INDEX_op_shlv_vec:
        tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_aa64_sshl_vec:
        tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        break;
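    /* There is no vector compare-not-equal: with a constant operand
       (necessarily zero, per the "wZ" constraint) NE is CMTST Vd, Vn, Vn;
       otherwise it is CMEQ followed by NOT. */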
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                } else {
                    tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    insn = cmp0_insn[cond];
                    if (insn) {
                        tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                        break;
                    }
                    tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
                    a2 = TCG_VEC_TMP;
                }
                insn = cmp_insn[cond];
                if (insn == 0) {
                    TCGArg t;
                    t = a1, a1 = a2, a2 = t;
                    cond = tcg_swap_cond(cond);
                    insn = cmp_insn[cond];
                    tcg_debug_assert(insn != 0);
                }
                tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
            }
        }
        break;
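    /* BSL, BIT and BIF all overwrite one of their inputs, so pick the form
       whose overwritten operand already lives in the destination register;
       failing that, copy the selector into a0 and use BSL. */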
    case INDEX_op_bitsel_vec:
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}

int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_not_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_bitsel_vec:
        return 1;
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return -1;
    case INDEX_op_mul_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
        return vece < MO_64;

    default:
        return 0;
    }
}

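/* A return value of -1 above means "supported via expansion": the middle-end
   then calls tcg_expand_vec_op below to rewrite the operation in terms of
   ops the backend does implement. */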
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t1;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));

    switch (opc) {
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64.  */
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    default:
        g_assert_not_reached();
    }

    va_end(va);
}

static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
    static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
    static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
    static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
    static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
    static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
    static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
    static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
    static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
    static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
    static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
    static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
    static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
    static const TCGTargetOpDef r_r_rAL
        = { .args_ct_str = { "r", "r", "rAL" } };
    static const TCGTargetOpDef dep
        = { .args_ct_str = { "r", "0", "rZ" } };
    static const TCGTargetOpDef ext2
        = { .args_ct_str = { "r", "rZ", "rZ" } };
    static const TCGTargetOpDef movc
        = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
    static const TCGTargetOpDef add2
        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
    static const TCGTargetOpDef w_w_w_w
        = { .args_ct_str = { "w", "w", "w", "w" } };

    switch (op) {
    case INDEX_op_goto_ptr:
        return &r;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return &r_r;

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return &rZ_r;

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return &r_r_rA;

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return &r_r_r;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return &r_r_rL;

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return &r_r_ri;

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return &r_r_rAL;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return &r_rA;

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return &movc;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return &r_l;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return &lZ_l;

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return &dep;

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return &ext2;

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return &add2;

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return &w_w_w;
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return &w_w;
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_dupm_vec:
        return &w_r;
    case INDEX_op_dup_vec:
        return &w_wr;
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return &w_w_wO;
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return &w_w_wN;
    case INDEX_op_cmp_vec:
        return &w_w_wZ;
    case INDEX_op_bitsel_vec:
        return &w_w_w_w;

    default:
        return NULL;
    }
}

static void tcg_target_init(TCGContext *s)
{
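    /* The low 32 bits of each register set cover the general registers
       x0..sp, the high 32 bits the vector registers v0..v31; vector
       values can only live in v registers. */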
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

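/*
 * Frame layout built by the prologue below (a sketch; offsets are relative
 * to the frame pointer, which is pointed at the saved pair):
 *   FP + 0 .. 15    saved FP, LR
 *   FP + 16 .. 95   saved x19..x28, five pairs
 *   below FP        FRAME_SIZE - PUSH_SIZE bytes: outgoing static call
 *                   arguments, then the CPU_TEMP_BUF_NLONGS temp buffer,
 *                   plus alignment padding
 */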
static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28.  */
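    /* The (FP, LR) pair occupies the first 16 bytes of the push area,
       so the saves start at offset 16: hence the "+ 2" slots below. */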
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    s->code_gen_epilogue = s->code_ptr;
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    /* Remove TCG locals stack space.  */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr,  -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp,  -96 */
    }
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}