tcg/aarch64/tcg-target.inc.c
1 /*
2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
16 /* We're going to re-use TCGType when setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
28 #endif /* CONFIG_DEBUG_TCG */
30 static const int tcg_target_reg_alloc_order[] = {
31 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
32 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
33 TCG_REG_X28, /* we will reserve this for guest_base if configured */
35 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
36 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
37 TCG_REG_X16, TCG_REG_X17,
39 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
40 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
42 /* X18 reserved by system */
43 /* X19 reserved for AREG0 */
44 /* X29 reserved as fp */
45 /* X30 reserved as temporary */
48 static const int tcg_target_call_iarg_regs[8] = {
49 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
50 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
52 static const int tcg_target_call_oarg_regs[1] = {
53 TCG_REG_X0
56 #define TCG_REG_TMP TCG_REG_X30
58 #ifndef CONFIG_SOFTMMU
59 /* Note that XZR cannot be encoded in the address base register slot,
60 * as that actually encodes SP. So if we need to zero-extend the guest
61 address, via the address index register slot, we need to load even
62 a zero guest base into a register. */
63 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
64 #define TCG_REG_GUEST_BASE TCG_REG_X28
65 #endif
67 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
69 ptrdiff_t offset = target - code_ptr;
70 tcg_debug_assert(offset == sextract64(offset, 0, 26));
71 /* read instruction, mask away previous PC_REL26 parameter contents,
72 set the proper offset, then write back the instruction. */
73 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
76 static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
77 tcg_insn_unit *target)
79 ptrdiff_t offset = target - code_ptr;
80 tcg_insn_unit insn;
81 tcg_debug_assert(offset == sextract64(offset, 0, 26));
82 /* read instruction, mask away previous PC_REL26 parameter contents,
83 set the proper offset, then write back the instruction. */
84 insn = atomic_read(code_ptr);
85 atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
88 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
90 ptrdiff_t offset = target - code_ptr;
91 tcg_debug_assert(offset == sextract64(offset, 0, 19));
92 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
95 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
96 intptr_t value, intptr_t addend)
98 tcg_debug_assert(addend == 0);
99 switch (type) {
100 case R_AARCH64_JUMP26:
101 case R_AARCH64_CALL26:
102 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
103 break;
104 case R_AARCH64_CONDBR19:
105 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
106 break;
107 default:
108 tcg_abort();
112 #define TCG_CT_CONST_AIMM 0x100
113 #define TCG_CT_CONST_LIMM 0x200
114 #define TCG_CT_CONST_ZERO 0x400
115 #define TCG_CT_CONST_MONE 0x800
117 /* parse target specific constraints */
118 static const char *target_parse_constraint(TCGArgConstraint *ct,
119 const char *ct_str, TCGType type)
121 switch (*ct_str++) {
122 case 'r':
123 ct->ct |= TCG_CT_REG;
124 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
125 break;
126 case 'l': /* qemu_ld / qemu_st address, data_reg */
127 ct->ct |= TCG_CT_REG;
128 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
129 #ifdef CONFIG_SOFTMMU
130 /* x0 and x1 will be overwritten when reading the tlb entry,
131 and x2 and x3 are needed for the helper args; better to avoid using them. */
132 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
133 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
134 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
135 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
136 #endif
137 break;
138 case 'A': /* Valid for arithmetic immediate (positive or negative). */
139 ct->ct |= TCG_CT_CONST_AIMM;
140 break;
141 case 'L': /* Valid for logical immediate. */
142 ct->ct |= TCG_CT_CONST_LIMM;
143 break;
144 case 'M': /* minus one */
145 ct->ct |= TCG_CT_CONST_MONE;
146 break;
147 case 'Z': /* zero */
148 ct->ct |= TCG_CT_CONST_ZERO;
149 break;
150 default:
151 return NULL;
153 return ct_str;
156 static inline bool is_aimm(uint64_t val)
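/* An arithmetic immediate is an unsigned 12-bit value, optionally
   shifted left by 12, hence the two masks tested below. */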
158 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
161 static inline bool is_limm(uint64_t val)
163 /* Taking a simplified view of the logical immediates for now, ignoring
164 the replication that can happen across the field. Match bit patterns
165 of the forms
166 0....01....1
167 0..01..10..0
168 and their inverses. */
170 /* Make things easier below, by testing the form with msb clear. */
171 if ((int64_t)val < 0) {
172 val = ~val;
174 if (val == 0) {
175 return false;
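/* For the accepted forms above (tested with msb clear), adding the lowest
   set bit clears the low run of ones, leaving either zero or a single
   higher bit, which the power-of-two test below accepts. */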
177 val += val & -val;
178 return (val & (val - 1)) == 0;
181 static int tcg_target_const_match(tcg_target_long val, TCGType type,
182 const TCGArgConstraint *arg_ct)
184 int ct = arg_ct->ct;
186 if (ct & TCG_CT_CONST) {
187 return 1;
189 if (type == TCG_TYPE_I32) {
190 val = (int32_t)val;
192 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
193 return 1;
195 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
196 return 1;
198 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
199 return 1;
201 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
202 return 1;
205 return 0;
208 enum aarch64_cond_code {
209 COND_EQ = 0x0,
210 COND_NE = 0x1,
211 COND_CS = 0x2, /* Unsigned greater or equal */
212 COND_HS = COND_CS, /* ALIAS greater or equal */
213 COND_CC = 0x3, /* Unsigned less than */
214 COND_LO = COND_CC, /* ALIAS Lower */
215 COND_MI = 0x4, /* Negative */
216 COND_PL = 0x5, /* Zero or greater */
217 COND_VS = 0x6, /* Overflow */
218 COND_VC = 0x7, /* No overflow */
219 COND_HI = 0x8, /* Unsigned greater than */
220 COND_LS = 0x9, /* Unsigned less or equal */
221 COND_GE = 0xa,
222 COND_LT = 0xb,
223 COND_GT = 0xc,
224 COND_LE = 0xd,
225 COND_AL = 0xe,
226 COND_NV = 0xf, /* behaves like COND_AL here */
229 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
230 [TCG_COND_EQ] = COND_EQ,
231 [TCG_COND_NE] = COND_NE,
232 [TCG_COND_LT] = COND_LT,
233 [TCG_COND_GE] = COND_GE,
234 [TCG_COND_LE] = COND_LE,
235 [TCG_COND_GT] = COND_GT,
236 /* unsigned */
237 [TCG_COND_LTU] = COND_LO,
238 [TCG_COND_GTU] = COND_HI,
239 [TCG_COND_GEU] = COND_HS,
240 [TCG_COND_LEU] = COND_LS,
243 typedef enum {
244 LDST_ST = 0, /* store */
245 LDST_LD = 1, /* load */
246 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
247 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
248 } AArch64LdstType;
250 /* We encode the format of the insn into the beginning of the name, so that
251 we can have the preprocessor help "typecheck" the insn vs the output
252 function. Arm didn't provide us with nice names for the formats, so we
253 use the section number of the architecture reference manual in which the
254 instruction group is described. */
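/* For example, tcg_out_insn(s, 3401, ADDI, ...) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ...), so a mismatch between the
   format suffix and the opcode name fails to compile. */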
255 typedef enum {
256 /* Compare and branch (immediate). */
257 I3201_CBZ = 0x34000000,
258 I3201_CBNZ = 0x35000000,
260 /* Conditional branch (immediate). */
261 I3202_B_C = 0x54000000,
263 /* Unconditional branch (immediate). */
264 I3206_B = 0x14000000,
265 I3206_BL = 0x94000000,
267 /* Unconditional branch (register). */
268 I3207_BR = 0xd61f0000,
269 I3207_BLR = 0xd63f0000,
270 I3207_RET = 0xd65f0000,
272 /* Load literal for loading the address at pc-relative offset */
273 I3305_LDR = 0x58000000,
274 /* Load/store register. Described here as 3.3.12, but the helper
275 that emits them can transform to 3.3.10 or 3.3.13. */
276 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
277 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
278 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
279 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
281 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
282 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
283 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
284 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
286 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
287 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
289 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
290 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
291 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
293 I3312_TO_I3310 = 0x00200800,
294 I3312_TO_I3313 = 0x01000000,
296 /* Load/store register pair instructions. */
297 I3314_LDP = 0x28400000,
298 I3314_STP = 0x28000000,
300 /* Add/subtract immediate instructions. */
301 I3401_ADDI = 0x11000000,
302 I3401_ADDSI = 0x31000000,
303 I3401_SUBI = 0x51000000,
304 I3401_SUBSI = 0x71000000,
306 /* Bitfield instructions. */
307 I3402_BFM = 0x33000000,
308 I3402_SBFM = 0x13000000,
309 I3402_UBFM = 0x53000000,
311 /* Extract instruction. */
312 I3403_EXTR = 0x13800000,
314 /* Logical immediate instructions. */
315 I3404_ANDI = 0x12000000,
316 I3404_ORRI = 0x32000000,
317 I3404_EORI = 0x52000000,
319 /* Move wide immediate instructions. */
320 I3405_MOVN = 0x12800000,
321 I3405_MOVZ = 0x52800000,
322 I3405_MOVK = 0x72800000,
324 /* PC relative addressing instructions. */
325 I3406_ADR = 0x10000000,
326 I3406_ADRP = 0x90000000,
328 /* Add/subtract shifted register instructions (without a shift). */
329 I3502_ADD = 0x0b000000,
330 I3502_ADDS = 0x2b000000,
331 I3502_SUB = 0x4b000000,
332 I3502_SUBS = 0x6b000000,
334 /* Add/subtract shifted register instructions (with a shift). */
335 I3502S_ADD_LSL = I3502_ADD,
337 /* Add/subtract with carry instructions. */
338 I3503_ADC = 0x1a000000,
339 I3503_SBC = 0x5a000000,
341 /* Conditional select instructions. */
342 I3506_CSEL = 0x1a800000,
343 I3506_CSINC = 0x1a800400,
344 I3506_CSINV = 0x5a800000,
345 I3506_CSNEG = 0x5a800400,
347 /* Data-processing (1 source) instructions. */
348 I3507_CLZ = 0x5ac01000,
349 I3507_RBIT = 0x5ac00000,
350 I3507_REV16 = 0x5ac00400,
351 I3507_REV32 = 0x5ac00800,
352 I3507_REV64 = 0x5ac00c00,
354 /* Data-processing (2 source) instructions. */
355 I3508_LSLV = 0x1ac02000,
356 I3508_LSRV = 0x1ac02400,
357 I3508_ASRV = 0x1ac02800,
358 I3508_RORV = 0x1ac02c00,
359 I3508_SMULH = 0x9b407c00,
360 I3508_UMULH = 0x9bc07c00,
361 I3508_UDIV = 0x1ac00800,
362 I3508_SDIV = 0x1ac00c00,
364 /* Data-processing (3 source) instructions. */
365 I3509_MADD = 0x1b000000,
366 I3509_MSUB = 0x1b008000,
368 /* Logical shifted register instructions (without a shift). */
369 I3510_AND = 0x0a000000,
370 I3510_BIC = 0x0a200000,
371 I3510_ORR = 0x2a000000,
372 I3510_ORN = 0x2a200000,
373 I3510_EOR = 0x4a000000,
374 I3510_EON = 0x4a200000,
375 I3510_ANDS = 0x6a000000,
377 NOP = 0xd503201f,
378 /* System instructions. */
379 DMB_ISH = 0xd50338bf,
380 DMB_LD = 0x00000100,
381 DMB_ST = 0x00000200,
382 } AArch64Insn;
384 static inline uint32_t tcg_in32(TCGContext *s)
386 uint32_t v = *(uint32_t *)s->code_ptr;
387 return v;
390 /* Emit an opcode with "type-checking" of the format. */
391 #define tcg_out_insn(S, FMT, OP, ...) \
392 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
394 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
396 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
399 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
400 TCGReg rt, int imm19)
402 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
405 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
406 TCGCond c, int imm19)
408 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
411 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
413 tcg_out32(s, insn | (imm26 & 0x03ffffff));
416 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
418 tcg_out32(s, insn | rn << 5);
421 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
422 TCGReg r1, TCGReg r2, TCGReg rn,
423 tcg_target_long ofs, bool pre, bool w)
425 insn |= 1u << 31; /* ext */
426 insn |= pre << 24;
427 insn |= w << 23;
429 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
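/* The scaled 7-bit signed offset (ofs / 8) lives in the imm7 field at
   bit 15; masking bits [9:3] and shifting left by 12 places it there. */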
430 insn |= (ofs & (0x7f << 3)) << (15 - 3);
432 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
435 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
436 TCGReg rd, TCGReg rn, uint64_t aimm)
438 if (aimm > 0xfff) {
439 tcg_debug_assert((aimm & 0xfff) == 0);
440 aimm >>= 12;
441 tcg_debug_assert(aimm <= 0xfff);
442 aimm |= 1 << 12; /* apply LSL 12 */
444 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
447 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
448 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
449 that feed the DecodeBitMasks pseudo function. */
450 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
451 TCGReg rd, TCGReg rn, int n, int immr, int imms)
453 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
454 | rn << 5 | rd);
457 #define tcg_out_insn_3404 tcg_out_insn_3402
459 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
460 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
462 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
463 | rn << 5 | rd);
466 /* This function is used for the Move (wide immediate) instruction group.
467 Note that SHIFT is a full shift count, not the 2 bit HW field. */
468 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
469 TCGReg rd, uint16_t half, unsigned shift)
471 tcg_debug_assert((shift & ~0x30) == 0);
472 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
475 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
476 TCGReg rd, int64_t disp)
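/* ADR/ADRP split the 21-bit displacement: bits [1:0] go to immlo at
   bit 29, bits [20:2] to immhi at bit 5. */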
478 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
481 /* This function is for 3.5.2 (Add/subtract shifted register), for
482 the rare occasion when we actually want to supply a shift amount. */
483 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
484 TCGType ext, TCGReg rd, TCGReg rn,
485 TCGReg rm, int imm6)
487 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
490 /* This function is for 3.5.2 (Add/subtract shifted register),
491 and 3.5.10 (Logical shifted register), for the vast majority of cases
492 when we don't want to apply a shift. Thus it can also be used for
493 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
494 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
495 TCGReg rd, TCGReg rn, TCGReg rm)
497 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
500 #define tcg_out_insn_3503 tcg_out_insn_3502
501 #define tcg_out_insn_3508 tcg_out_insn_3502
502 #define tcg_out_insn_3510 tcg_out_insn_3502
504 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
505 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
507 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
508 | tcg_cond_to_aarch64[c] << 12);
511 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
512 TCGReg rd, TCGReg rn)
514 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
517 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
518 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
520 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
523 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
524 TCGReg rd, TCGReg base, TCGType ext,
525 TCGReg regoff)
527 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
528 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
529 0x4000 | ext << 13 | base << 5 | rd);
532 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
533 TCGReg rd, TCGReg rn, intptr_t offset)
535 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
538 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
539 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
541 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
542 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
545 /* Register to register move using ORR (shifted register with no shift). */
546 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
548 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
551 /* Register to register move using ADDI (move to/from SP). */
552 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
554 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
557 /* This function is used for the Logical (immediate) instruction group.
558 The value of LIMM must satisfy IS_LIMM. See the comment above about
559 only supporting simplified logical immediates. */
560 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
561 TCGReg rd, TCGReg rn, uint64_t limm)
563 unsigned h, l, r, c;
565 tcg_debug_assert(is_limm(limm));
567 h = clz64(limm);
568 l = ctz64(limm);
569 if (l == 0) {
570 r = 0; /* form 0....01....1 */
571 c = ctz64(~limm) - 1;
572 if (h == 0) {
573 r = clz64(~limm); /* form 1..10..01..1 */
574 c += r;
576 } else {
577 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
578 c = r - h - 1;
580 if (ext == TCG_TYPE_I32) {
581 r &= 31;
582 c &= 31;
585 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
588 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
589 tcg_target_long value)
591 int i, wantinv, shift;
592 tcg_target_long svalue = value;
593 tcg_target_long ivalue = ~value;
595 /* For 32-bit values, discard potential garbage in value. For 64-bit
596 values within [2**31, 2**32-1], we can create smaller sequences by
597 interpreting this as a negative 32-bit number, while ensuring that
598 the high 32 bits are cleared by setting SF=0. */
599 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
600 svalue = (int32_t)value;
601 value = (uint32_t)value;
602 ivalue = (uint32_t)ivalue;
603 type = TCG_TYPE_I32;
606 /* Speed things up by handling the common case of small positive
607 and negative values specially. */
608 if ((value & ~0xffffull) == 0) {
609 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
610 return;
611 } else if ((ivalue & ~0xffffull) == 0) {
612 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
613 return;
616 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
617 use the sign-extended value. That lets us match rotated values such
618 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
619 if (is_limm(svalue)) {
620 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
621 return;
624 /* Look for host pointer values within 4G of the PC. This happens
625 often when loading pointers to QEMU's own data structures. */
626 if (type == TCG_TYPE_I64) {
627 tcg_target_long disp = value - (intptr_t)s->code_ptr;
628 if (disp == sextract64(disp, 0, 21)) {
629 tcg_out_insn(s, 3406, ADR, rd, disp);
630 return;
632 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
633 if (disp == sextract64(disp, 0, 21)) {
634 tcg_out_insn(s, 3406, ADRP, rd, disp);
635 if (value & 0xfff) {
636 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
638 return;
642 /* Would it take fewer insns to begin with MOVN? For the value and its
643 inverse, count the number of 16-bit lanes that are 0. */
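/* E.g. 0xffffffff00001234: the value has one zero lane but its inverse
   has two, so MOVN of 0xedcb plus one MOVK beats MOVZ plus two MOVK. */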
644 for (i = wantinv = 0; i < 64; i += 16) {
645 tcg_target_long mask = 0xffffull << i;
646 wantinv -= ((value & mask) == 0);
647 wantinv += ((ivalue & mask) == 0);
650 if (wantinv <= 0) {
651 /* Find the lowest lane that is not 0x0000. */
652 shift = ctz64(value) & (63 & -16);
653 tcg_out_insn(s, 3405, MOVZ, type, rd, value >> shift, shift);
654 /* Clear out the lane that we just set. */
655 value &= ~(0xffffUL << shift);
656 /* Iterate until all non-zero lanes have been processed. */
657 while (value) {
658 shift = ctz64(value) & (63 & -16);
659 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
660 value &= ~(0xffffUL << shift);
662 } else {
663 /* Like above, but with the inverted value and MOVN to start. */
664 shift = ctz64(ivalue) & (63 & -16);
665 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue >> shift, shift);
666 ivalue &= ~(0xffffUL << shift);
667 while (ivalue) {
668 shift = ctz64(ivalue) & (63 & -16);
669 /* Provide MOVK with the non-inverted value. */
670 tcg_out_insn(s, 3405, MOVK, type, rd, ~(ivalue >> shift), shift);
671 ivalue &= ~(0xffffUL << shift);
676 /* Define something more legible for general use. */
677 #define tcg_out_ldst_r tcg_out_insn_3310
679 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
680 TCGReg rd, TCGReg rn, intptr_t offset)
682 TCGMemOp size = (uint32_t)insn >> 30;
684 /* If the offset is naturally aligned and in range, then we can
685 use the scaled uimm12 encoding */
686 if (offset >= 0 && !(offset & ((1 << size) - 1))) {
687 uintptr_t scaled_uimm = offset >> size;
688 if (scaled_uimm <= 0xfff) {
689 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
690 return;
694 /* Small signed offsets can use the unscaled encoding. */
695 if (offset >= -256 && offset < 256) {
696 tcg_out_insn_3312(s, insn, rd, rn, offset);
697 return;
700 /* Worst-case scenario, move offset to temp register, use reg offset. */
701 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
702 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
705 static inline void tcg_out_mov(TCGContext *s,
706 TCGType type, TCGReg ret, TCGReg arg)
708 if (ret != arg) {
709 tcg_out_movr(s, type, ret, arg);
713 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
714 TCGReg arg1, intptr_t arg2)
716 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
717 arg, arg1, arg2);
720 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
721 TCGReg arg1, intptr_t arg2)
723 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
724 arg, arg1, arg2);
727 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
728 TCGReg base, intptr_t ofs)
730 if (val == 0) {
731 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
732 return true;
734 return false;
737 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
738 TCGReg rn, unsigned int a, unsigned int b)
740 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
743 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
744 TCGReg rn, unsigned int a, unsigned int b)
746 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
749 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
750 TCGReg rn, unsigned int a, unsigned int b)
752 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
755 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
756 TCGReg rn, TCGReg rm, unsigned int a)
758 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
761 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
762 TCGReg rd, TCGReg rn, unsigned int m)
764 int bits = ext ? 64 : 32;
765 int max = bits - 1;
766 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
769 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
770 TCGReg rd, TCGReg rn, unsigned int m)
772 int max = ext ? 63 : 31;
773 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
776 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
777 TCGReg rd, TCGReg rn, unsigned int m)
779 int max = ext ? 63 : 31;
780 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
783 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
784 TCGReg rd, TCGReg rn, unsigned int m)
786 int max = ext ? 63 : 31;
787 tcg_out_extr(s, ext, rd, rn, rn, m & max);
790 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
791 TCGReg rd, TCGReg rn, unsigned int m)
793 int bits = ext ? 64 : 32;
794 int max = bits - 1;
795 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
798 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
799 TCGReg rn, unsigned lsb, unsigned width)
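/* This is the BFI alias of BFM: immr = (size - lsb) mod size,
   imms = width - 1. */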
801 unsigned size = ext ? 64 : 32;
802 unsigned a = (size - lsb) & (size - 1);
803 unsigned b = width - 1;
804 tcg_out_bfm(s, ext, rd, rn, a, b);
807 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
808 tcg_target_long b, bool const_b)
810 if (const_b) {
811 /* Using CMP or CMN aliases. */
812 if (b >= 0) {
813 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
814 } else {
815 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
817 } else {
818 /* Using CMP alias SUBS wzr, Wn, Wm */
819 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
823 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
825 ptrdiff_t offset = target - s->code_ptr;
826 tcg_debug_assert(offset == sextract64(offset, 0, 26));
827 tcg_out_insn(s, 3206, B, offset);
830 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
832 ptrdiff_t offset = target - s->code_ptr;
833 if (offset == sextract64(offset, 0, 26)) {
834 tcg_out_insn(s, 3206, B, offset);
835 } else {
836 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
837 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
841 static inline void tcg_out_goto_noaddr(TCGContext *s)
843 /* We pay attention here to not modify the branch target by reading from
844 the buffer. This ensures that caches and memory are kept coherent during
845 retranslation. Mask away possible garbage in the high bits for the
846 first translation, while keeping the offset bits for retranslation. */
847 uint32_t old = tcg_in32(s);
848 tcg_out_insn(s, 3206, B, old);
851 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
853 /* See comments in tcg_out_goto_noaddr. */
854 uint32_t old = tcg_in32(s) >> 5;
855 tcg_out_insn(s, 3202, B_C, c, old);
858 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
860 tcg_out_insn(s, 3207, BLR, reg);
863 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
865 ptrdiff_t offset = target - s->code_ptr;
866 if (offset == sextract64(offset, 0, 26)) {
867 tcg_out_insn(s, 3206, BL, offset);
868 } else {
869 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
870 tcg_out_callr(s, TCG_REG_TMP);
874 #ifdef USE_DIRECT_JUMP
876 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
878 tcg_insn_unit i1, i2;
879 TCGType rt = TCG_TYPE_I64;
880 TCGReg rd = TCG_REG_TMP;
881 uint64_t pair;
883 ptrdiff_t offset = addr - jmp_addr;
885 if (offset == sextract64(offset, 0, 26)) {
886 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
887 i2 = NOP;
888 } else {
889 offset = (addr >> 12) - (jmp_addr >> 12);
891 /* patch ADRP */
892 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
893 /* patch ADDI */
894 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
896 pair = (uint64_t)i2 << 32 | i1;
897 atomic_set((uint64_t *)jmp_addr, pair);
898 flush_icache_range(jmp_addr, jmp_addr + 8);
901 #endif
903 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
905 if (!l->has_value) {
906 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
907 tcg_out_goto_noaddr(s);
908 } else {
909 tcg_out_goto(s, l->u.value_ptr);
913 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
914 TCGArg b, bool b_const, TCGLabel *l)
916 intptr_t offset;
917 bool need_cmp;
919 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
920 need_cmp = false;
921 } else {
922 need_cmp = true;
923 tcg_out_cmp(s, ext, a, b, b_const);
926 if (!l->has_value) {
927 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
928 offset = tcg_in32(s) >> 5;
929 } else {
930 offset = l->u.value_ptr - s->code_ptr;
931 tcg_debug_assert(offset == sextract64(offset, 0, 19));
934 if (need_cmp) {
935 tcg_out_insn(s, 3202, B_C, c, offset);
936 } else if (c == TCG_COND_EQ) {
937 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
938 } else {
939 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
943 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
945 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
948 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
950 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
953 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
955 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
958 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
959 TCGReg rd, TCGReg rn)
961 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
962 int bits = (8 << s_bits) - 1;
963 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
966 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
967 TCGReg rd, TCGReg rn)
969 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
970 int bits = (8 << s_bits) - 1;
971 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
974 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
975 TCGReg rn, int64_t aimm)
977 if (aimm >= 0) {
978 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
979 } else {
980 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
984 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
985 TCGReg rh, TCGReg al, TCGReg ah,
986 tcg_target_long bl, tcg_target_long bh,
987 bool const_bl, bool const_bh, bool sub)
989 TCGReg orig_rl = rl;
990 AArch64Insn insn;
992 if (rl == ah || (!const_bh && rl == bh)) {
993 rl = TCG_REG_TMP;
996 if (const_bl) {
997 insn = I3401_ADDSI;
998 if ((bl < 0) ^ sub) {
999 insn = I3401_SUBSI;
1000 bl = -bl;
1002 if (unlikely(al == TCG_REG_XZR)) {
1003 /* ??? We want to allow al to be zero for the benefit of
1004 negation via subtraction. However, that leaves open the
1005 possibility of adding 0+const in the low part, and the
1006 immediate add instructions encode XSP not XZR. Don't try
1007 anything more elaborate here than loading another zero. */
1008 al = TCG_REG_TMP;
1009 tcg_out_movi(s, ext, al, 0);
1011 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1012 } else {
1013 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1016 insn = I3503_ADC;
1017 if (const_bh) {
1018 /* Note that the only two constants we support are 0 and -1, and
1019 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1020 if ((bh != 0) ^ sub) {
1021 insn = I3503_SBC;
1023 bh = TCG_REG_XZR;
1024 } else if (sub) {
1025 insn = I3503_SBC;
1027 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1029 tcg_out_mov(s, ext, orig_rl, rl);
1032 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1034 static const uint32_t sync[] = {
1035 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1036 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1037 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1038 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1039 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1041 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1044 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1045 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1047 TCGReg a1 = a0;
1048 if (is_ctz) {
1049 a1 = TCG_REG_TMP;
1050 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1052 if (const_b && b == (ext ? 64 : 32)) {
1053 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1054 } else {
1055 AArch64Insn sel = I3506_CSEL;
1057 tcg_out_cmp(s, ext, a0, 0, 1);
1058 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1060 if (const_b) {
1061 if (b == -1) {
1062 b = TCG_REG_XZR;
1063 sel = I3506_CSINV;
1064 } else if (b == 0) {
1065 b = TCG_REG_XZR;
1066 } else {
1067 tcg_out_movi(s, ext, d, b);
1068 b = d;
1071 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1075 #ifdef CONFIG_SOFTMMU
1076 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1077 * TCGMemOpIdx oi, uintptr_t ra)
1079 static void * const qemu_ld_helpers[16] = {
1080 [MO_UB] = helper_ret_ldub_mmu,
1081 [MO_LEUW] = helper_le_lduw_mmu,
1082 [MO_LEUL] = helper_le_ldul_mmu,
1083 [MO_LEQ] = helper_le_ldq_mmu,
1084 [MO_BEUW] = helper_be_lduw_mmu,
1085 [MO_BEUL] = helper_be_ldul_mmu,
1086 [MO_BEQ] = helper_be_ldq_mmu,
1089 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1090 * uintxx_t val, TCGMemOpIdx oi,
1091 * uintptr_t ra)
1093 static void * const qemu_st_helpers[16] = {
1094 [MO_UB] = helper_ret_stb_mmu,
1095 [MO_LEUW] = helper_le_stw_mmu,
1096 [MO_LEUL] = helper_le_stl_mmu,
1097 [MO_LEQ] = helper_le_stq_mmu,
1098 [MO_BEUW] = helper_be_stw_mmu,
1099 [MO_BEUL] = helper_be_stl_mmu,
1100 [MO_BEQ] = helper_be_stq_mmu,
1103 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1105 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1106 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1107 tcg_out_insn(s, 3406, ADR, rd, offset);
1110 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1112 TCGMemOpIdx oi = lb->oi;
1113 TCGMemOp opc = get_memop(oi);
1114 TCGMemOp size = opc & MO_SIZE;
1116 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1118 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1119 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1120 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1121 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1122 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1123 if (opc & MO_SIGN) {
1124 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1125 } else {
1126 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1129 tcg_out_goto(s, lb->raddr);
1132 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1134 TCGMemOpIdx oi = lb->oi;
1135 TCGMemOp opc = get_memop(oi);
1136 TCGMemOp size = opc & MO_SIZE;
1138 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1140 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1141 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1142 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1143 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1144 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1145 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1146 tcg_out_goto(s, lb->raddr);
1149 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1150 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1151 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1153 TCGLabelQemuLdst *label = new_ldst_label(s);
1155 label->is_ld = is_ld;
1156 label->oi = oi;
1157 label->type = ext;
1158 label->datalo_reg = data_reg;
1159 label->addrlo_reg = addr_reg;
1160 label->raddr = raddr;
1161 label->label_ptr[0] = label_ptr;
1164 /* Load and compare a TLB entry, emitting the conditional jump to the
1165 slow path for the failure case, which will be patched later when finalizing
1166 the slow path. Generated code returns the host addend in X1,
1167 clobbers X0,X2,X3,TMP. */
1168 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1169 tcg_insn_unit **label_ptr, int mem_index,
1170 bool is_read)
1172 int tlb_offset = is_read ?
1173 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1174 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1175 unsigned a_bits = get_alignment_bits(opc);
1176 unsigned s_bits = opc & MO_SIZE;
1177 unsigned a_mask = (1u << a_bits) - 1;
1178 unsigned s_mask = (1u << s_bits) - 1;
1179 TCGReg base = TCG_AREG0, x3;
1180 uint64_t tlb_mask;
1182 /* For aligned accesses, we check the first byte and include the alignment
1183 bits within the address. For unaligned access, we check that we don't
1184 cross pages using the address of the last byte of the access. */
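/* E.g. a 4-byte load with no alignment requirement (a_bits = 0,
   s_bits = 2) adds s_mask - a_mask = 3, so the comparison below uses
   the page of the access's last byte. */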
1185 if (a_bits >= s_bits) {
1186 x3 = addr_reg;
1187 } else {
1188 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1189 TCG_REG_X3, addr_reg, s_mask - a_mask);
1190 x3 = TCG_REG_X3;
1192 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1194 /* Extract the TLB index from the address into X0.
1195 X0<CPU_TLB_BITS:0> =
1196 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1197 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1198 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1200 /* Store the page mask part of the address into X3. */
1201 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1202 TCG_REG_X3, x3, tlb_mask);
1204 /* Add any "high bits" from the tlb offset to the env address into X2,
1205 to take advantage of the LSL12 form of the ADDI instruction.
1206 X2 = env + (tlb_offset & 0xfff000) */
1207 if (tlb_offset & 0xfff000) {
1208 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1209 tlb_offset & 0xfff000);
1210 base = TCG_REG_X2;
1213 /* Merge the tlb index contribution into X2.
1214 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1215 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1216 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1218 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1219 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1220 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1221 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1223 /* Load the tlb addend. Do that early to avoid stalling.
1224 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1225 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1226 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1227 (is_read ? offsetof(CPUTLBEntry, addr_read)
1228 : offsetof(CPUTLBEntry, addr_write)));
1230 /* Perform the address comparison. */
1231 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1233 /* If not equal, we jump to the slow path. */
1234 *label_ptr = s->code_ptr;
1235 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1238 #endif /* CONFIG_SOFTMMU */
1240 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1241 TCGReg data_r, TCGReg addr_r,
1242 TCGType otype, TCGReg off_r)
1244 const TCGMemOp bswap = memop & MO_BSWAP;
1246 switch (memop & MO_SSIZE) {
1247 case MO_UB:
1248 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1249 break;
1250 case MO_SB:
1251 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1252 data_r, addr_r, otype, off_r);
1253 break;
1254 case MO_UW:
1255 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1256 if (bswap) {
1257 tcg_out_rev16(s, data_r, data_r);
1259 break;
1260 case MO_SW:
1261 if (bswap) {
1262 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1263 tcg_out_rev16(s, data_r, data_r);
1264 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1265 } else {
1266 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1267 data_r, addr_r, otype, off_r);
1269 break;
1270 case MO_UL:
1271 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1272 if (bswap) {
1273 tcg_out_rev32(s, data_r, data_r);
1275 break;
1276 case MO_SL:
1277 if (bswap) {
1278 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1279 tcg_out_rev32(s, data_r, data_r);
1280 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1281 } else {
1282 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1284 break;
1285 case MO_Q:
1286 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1287 if (bswap) {
1288 tcg_out_rev64(s, data_r, data_r);
1290 break;
1291 default:
1292 tcg_abort();
1296 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1297 TCGReg data_r, TCGReg addr_r,
1298 TCGType otype, TCGReg off_r)
1300 const TCGMemOp bswap = memop & MO_BSWAP;
1302 switch (memop & MO_SIZE) {
1303 case MO_8:
1304 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1305 break;
1306 case MO_16:
1307 if (bswap && data_r != TCG_REG_XZR) {
1308 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1309 data_r = TCG_REG_TMP;
1311 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1312 break;
1313 case MO_32:
1314 if (bswap && data_r != TCG_REG_XZR) {
1315 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1316 data_r = TCG_REG_TMP;
1318 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1319 break;
1320 case MO_64:
1321 if (bswap && data_r != TCG_REG_XZR) {
1322 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1323 data_r = TCG_REG_TMP;
1325 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1326 break;
1327 default:
1328 tcg_abort();
1332 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1333 TCGMemOpIdx oi, TCGType ext)
1335 TCGMemOp memop = get_memop(oi);
1336 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1337 #ifdef CONFIG_SOFTMMU
1338 unsigned mem_index = get_mmuidx(oi);
1339 tcg_insn_unit *label_ptr;
1341 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1342 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1343 TCG_REG_X1, otype, addr_reg);
1344 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1345 s->code_ptr, label_ptr);
1346 #else /* !CONFIG_SOFTMMU */
1347 if (USE_GUEST_BASE) {
1348 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1349 TCG_REG_GUEST_BASE, otype, addr_reg);
1350 } else {
1351 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1352 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1354 #endif /* CONFIG_SOFTMMU */
1357 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1358 TCGMemOpIdx oi)
1360 TCGMemOp memop = get_memop(oi);
1361 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1362 #ifdef CONFIG_SOFTMMU
1363 unsigned mem_index = get_mmuidx(oi);
1364 tcg_insn_unit *label_ptr;
1366 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1367 tcg_out_qemu_st_direct(s, memop, data_reg,
1368 TCG_REG_X1, otype, addr_reg);
1369 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1370 data_reg, addr_reg, s->code_ptr, label_ptr);
1371 #else /* !CONFIG_SOFTMMU */
1372 if (USE_GUEST_BASE) {
1373 tcg_out_qemu_st_direct(s, memop, data_reg,
1374 TCG_REG_GUEST_BASE, otype, addr_reg);
1375 } else {
1376 tcg_out_qemu_st_direct(s, memop, data_reg,
1377 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1379 #endif /* CONFIG_SOFTMMU */
1382 static tcg_insn_unit *tb_ret_addr;
1384 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1385 const TCGArg args[TCG_MAX_OP_ARGS],
1386 const int const_args[TCG_MAX_OP_ARGS])
1388 /* 99% of the time, we can signal the use of extension registers
1389 by looking to see if the opcode handles 64-bit data. */
1390 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1392 /* Hoist the loads of the most common arguments. */
1393 TCGArg a0 = args[0];
1394 TCGArg a1 = args[1];
1395 TCGArg a2 = args[2];
1396 int c2 = const_args[2];
1398 /* Some operands are defined with "rZ" constraint, a register or
1399 the zero register. These need not actually test args[I] == 0. */
1400 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1402 switch (opc) {
1403 case INDEX_op_exit_tb:
1404 /* Reuse the zeroing that exists for goto_ptr. */
1405 if (a0 == 0) {
1406 tcg_out_goto_long(s, s->code_gen_epilogue);
1407 } else {
1408 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1409 tcg_out_goto_long(s, tb_ret_addr);
1411 break;
1413 case INDEX_op_goto_tb:
1414 if (s->tb_jmp_insn_offset != NULL) {
1415 /* USE_DIRECT_JUMP */
1416 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1417 write can be used to patch the target address. */
1418 if ((uintptr_t)s->code_ptr & 7) {
1419 tcg_out32(s, NOP);
1421 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1422 /* actual branch destination will be patched by
1423 aarch64_tb_set_jmp_target later. */
1424 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1425 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1426 } else {
1427 /* !USE_DIRECT_JUMP */
1428 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1429 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1430 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1432 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1433 s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
1434 break;
1436 case INDEX_op_goto_ptr:
1437 tcg_out_insn(s, 3207, BR, a0);
1438 break;
1440 case INDEX_op_br:
1441 tcg_out_goto_label(s, arg_label(a0));
1442 break;
1444 case INDEX_op_ld8u_i32:
1445 case INDEX_op_ld8u_i64:
1446 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1447 break;
1448 case INDEX_op_ld8s_i32:
1449 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1450 break;
1451 case INDEX_op_ld8s_i64:
1452 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1453 break;
1454 case INDEX_op_ld16u_i32:
1455 case INDEX_op_ld16u_i64:
1456 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1457 break;
1458 case INDEX_op_ld16s_i32:
1459 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1460 break;
1461 case INDEX_op_ld16s_i64:
1462 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1463 break;
1464 case INDEX_op_ld_i32:
1465 case INDEX_op_ld32u_i64:
1466 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1467 break;
1468 case INDEX_op_ld32s_i64:
1469 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1470 break;
1471 case INDEX_op_ld_i64:
1472 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1473 break;
1475 case INDEX_op_st8_i32:
1476 case INDEX_op_st8_i64:
1477 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1478 break;
1479 case INDEX_op_st16_i32:
1480 case INDEX_op_st16_i64:
1481 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1482 break;
1483 case INDEX_op_st_i32:
1484 case INDEX_op_st32_i64:
1485 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1486 break;
1487 case INDEX_op_st_i64:
1488 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1489 break;
1491 case INDEX_op_add_i32:
1492 a2 = (int32_t)a2;
1493 /* FALLTHRU */
1494 case INDEX_op_add_i64:
1495 if (c2) {
1496 tcg_out_addsubi(s, ext, a0, a1, a2);
1497 } else {
1498 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1500 break;
1502 case INDEX_op_sub_i32:
1503 a2 = (int32_t)a2;
1504 /* FALLTHRU */
1505 case INDEX_op_sub_i64:
1506 if (c2) {
1507 tcg_out_addsubi(s, ext, a0, a1, -a2);
1508 } else {
1509 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1511 break;
1513 case INDEX_op_neg_i64:
1514 case INDEX_op_neg_i32:
1515 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1516 break;
1518 case INDEX_op_and_i32:
1519 a2 = (int32_t)a2;
1520 /* FALLTHRU */
1521 case INDEX_op_and_i64:
1522 if (c2) {
1523 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1524 } else {
1525 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1527 break;
1529 case INDEX_op_andc_i32:
1530 a2 = (int32_t)a2;
1531 /* FALLTHRU */
1532 case INDEX_op_andc_i64:
1533 if (c2) {
1534 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1535 } else {
1536 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1538 break;
1540 case INDEX_op_or_i32:
1541 a2 = (int32_t)a2;
1542 /* FALLTHRU */
1543 case INDEX_op_or_i64:
1544 if (c2) {
1545 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1546 } else {
1547 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1549 break;
1551 case INDEX_op_orc_i32:
1552 a2 = (int32_t)a2;
1553 /* FALLTHRU */
1554 case INDEX_op_orc_i64:
1555 if (c2) {
1556 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1557 } else {
1558 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1560 break;
1562 case INDEX_op_xor_i32:
1563 a2 = (int32_t)a2;
1564 /* FALLTHRU */
1565 case INDEX_op_xor_i64:
1566 if (c2) {
1567 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1568 } else {
1569 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1571 break;
1573 case INDEX_op_eqv_i32:
1574 a2 = (int32_t)a2;
1575 /* FALLTHRU */
1576 case INDEX_op_eqv_i64:
1577 if (c2) {
1578 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1579 } else {
1580 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1582 break;
1584 case INDEX_op_not_i64:
1585 case INDEX_op_not_i32:
1586 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1587 break;
1589 case INDEX_op_mul_i64:
1590 case INDEX_op_mul_i32:
1591 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1592 break;
1594 case INDEX_op_div_i64:
1595 case INDEX_op_div_i32:
1596 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1597 break;
1598 case INDEX_op_divu_i64:
1599 case INDEX_op_divu_i32:
1600 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1601 break;
1603 case INDEX_op_rem_i64:
1604 case INDEX_op_rem_i32:
1605 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1606 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1607 break;
1608 case INDEX_op_remu_i64:
1609 case INDEX_op_remu_i32:
1610 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1611 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1612 break;
1614 case INDEX_op_shl_i64:
1615 case INDEX_op_shl_i32:
1616 if (c2) {
1617 tcg_out_shl(s, ext, a0, a1, a2);
1618 } else {
1619 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1621 break;
1623 case INDEX_op_shr_i64:
1624 case INDEX_op_shr_i32:
1625 if (c2) {
1626 tcg_out_shr(s, ext, a0, a1, a2);
1627 } else {
1628 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1630 break;
1632 case INDEX_op_sar_i64:
1633 case INDEX_op_sar_i32:
1634 if (c2) {
1635 tcg_out_sar(s, ext, a0, a1, a2);
1636 } else {
1637 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1639 break;
1641 case INDEX_op_rotr_i64:
1642 case INDEX_op_rotr_i32:
1643 if (c2) {
1644 tcg_out_rotr(s, ext, a0, a1, a2);
1645 } else {
1646 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1648 break;
1650 case INDEX_op_rotl_i64:
1651 case INDEX_op_rotl_i32:
1652 if (c2) {
1653 tcg_out_rotl(s, ext, a0, a1, a2);
1654 } else {
1655 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1656 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1658 break;
1660 case INDEX_op_clz_i64:
1661 case INDEX_op_clz_i32:
1662 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1663 break;
1664 case INDEX_op_ctz_i64:
1665 case INDEX_op_ctz_i32:
1666 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1667 break;
1669 case INDEX_op_brcond_i32:
1670 a1 = (int32_t)a1;
1671 /* FALLTHRU */
1672 case INDEX_op_brcond_i64:
1673 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1674 break;
1676 case INDEX_op_setcond_i32:
1677 a2 = (int32_t)a2;
1678 /* FALLTHRU */
1679 case INDEX_op_setcond_i64:
1680 tcg_out_cmp(s, ext, a1, a2, c2);
1681 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1682 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1683 TCG_REG_XZR, tcg_invert_cond(args[3]));
1684 break;
1686 case INDEX_op_movcond_i32:
1687 a2 = (int32_t)a2;
1688 /* FALLTHRU */
1689 case INDEX_op_movcond_i64:
1690 tcg_out_cmp(s, ext, a1, a2, c2);
1691 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1692 break;
1694 case INDEX_op_qemu_ld_i32:
1695 case INDEX_op_qemu_ld_i64:
1696 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1697 break;
1698 case INDEX_op_qemu_st_i32:
1699 case INDEX_op_qemu_st_i64:
1700 tcg_out_qemu_st(s, REG0(0), a1, a2);
1701 break;
1703 case INDEX_op_bswap64_i64:
1704 tcg_out_rev64(s, a0, a1);
1705 break;
1706 case INDEX_op_bswap32_i64:
1707 case INDEX_op_bswap32_i32:
1708 tcg_out_rev32(s, a0, a1);
1709 break;
1710 case INDEX_op_bswap16_i64:
1711 case INDEX_op_bswap16_i32:
1712 tcg_out_rev16(s, a0, a1);
1713 break;
1715 case INDEX_op_ext8s_i64:
1716 case INDEX_op_ext8s_i32:
1717 tcg_out_sxt(s, ext, MO_8, a0, a1);
1718 break;
1719 case INDEX_op_ext16s_i64:
1720 case INDEX_op_ext16s_i32:
1721 tcg_out_sxt(s, ext, MO_16, a0, a1);
1722 break;
1723 case INDEX_op_ext_i32_i64:
1724 case INDEX_op_ext32s_i64:
1725 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1726 break;
1727 case INDEX_op_ext8u_i64:
1728 case INDEX_op_ext8u_i32:
1729 tcg_out_uxt(s, MO_8, a0, a1);
1730 break;
1731 case INDEX_op_ext16u_i64:
1732 case INDEX_op_ext16u_i32:
1733 tcg_out_uxt(s, MO_16, a0, a1);
1734 break;
1735 case INDEX_op_extu_i32_i64:
1736 case INDEX_op_ext32u_i64:
1737 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1738 break;
1740 case INDEX_op_deposit_i64:
1741 case INDEX_op_deposit_i32:
1742 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1743 break;
1745 case INDEX_op_extract_i64:
1746 case INDEX_op_extract_i32:
1747 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1748 break;
1750 case INDEX_op_sextract_i64:
1751 case INDEX_op_sextract_i32:
1752 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1753 break;
1755 case INDEX_op_add2_i32:
1756 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1757 (int32_t)args[4], args[5], const_args[4],
1758 const_args[5], false);
1759 break;
1760 case INDEX_op_add2_i64:
1761 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1762 args[5], const_args[4], const_args[5], false);
1763 break;
1764 case INDEX_op_sub2_i32:
1765 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1766 (int32_t)args[4], args[5], const_args[4],
1767 const_args[5], true);
1768 break;
1769 case INDEX_op_sub2_i64:
1770 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1771 args[5], const_args[4], const_args[5], true);
1772 break;
1774 case INDEX_op_muluh_i64:
1775 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1776 break;
1777 case INDEX_op_mulsh_i64:
1778 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1779 break;
1781 case INDEX_op_mb:
1782 tcg_out_mb(s, a0);
1783 break;
1785 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1786 case INDEX_op_mov_i64:
1787 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1788 case INDEX_op_movi_i64:
1789 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1790 default:
1791 tcg_abort();
1794 #undef REG0
1797 static const TCGTargetOpDef aarch64_op_defs[] = {
1798 { INDEX_op_exit_tb, { } },
1799 { INDEX_op_goto_tb, { } },
1800 { INDEX_op_br, { } },
1801 { INDEX_op_goto_ptr, { "r" } },
1803 { INDEX_op_ld8u_i32, { "r", "r" } },
1804 { INDEX_op_ld8s_i32, { "r", "r" } },
1805 { INDEX_op_ld16u_i32, { "r", "r" } },
1806 { INDEX_op_ld16s_i32, { "r", "r" } },
1807 { INDEX_op_ld_i32, { "r", "r" } },
1808 { INDEX_op_ld8u_i64, { "r", "r" } },
1809 { INDEX_op_ld8s_i64, { "r", "r" } },
1810 { INDEX_op_ld16u_i64, { "r", "r" } },
1811 { INDEX_op_ld16s_i64, { "r", "r" } },
1812 { INDEX_op_ld32u_i64, { "r", "r" } },
1813 { INDEX_op_ld32s_i64, { "r", "r" } },
1814 { INDEX_op_ld_i64, { "r", "r" } },
1816 { INDEX_op_st8_i32, { "rZ", "r" } },
1817 { INDEX_op_st16_i32, { "rZ", "r" } },
1818 { INDEX_op_st_i32, { "rZ", "r" } },
1819 { INDEX_op_st8_i64, { "rZ", "r" } },
1820 { INDEX_op_st16_i64, { "rZ", "r" } },
1821 { INDEX_op_st32_i64, { "rZ", "r" } },
1822 { INDEX_op_st_i64, { "rZ", "r" } },
1824 { INDEX_op_add_i32, { "r", "r", "rA" } },
1825 { INDEX_op_add_i64, { "r", "r", "rA" } },
1826 { INDEX_op_sub_i32, { "r", "r", "rA" } },
1827 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1828 { INDEX_op_mul_i32, { "r", "r", "r" } },
1829 { INDEX_op_mul_i64, { "r", "r", "r" } },
1830 { INDEX_op_div_i32, { "r", "r", "r" } },
1831 { INDEX_op_div_i64, { "r", "r", "r" } },
1832 { INDEX_op_divu_i32, { "r", "r", "r" } },
1833 { INDEX_op_divu_i64, { "r", "r", "r" } },
1834 { INDEX_op_rem_i32, { "r", "r", "r" } },
1835 { INDEX_op_rem_i64, { "r", "r", "r" } },
1836 { INDEX_op_remu_i32, { "r", "r", "r" } },
1837 { INDEX_op_remu_i64, { "r", "r", "r" } },
1838 { INDEX_op_and_i32, { "r", "r", "rL" } },
1839 { INDEX_op_and_i64, { "r", "r", "rL" } },
1840 { INDEX_op_or_i32, { "r", "r", "rL" } },
1841 { INDEX_op_or_i64, { "r", "r", "rL" } },
1842 { INDEX_op_xor_i32, { "r", "r", "rL" } },
1843 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1844 { INDEX_op_andc_i32, { "r", "r", "rL" } },
1845 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1846 { INDEX_op_orc_i32, { "r", "r", "rL" } },
1847 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1848 { INDEX_op_eqv_i32, { "r", "r", "rL" } },
1849 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1851 { INDEX_op_neg_i32, { "r", "r" } },
1852 { INDEX_op_neg_i64, { "r", "r" } },
1853 { INDEX_op_not_i32, { "r", "r" } },
1854 { INDEX_op_not_i64, { "r", "r" } },
1856 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1857 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1858 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1859 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1860 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1861 { INDEX_op_clz_i32, { "r", "r", "rAL" } },
1862 { INDEX_op_ctz_i32, { "r", "r", "rAL" } },
1863 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1864 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1865 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1866 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1867 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1868 { INDEX_op_clz_i64, { "r", "r", "rAL" } },
1869 { INDEX_op_ctz_i64, { "r", "r", "rAL" } },
1871 { INDEX_op_brcond_i32, { "r", "rA" } },
1872 { INDEX_op_brcond_i64, { "r", "rA" } },
1873 { INDEX_op_setcond_i32, { "r", "r", "rA" } },
1874 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1875 { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
1876 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1878 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1879 { INDEX_op_qemu_ld_i64, { "r", "l" } },
1880 { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1881 { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1883 { INDEX_op_bswap16_i32, { "r", "r" } },
1884 { INDEX_op_bswap32_i32, { "r", "r" } },
1885 { INDEX_op_bswap16_i64, { "r", "r" } },
1886 { INDEX_op_bswap32_i64, { "r", "r" } },
1887 { INDEX_op_bswap64_i64, { "r", "r" } },
1889 { INDEX_op_ext8s_i32, { "r", "r" } },
1890 { INDEX_op_ext16s_i32, { "r", "r" } },
1891 { INDEX_op_ext8u_i32, { "r", "r" } },
1892 { INDEX_op_ext16u_i32, { "r", "r" } },
1894 { INDEX_op_ext8s_i64, { "r", "r" } },
1895 { INDEX_op_ext16s_i64, { "r", "r" } },
1896 { INDEX_op_ext32s_i64, { "r", "r" } },
1897 { INDEX_op_ext8u_i64, { "r", "r" } },
1898 { INDEX_op_ext16u_i64, { "r", "r" } },
1899 { INDEX_op_ext32u_i64, { "r", "r" } },
1900 { INDEX_op_ext_i32_i64, { "r", "r" } },
1901 { INDEX_op_extu_i32_i64, { "r", "r" } },
1903 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1904 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1905 { INDEX_op_extract_i32, { "r", "r" } },
1906 { INDEX_op_extract_i64, { "r", "r" } },
1907 { INDEX_op_sextract_i32, { "r", "r" } },
1908 { INDEX_op_sextract_i64, { "r", "r" } },
1910 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1911 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1912 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1913 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1915 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1916 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1918 { INDEX_op_mb, { } },
1919 { -1 },
1920 };
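/*
 * Rough key to the constraint letters used above (editor's note; the
 * authoritative mapping is this backend's constraint parser earlier in
 * this file plus the generic "i" and "0" handling in tcg.c):
 *   r   any general register          l   register usable by qemu_ld/st
 *   A   arithmetic immediate (aimm)   L   logical immediate (limm)
 *   Z   the constant zero (XZR/WZR)   M   the constant -1
 *   i   any immediate                 0   must match output operand 0
 */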
1922 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
1923 {
1924 int i, n = ARRAY_SIZE(aarch64_op_defs);
1926 for (i = 0; i < n; ++i) {
1927 if (aarch64_op_defs[i].op == op) {
1928 return &aarch64_op_defs[i];
1929 }
1930 }
1931 return NULL;
1932 }
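/*
 * For example, tcg_target_op_def(INDEX_op_add_i32) returns the entry
 * { INDEX_op_add_i32, { "r", "r", "rA" } }: the output and first input
 * must be registers, while the second input may also be an arithmetic
 * immediate.  Opcodes without an entry yield NULL.
 */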
1934 static void tcg_target_init(TCGContext *s)
1935 {
1936 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1937 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1939 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1940 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1941 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1942 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1943 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1944 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1945 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1946 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1947 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1948 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1949 (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
1951 tcg_regset_clear(s->reserved_regs);
1952 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1953 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1954 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1955 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1956 }
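/*
 * Note that the clobber mask above omits x19..x28 and the frame pointer,
 * which are callee-saved under the AArch64 procedure call standard and so
 * survive helper calls; x18 is both clobbered and reserved because the
 * platform ABI may use it.
 */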
1958 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1959 #define PUSH_SIZE ((30 - 19 + 1) * 8)
1961 #define FRAME_SIZE \
1962 ((PUSH_SIZE \
1963 + TCG_STATIC_CALL_ARGS_SIZE \
1964 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1965 + TCG_TARGET_STACK_ALIGN - 1) \
1966 & ~(TCG_TARGET_STACK_ALIGN - 1))
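/*
 * Worked example (assuming the usual values of 128 for
 * TCG_STATIC_CALL_ARGS_SIZE and 128 for CPU_TEMP_BUF_NLONGS; see tcg.h
 * for the real definitions):
 *   PUSH_SIZE  = (30 - 19 + 1) * 8           =   96
 *   FRAME_SIZE = align16(96 + 128 + 128 * 8) = 1248
 * 1248 fits the two-byte uleb128 expected below, and 1248 - 96 = 1152
 * fits the single ADDI/SUBI immediate expected below.
 */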
1968 /* We're expecting a 2-byte uleb128-encoded value. */
1969 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
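/*
 * Illustrative sketch, not part of the original file: uleb128 stores
 * seven payload bits per byte with the top bit meaning "more bytes
 * follow", so any value below (1 << 14) encodes in at most two bytes,
 * which is what the assert above and the .fde_def_cfa bytes at the end
 * of this file rely on.  The helper name is hypothetical.
 */
static inline size_t uleb128_encode(uint8_t *out, uint32_t val)
{
    size_t n = 0;
    do {
        uint8_t byte = val & 0x7f;      /* low seven bits of the value */
        val >>= 7;
        if (val) {
            byte |= 0x80;               /* continuation bit: more to come */
        }
        out[n++] = byte;
    } while (val);
    return n;                           /* <= 2 whenever val < (1 << 14) */
}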
1971 /* We're expecting to use a single ADDI insn. */
1972 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
1974 static void tcg_target_qemu_prologue(TCGContext *s)
1975 {
1976 TCGReg r;
1978 /* Push (FP, LR) and allocate space for all saved registers. */
1979 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1980 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1982 /* Set up frame pointer for canonical unwinding. */
1983 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1985 /* Store callee-preserved regs x19..x28. */
1986 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1987 int ofs = (r - TCG_REG_X19 + 2) * 8;
1988 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1989 }
1991 /* Make stack space for TCG locals. */
1992 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1993 FRAME_SIZE - PUSH_SIZE);
1995 /* Inform TCG about how to find TCG locals with register, offset, size. */
1996 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1997 CPU_TEMP_BUF_NLONGS * sizeof(long));
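/*
 * Resulting frame, from the adjusted SP upwards (an illustration of the
 * layout established above):
 *   sp + 0                            outgoing helper call arguments
 *                                     (TCG_STATIC_CALL_ARGS_SIZE bytes)
 *   sp + TCG_STATIC_CALL_ARGS_SIZE    TCG temp buffer
 *                                     (CPU_TEMP_BUF_NLONGS * sizeof(long))
 *   sp + FRAME_SIZE - PUSH_SIZE       saved FP and LR
 *   sp + FRAME_SIZE - PUSH_SIZE + 16  saved x19..x28
 */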
1999 #if !defined(CONFIG_SOFTMMU)
2000 if (USE_GUEST_BASE) {
2001 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2002 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2003 }
2004 #endif
2006 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2007 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2009 /*
2010 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2011 * and fall through to the rest of the epilogue.
2012 */
2013 s->code_gen_epilogue = s->code_ptr;
2014 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2016 /* TB epilogue */
2017 tb_ret_addr = s->code_ptr;
2019 /* Remove TCG locals stack space. */
2020 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2021 FRAME_SIZE - PUSH_SIZE);
2023 /* Restore registers x19..x28. */
2024 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2025 int ofs = (r - TCG_REG_X19 + 2) * 8;
2026 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2027 }
2029 /* Pop (FP, LR), restore SP to previous frame. */
2030 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2031 TCG_REG_SP, PUSH_SIZE, 0, 1);
2032 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2033 }
2035 typedef struct {
2036 DebugFrameHeader h;
2037 uint8_t fde_def_cfa[4];
2038 uint8_t fde_reg_ofs[24];
2039 } DebugFrame;
2041 #define ELF_HOST_MACHINE EM_AARCH64
2043 static const DebugFrame debug_frame = {
2044 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2045 .h.cie.id = -1,
2046 .h.cie.version = 1,
2047 .h.cie.code_align = 1,
2048 .h.cie.data_align = 0x78, /* sleb128 -8 */
2049 .h.cie.return_column = TCG_REG_LR,
2051 /* Total FDE size does not include the "len" member. */
2052 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2054 .fde_def_cfa = {
2055 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2056 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2057 (FRAME_SIZE >> 7)
2058 },
2059 .fde_reg_ofs = {
2060 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2061 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2062 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2063 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2064 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2065 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2066 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2067 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2068 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2069 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2070 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2071 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2072 }
2073 };
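/*
 * Decoding sketch for the CFI bytes above:
 *   .data_align = 0x78 is sleb128(-8), so each DW_CFA_offset operand
 *   counts 8-byte slots below the CFA.
 *   12, TCG_REG_SP, uleb128(FRAME_SIZE)   DW_CFA_def_cfa: CFA = sp + FRAME_SIZE
 *   0x80 + 28, 1                          DW_CFA_offset:  x28 at CFA - 8
 *   0x80 + 19, 10                         DW_CFA_offset:  x19 at CFA - 80
 *   0x80 + 29, 12                         DW_CFA_offset:  fp  at CFA - 96
 */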
2075 void tcg_register_jit(void *buf, size_t buf_size)
2076 {
2077 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2078 }