tcg/aarch64/tcg-target.inc.c
1 /*
2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
16 /* We're going to re-use TCGType when setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
28 #endif /* CONFIG_DEBUG_TCG */
30 static const int tcg_target_reg_alloc_order[] = {
31 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
32 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
33 TCG_REG_X28, /* we will reserve this for guest_base if configured */
35 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
36 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
37 TCG_REG_X16, TCG_REG_X17,
39 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
40 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
42 /* X18 reserved by system */
43 /* X19 reserved for AREG0 */
44 /* X29 reserved as fp */
45 /* X30 reserved as temporary */
48 static const int tcg_target_call_iarg_regs[8] = {
49 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
50 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
52 static const int tcg_target_call_oarg_regs[1] = {
53 TCG_REG_X0
56 #define TCG_REG_TMP TCG_REG_X30
58 #ifndef CONFIG_SOFTMMU
59 /* Note that XZR cannot be encoded in the address base register slot,
60 as that actually encodes SP. So if we need to zero-extend the guest
61 address, via the address index register slot, we need to load even
62 a zero guest base into a register. */
63 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
64 #define TCG_REG_GUEST_BASE TCG_REG_X28
65 #endif
67 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
69 ptrdiff_t offset = target - code_ptr;
70 tcg_debug_assert(offset == sextract64(offset, 0, 26));
71 /* read instruction, mask away previous PC_REL26 parameter contents,
72 set the proper offset, then write back the instruction. */
73 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
76 static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
77 tcg_insn_unit *target)
79 ptrdiff_t offset = target - code_ptr;
80 tcg_insn_unit insn;
81 tcg_debug_assert(offset == sextract64(offset, 0, 26));
82 /* read instruction, mask away previous PC_REL26 parameter contents,
83 set the proper offset, then write back the instruction. */
84 insn = atomic_read(code_ptr);
85 atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
88 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
90 ptrdiff_t offset = target - code_ptr;
91 tcg_debug_assert(offset == sextract64(offset, 0, 19));
92 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
95 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
96 intptr_t value, intptr_t addend)
98 tcg_debug_assert(addend == 0);
99 switch (type) {
100 case R_AARCH64_JUMP26:
101 case R_AARCH64_CALL26:
102 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
103 break;
104 case R_AARCH64_CONDBR19:
105 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
106 break;
107 default:
108 tcg_abort();
112 #define TCG_CT_CONST_AIMM 0x100
113 #define TCG_CT_CONST_LIMM 0x200
114 #define TCG_CT_CONST_ZERO 0x400
115 #define TCG_CT_CONST_MONE 0x800
117 /* parse target specific constraints */
118 static const char *target_parse_constraint(TCGArgConstraint *ct,
119 const char *ct_str, TCGType type)
121 switch (*ct_str++) {
122 case 'r':
123 ct->ct |= TCG_CT_REG;
124 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
125 break;
126 case 'l': /* qemu_ld / qemu_st address, data_reg */
127 ct->ct |= TCG_CT_REG;
128 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
129 #ifdef CONFIG_SOFTMMU
130 /* x0 and x1 will be overwritten when reading the tlb entry,
131 and x2 and x3 are used for helper args, so it is better to avoid using them. */
132 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
133 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
134 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
135 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
136 #endif
137 break;
138 case 'A': /* Valid for arithmetic immediate (positive or negative). */
139 ct->ct |= TCG_CT_CONST_AIMM;
140 break;
141 case 'L': /* Valid for logical immediate. */
142 ct->ct |= TCG_CT_CONST_LIMM;
143 break;
144 case 'M': /* minus one */
145 ct->ct |= TCG_CT_CONST_MONE;
146 break;
147 case 'Z': /* zero */
148 ct->ct |= TCG_CT_CONST_ZERO;
149 break;
150 default:
151 return NULL;
153 return ct_str;
156 static inline bool is_aimm(uint64_t val)
158 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
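/* E.g. 0xfff and 0x123000 are valid arithmetic immediates (a 12-bit value,
   optionally shifted left by 12), whereas 0x1001 is not.  */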
161 static inline bool is_limm(uint64_t val)
163 /* Taking a simplified view of the logical immediates for now, ignoring
164 the replication that can happen across the field. Match bit patterns
165 of the forms
166 0....01....1
167 0..01..10..0
168 and their inverses. */
170 /* Make things easier below, by testing the form with msb clear. */
171 if ((int64_t)val < 0) {
172 val = ~val;
174 if (val == 0) {
175 return false;
177 val += val & -val;
178 return (val & (val - 1)) == 0;
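/* With this simplified test, e.g. 0xff (0....01....1) and 0x0ff0
   (0..01..10..0) are accepted, while a replicated pattern such as
   0x0f0f0f0f0f0f0f0f is rejected even though the architecture can
   encode it.  */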
181 static int tcg_target_const_match(tcg_target_long val, TCGType type,
182 const TCGArgConstraint *arg_ct)
184 int ct = arg_ct->ct;
186 if (ct & TCG_CT_CONST) {
187 return 1;
189 if (type == TCG_TYPE_I32) {
190 val = (int32_t)val;
192 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
193 return 1;
195 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
196 return 1;
198 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
199 return 1;
201 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
202 return 1;
205 return 0;
208 enum aarch64_cond_code {
209 COND_EQ = 0x0,
210 COND_NE = 0x1,
211 COND_CS = 0x2, /* Unsigned greater or equal */
212 COND_HS = COND_CS, /* ALIAS greater or equal */
213 COND_CC = 0x3, /* Unsigned less than */
214 COND_LO = COND_CC, /* ALIAS Lower */
215 COND_MI = 0x4, /* Negative */
216 COND_PL = 0x5, /* Zero or greater */
217 COND_VS = 0x6, /* Overflow */
218 COND_VC = 0x7, /* No overflow */
219 COND_HI = 0x8, /* Unsigned greater than */
220 COND_LS = 0x9, /* Unsigned less or equal */
221 COND_GE = 0xa,
222 COND_LT = 0xb,
223 COND_GT = 0xc,
224 COND_LE = 0xd,
225 COND_AL = 0xe,
226 COND_NV = 0xf, /* behaves like COND_AL here */
229 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
230 [TCG_COND_EQ] = COND_EQ,
231 [TCG_COND_NE] = COND_NE,
232 [TCG_COND_LT] = COND_LT,
233 [TCG_COND_GE] = COND_GE,
234 [TCG_COND_LE] = COND_LE,
235 [TCG_COND_GT] = COND_GT,
236 /* unsigned */
237 [TCG_COND_LTU] = COND_LO,
238 [TCG_COND_GTU] = COND_HI,
239 [TCG_COND_GEU] = COND_HS,
240 [TCG_COND_LEU] = COND_LS,
243 typedef enum {
244 LDST_ST = 0, /* store */
245 LDST_LD = 1, /* load */
246 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
247 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
248 } AArch64LdstType;
250 /* We encode the format of the insn into the beginning of the name, so that
251 we can have the preprocessor help "typecheck" the insn vs the output
252 function. Arm didn't provide us with nice names for the formats, so we
253 use the section number of the architecture reference manual in which the
254 instruction group is described. */
255 typedef enum {
256 /* Compare and branch (immediate). */
257 I3201_CBZ = 0x34000000,
258 I3201_CBNZ = 0x35000000,
260 /* Conditional branch (immediate). */
261 I3202_B_C = 0x54000000,
263 /* Unconditional branch (immediate). */
264 I3206_B = 0x14000000,
265 I3206_BL = 0x94000000,
267 /* Unconditional branch (register). */
268 I3207_BR = 0xd61f0000,
269 I3207_BLR = 0xd63f0000,
270 I3207_RET = 0xd65f0000,
272 /* Load/store register. Described here as 3.3.12, but the helper
273 that emits them can transform to 3.3.10 or 3.3.13. */
274 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
275 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
276 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
277 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
279 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
280 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
281 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
282 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
284 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
285 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
287 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
288 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
289 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
291 I3312_TO_I3310 = 0x00200800,
292 I3312_TO_I3313 = 0x01000000,
294 /* Load/store register pair instructions. */
295 I3314_LDP = 0x28400000,
296 I3314_STP = 0x28000000,
298 /* Add/subtract immediate instructions. */
299 I3401_ADDI = 0x11000000,
300 I3401_ADDSI = 0x31000000,
301 I3401_SUBI = 0x51000000,
302 I3401_SUBSI = 0x71000000,
304 /* Bitfield instructions. */
305 I3402_BFM = 0x33000000,
306 I3402_SBFM = 0x13000000,
307 I3402_UBFM = 0x53000000,
309 /* Extract instruction. */
310 I3403_EXTR = 0x13800000,
312 /* Logical immediate instructions. */
313 I3404_ANDI = 0x12000000,
314 I3404_ORRI = 0x32000000,
315 I3404_EORI = 0x52000000,
317 /* Move wide immediate instructions. */
318 I3405_MOVN = 0x12800000,
319 I3405_MOVZ = 0x52800000,
320 I3405_MOVK = 0x72800000,
322 /* PC relative addressing instructions. */
323 I3406_ADR = 0x10000000,
324 I3406_ADRP = 0x90000000,
326 /* Add/subtract shifted register instructions (without a shift). */
327 I3502_ADD = 0x0b000000,
328 I3502_ADDS = 0x2b000000,
329 I3502_SUB = 0x4b000000,
330 I3502_SUBS = 0x6b000000,
332 /* Add/subtract shifted register instructions (with a shift). */
333 I3502S_ADD_LSL = I3502_ADD,
335 /* Add/subtract with carry instructions. */
336 I3503_ADC = 0x1a000000,
337 I3503_SBC = 0x5a000000,
339 /* Conditional select instructions. */
340 I3506_CSEL = 0x1a800000,
341 I3506_CSINC = 0x1a800400,
342 I3506_CSINV = 0x5a800000,
343 I3506_CSNEG = 0x5a800400,
345 /* Data-processing (1 source) instructions. */
346 I3507_CLZ = 0x5ac01000,
347 I3507_RBIT = 0x5ac00000,
348 I3507_REV16 = 0x5ac00400,
349 I3507_REV32 = 0x5ac00800,
350 I3507_REV64 = 0x5ac00c00,
352 /* Data-processing (2 source) instructions. */
353 I3508_LSLV = 0x1ac02000,
354 I3508_LSRV = 0x1ac02400,
355 I3508_ASRV = 0x1ac02800,
356 I3508_RORV = 0x1ac02c00,
357 I3508_SMULH = 0x9b407c00,
358 I3508_UMULH = 0x9bc07c00,
359 I3508_UDIV = 0x1ac00800,
360 I3508_SDIV = 0x1ac00c00,
362 /* Data-processing (3 source) instructions. */
363 I3509_MADD = 0x1b000000,
364 I3509_MSUB = 0x1b008000,
366 /* Logical shifted register instructions (without a shift). */
367 I3510_AND = 0x0a000000,
368 I3510_BIC = 0x0a200000,
369 I3510_ORR = 0x2a000000,
370 I3510_ORN = 0x2a200000,
371 I3510_EOR = 0x4a000000,
372 I3510_EON = 0x4a200000,
373 I3510_ANDS = 0x6a000000,
375 /* System instructions. */
376 DMB_ISH = 0xd50338bf,
377 DMB_LD = 0x00000100,
378 DMB_ST = 0x00000200,
379 } AArch64Insn;
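/* As a rough sketch of how the 3.3.12 constants combine: I3312_LDRX is
   0x38000000 | LDST_LD << 22 | MO_64 << 30 == 0xf8400000, the unscaled
   (LDUR-style) form; OR-ing in I3312_TO_I3313 selects the scaled unsigned
   offset form, and I3312_TO_I3310 the register offset form.  */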
381 static inline uint32_t tcg_in32(TCGContext *s)
383 uint32_t v = *(uint32_t *)s->code_ptr;
384 return v;
387 /* Emit an opcode with "type-checking" of the format. */
388 #define tcg_out_insn(S, FMT, OP, ...) \
389 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
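/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, 0); using a format that
   does not match the opcode fails to compile.  */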
391 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
392 TCGReg rt, int imm19)
394 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
397 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
398 TCGCond c, int imm19)
400 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
403 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
405 tcg_out32(s, insn | (imm26 & 0x03ffffff));
408 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
410 tcg_out32(s, insn | rn << 5);
413 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
414 TCGReg r1, TCGReg r2, TCGReg rn,
415 tcg_target_long ofs, bool pre, bool w)
417 insn |= 1u << 31; /* ext */
418 insn |= pre << 24;
419 insn |= w << 23;
421 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
422 insn |= (ofs & (0x7f << 3)) << (15 - 3);
424 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
427 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
428 TCGReg rd, TCGReg rn, uint64_t aimm)
430 if (aimm > 0xfff) {
431 tcg_debug_assert((aimm & 0xfff) == 0);
432 aimm >>= 12;
433 tcg_debug_assert(aimm <= 0xfff);
434 aimm |= 1 << 12; /* apply LSL 12 */
436 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
439 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
440 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
441 that feed the DecodeBitMasks pseudo function. */
442 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
443 TCGReg rd, TCGReg rn, int n, int immr, int imms)
445 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
446 | rn << 5 | rd);
449 #define tcg_out_insn_3404 tcg_out_insn_3402
451 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
452 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
454 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
455 | rn << 5 | rd);
458 /* This function is used for the Move (wide immediate) instruction group.
459 Note that SHIFT is a full shift count, not the 2 bit HW field. */
460 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
461 TCGReg rd, uint16_t half, unsigned shift)
463 tcg_debug_assert((shift & ~0x30) == 0);
464 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
467 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
468 TCGReg rd, int64_t disp)
470 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
473 /* This function is for 3.5.2 (Add/subtract shifted register), for
474 the rare occasion when we actually want to supply a shift amount. */
475 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
476 TCGType ext, TCGReg rd, TCGReg rn,
477 TCGReg rm, int imm6)
479 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
482 /* This function is for 3.5.2 (Add/subtract shifted register),
483 and 3.5.10 (Logical shifted register), for the vast majority of cases
484 when we don't want to apply a shift. Thus it can also be used for
485 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
486 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
487 TCGReg rd, TCGReg rn, TCGReg rm)
489 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
492 #define tcg_out_insn_3503 tcg_out_insn_3502
493 #define tcg_out_insn_3508 tcg_out_insn_3502
494 #define tcg_out_insn_3510 tcg_out_insn_3502
496 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
497 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
499 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
500 | tcg_cond_to_aarch64[c] << 12);
503 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
504 TCGReg rd, TCGReg rn)
506 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
509 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
510 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
512 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
515 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
516 TCGReg rd, TCGReg base, TCGType ext,
517 TCGReg regoff)
519 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
520 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
521 0x4000 | ext << 13 | base << 5 | rd);
524 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
525 TCGReg rd, TCGReg rn, intptr_t offset)
527 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
530 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
531 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
533 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
534 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
537 /* Register to register move using ORR (shifted register with no shift). */
538 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
540 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
543 /* Register to register move using ADDI (move to/from SP). */
544 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
546 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
549 /* This function is used for the Logical (immediate) instruction group.
550 The value of LIMM must satisfy IS_LIMM. See the comment above about
551 only supporting simplified logical immediates. */
552 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
553 TCGReg rd, TCGReg rn, uint64_t limm)
555 unsigned h, l, r, c;
557 tcg_debug_assert(is_limm(limm));
559 h = clz64(limm);
560 l = ctz64(limm);
561 if (l == 0) {
562 r = 0; /* form 0....01....1 */
563 c = ctz64(~limm) - 1;
564 if (h == 0) {
565 r = clz64(~limm); /* form 1..10..01..1 */
566 c += r;
568 } else {
569 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
570 c = r - h - 1;
572 if (ext == TCG_TYPE_I32) {
573 r &= 31;
574 c &= 31;
577 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
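/* For example, limm == 0xff (64-bit) takes the l == 0 path above and
   yields r == 0, c == 7, i.e. IMMR == 0 and IMMS == 7: a run of eight
   ones with no rotation.  */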
580 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
581 tcg_target_long value)
583 int i, wantinv, shift;
584 tcg_target_long svalue = value;
585 tcg_target_long ivalue = ~value;
587 /* For 32-bit values, discard potential garbage in value. For 64-bit
588 values within [2**31, 2**32-1], we can create smaller sequences by
589 interpreting this as a negative 32-bit number, while ensuring that
590 the high 32 bits are cleared by setting SF=0. */
591 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
592 svalue = (int32_t)value;
593 value = (uint32_t)value;
594 ivalue = (uint32_t)ivalue;
595 type = TCG_TYPE_I32;
598 /* Speed things up by handling the common case of small positive
599 and negative values specially. */
600 if ((value & ~0xffffull) == 0) {
601 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
602 return;
603 } else if ((ivalue & ~0xffffull) == 0) {
604 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
605 return;
608 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
609 use the sign-extended value. That lets us match rotated values such
610 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
611 if (is_limm(svalue)) {
612 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
613 return;
616 /* Look for host pointer values within 4G of the PC. This happens
617 often when loading pointers to QEMU's own data structures. */
618 if (type == TCG_TYPE_I64) {
619 tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
620 if (disp == sextract64(disp, 0, 21)) {
621 tcg_out_insn(s, 3406, ADRP, rd, disp);
622 if (value & 0xfff) {
623 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
625 return;
629 /* Would it take fewer insns to begin with MOVN? For the value and its
630 inverse, count the number of 16-bit lanes that are 0. */
631 for (i = wantinv = 0; i < 64; i += 16) {
632 tcg_target_long mask = 0xffffull << i;
633 wantinv -= ((value & mask) == 0);
634 wantinv += ((ivalue & mask) == 0);
637 if (wantinv <= 0) {
638 /* Find the lowest lane that is not 0x0000. */
639 shift = ctz64(value) & (63 & -16);
640 tcg_out_insn(s, 3405, MOVZ, type, rd, value >> shift, shift);
641 /* Clear out the lane that we just set. */
642 value &= ~(0xffffUL << shift);
643 /* Iterate until all non-zero lanes have been processed. */
644 while (value) {
645 shift = ctz64(value) & (63 & -16);
646 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
647 value &= ~(0xffffUL << shift);
649 } else {
650 /* Like above, but with the inverted value and MOVN to start. */
651 shift = ctz64(ivalue) & (63 & -16);
652 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue >> shift, shift);
653 ivalue &= ~(0xffffUL << shift);
654 while (ivalue) {
655 shift = ctz64(ivalue) & (63 & -16);
656 /* Provide MOVK with the non-inverted value. */
657 tcg_out_insn(s, 3405, MOVK, type, rd, ~(ivalue >> shift), shift);
658 ivalue &= ~(0xffffUL << shift);
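/* As an example of the lane counting above: 0xffffffff00001234 has one
   zero lane while its inverse has two, so we start with MOVN and need
   only one MOVK, instead of a MOVZ plus two MOVKs.  */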
663 /* Define something more legible for general use. */
664 #define tcg_out_ldst_r tcg_out_insn_3310
666 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
667 TCGReg rd, TCGReg rn, intptr_t offset)
669 TCGMemOp size = (uint32_t)insn >> 30;
671 /* If the offset is naturally aligned and in range, then we can
672 use the scaled uimm12 encoding */
673 if (offset >= 0 && !(offset & ((1 << size) - 1))) {
674 uintptr_t scaled_uimm = offset >> size;
675 if (scaled_uimm <= 0xfff) {
676 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
677 return;
681 /* Small signed offsets can use the unscaled encoding. */
682 if (offset >= -256 && offset < 256) {
683 tcg_out_insn_3312(s, insn, rd, rn, offset);
684 return;
687 /* Worst-case scenario, move offset to temp register, use reg offset. */
688 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
689 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
692 static inline void tcg_out_mov(TCGContext *s,
693 TCGType type, TCGReg ret, TCGReg arg)
695 if (ret != arg) {
696 tcg_out_movr(s, type, ret, arg);
700 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
701 TCGReg arg1, intptr_t arg2)
703 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
704 arg, arg1, arg2);
707 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
708 TCGReg arg1, intptr_t arg2)
710 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
711 arg, arg1, arg2);
714 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
715 TCGReg base, intptr_t ofs)
717 if (val == 0) {
718 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
719 return true;
721 return false;
724 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
725 TCGReg rn, unsigned int a, unsigned int b)
727 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
730 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
731 TCGReg rn, unsigned int a, unsigned int b)
733 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
736 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
737 TCGReg rn, unsigned int a, unsigned int b)
739 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
742 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
743 TCGReg rn, TCGReg rm, unsigned int a)
745 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
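/* The shift and rotate helpers below rely on the standard aliases:
   LSL and LSR are UBFM, ASR is SBFM, and ROR is EXTR with both source
   registers equal; rotate-left is emitted as rotate-right by
   (bits - m).  */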
748 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
749 TCGReg rd, TCGReg rn, unsigned int m)
751 int bits = ext ? 64 : 32;
752 int max = bits - 1;
753 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
756 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
757 TCGReg rd, TCGReg rn, unsigned int m)
759 int max = ext ? 63 : 31;
760 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
763 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
764 TCGReg rd, TCGReg rn, unsigned int m)
766 int max = ext ? 63 : 31;
767 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
770 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
771 TCGReg rd, TCGReg rn, unsigned int m)
773 int max = ext ? 63 : 31;
774 tcg_out_extr(s, ext, rd, rn, rn, m & max);
777 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
778 TCGReg rd, TCGReg rn, unsigned int m)
780 int bits = ext ? 64 : 32;
781 int max = bits - 1;
782 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
785 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
786 TCGReg rn, unsigned lsb, unsigned width)
788 unsigned size = ext ? 64 : 32;
789 unsigned a = (size - lsb) & (size - 1);
790 unsigned b = width - 1;
791 tcg_out_bfm(s, ext, rd, rn, a, b);
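/* The deposit above is the BFI alias: BFM with IMMR == (-lsb mod size)
   and IMMS == width - 1 inserts RN into RD[lsb, lsb+width).  */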
794 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
795 tcg_target_long b, bool const_b)
797 if (const_b) {
798 /* Using CMP or CMN aliases. */
799 if (b >= 0) {
800 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
801 } else {
802 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
804 } else {
805 /* Using CMP alias SUBS wzr, Wn, Wm */
806 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
810 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
812 ptrdiff_t offset = target - s->code_ptr;
813 tcg_debug_assert(offset == sextract64(offset, 0, 26));
814 tcg_out_insn(s, 3206, B, offset);
817 static inline void tcg_out_goto_noaddr(TCGContext *s)
819 /* We pay attention here to not modify the branch target by reading from
820 the buffer. This ensures that caches and memory are kept coherent during
821 retranslation. Mask away possible garbage in the high bits for the
822 first translation, while keeping the offset bits for retranslation. */
823 uint32_t old = tcg_in32(s);
824 tcg_out_insn(s, 3206, B, old);
827 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
829 /* See comments in tcg_out_goto_noaddr. */
830 uint32_t old = tcg_in32(s) >> 5;
831 tcg_out_insn(s, 3202, B_C, c, old);
834 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
836 tcg_out_insn(s, 3207, BLR, reg);
839 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
841 ptrdiff_t offset = target - s->code_ptr;
842 if (offset == sextract64(offset, 0, 26)) {
843 tcg_out_insn(s, 3206, BL, offset);
844 } else {
845 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
846 tcg_out_callr(s, TCG_REG_TMP);
850 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
852 tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
853 tcg_insn_unit *target = (tcg_insn_unit *)addr;
855 reloc_pc26_atomic(code_ptr, target);
856 flush_icache_range(jmp_addr, jmp_addr + 4);
859 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
861 if (!l->has_value) {
862 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
863 tcg_out_goto_noaddr(s);
864 } else {
865 tcg_out_goto(s, l->u.value_ptr);
869 static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
870 TCGArg b, bool b_const, TCGLabel *l)
872 intptr_t offset;
873 bool need_cmp;
875 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
876 need_cmp = false;
877 } else {
878 need_cmp = true;
879 tcg_out_cmp(s, ext, a, b, b_const);
882 if (!l->has_value) {
883 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
884 offset = tcg_in32(s) >> 5;
885 } else {
886 offset = l->u.value_ptr - s->code_ptr;
887 tcg_debug_assert(offset == sextract64(offset, 0, 19));
890 if (need_cmp) {
891 tcg_out_insn(s, 3202, B_C, c, offset);
892 } else if (c == TCG_COND_EQ) {
893 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
894 } else {
895 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
899 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
901 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
904 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
906 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
909 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
911 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
914 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
915 TCGReg rd, TCGReg rn)
917 /* Using ALIASes SXTB, SXTH, SXTW of SBFM Xd, Xn, #0, #7|15|31 */
918 int bits = (8 << s_bits) - 1;
919 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
922 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
923 TCGReg rd, TCGReg rn)
925 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
926 int bits = (8 << s_bits) - 1;
927 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
930 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
931 TCGReg rn, int64_t aimm)
933 if (aimm >= 0) {
934 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
935 } else {
936 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
940 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
941 TCGReg rh, TCGReg al, TCGReg ah,
942 tcg_target_long bl, tcg_target_long bh,
943 bool const_bl, bool const_bh, bool sub)
945 TCGReg orig_rl = rl;
946 AArch64Insn insn;
948 if (rl == ah || (!const_bh && rl == bh)) {
949 rl = TCG_REG_TMP;
952 if (const_bl) {
953 insn = I3401_ADDSI;
954 if ((bl < 0) ^ sub) {
955 insn = I3401_SUBSI;
956 bl = -bl;
958 if (unlikely(al == TCG_REG_XZR)) {
959 /* ??? We want to allow al to be zero for the benefit of
960 negation via subtraction. However, that leaves open the
961 possibility of adding 0+const in the low part, and the
962 immediate add instructions encode XSP not XZR. Don't try
963 anything more elaborate here than loading another zero. */
964 al = TCG_REG_TMP;
965 tcg_out_movi(s, ext, al, 0);
967 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
968 } else {
969 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
972 insn = I3503_ADC;
973 if (const_bh) {
974 /* Note that the only two constants we support are 0 and -1, and
975 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
976 if ((bh != 0) ^ sub) {
977 insn = I3503_SBC;
979 bh = TCG_REG_XZR;
980 } else if (sub) {
981 insn = I3503_SBC;
983 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
985 tcg_out_mov(s, ext, orig_rl, rl);
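/* tcg_out_mb below maps TCG_MO_LD_LD and TCG_MO_LD_ST (and their
   combination) to DMB ISHLD, TCG_MO_ST_ST alone to DMB ISHST, and every
   other combination to a full DMB ISH.  */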
988 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
990 static const uint32_t sync[] = {
991 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
992 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
993 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
994 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
995 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
997 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1000 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1001 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1003 TCGReg a1 = a0;
1004 if (is_ctz) {
1005 a1 = TCG_REG_TMP;
1006 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1008 if (const_b && b == (ext ? 64 : 32)) {
1009 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1010 } else {
1011 AArch64Insn sel = I3506_CSEL;
1013 tcg_out_cmp(s, ext, a0, 0, 1);
1014 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1016 if (const_b) {
1017 if (b == -1) {
1018 b = TCG_REG_XZR;
1019 sel = I3506_CSINV;
1020 } else if (b == 0) {
1021 b = TCG_REG_XZR;
1022 } else {
1023 tcg_out_movi(s, ext, d, b);
1024 b = d;
1027 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
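/* Above, CTZ is computed as CLZ(RBIT(x)).  When the "input is zero"
   value is not simply the register width, we compare the input with
   zero and use a conditional select (CSEL/CSINV) to substitute the
   requested value.  */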
1031 #ifdef CONFIG_SOFTMMU
1032 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1033 * TCGMemOpIdx oi, uintptr_t ra)
1035 static void * const qemu_ld_helpers[16] = {
1036 [MO_UB] = helper_ret_ldub_mmu,
1037 [MO_LEUW] = helper_le_lduw_mmu,
1038 [MO_LEUL] = helper_le_ldul_mmu,
1039 [MO_LEQ] = helper_le_ldq_mmu,
1040 [MO_BEUW] = helper_be_lduw_mmu,
1041 [MO_BEUL] = helper_be_ldul_mmu,
1042 [MO_BEQ] = helper_be_ldq_mmu,
1045 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1046 * uintxx_t val, TCGMemOpIdx oi,
1047 * uintptr_t ra)
1049 static void * const qemu_st_helpers[16] = {
1050 [MO_UB] = helper_ret_stb_mmu,
1051 [MO_LEUW] = helper_le_stw_mmu,
1052 [MO_LEUL] = helper_le_stl_mmu,
1053 [MO_LEQ] = helper_le_stq_mmu,
1054 [MO_BEUW] = helper_be_stw_mmu,
1055 [MO_BEUL] = helper_be_stl_mmu,
1056 [MO_BEQ] = helper_be_stq_mmu,
1059 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1061 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1062 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1063 tcg_out_insn(s, 3406, ADR, rd, offset);
1066 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1068 TCGMemOpIdx oi = lb->oi;
1069 TCGMemOp opc = get_memop(oi);
1070 TCGMemOp size = opc & MO_SIZE;
1072 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1074 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1075 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1076 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1077 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1078 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1079 if (opc & MO_SIGN) {
1080 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1081 } else {
1082 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1085 tcg_out_goto(s, lb->raddr);
1088 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1090 TCGMemOpIdx oi = lb->oi;
1091 TCGMemOp opc = get_memop(oi);
1092 TCGMemOp size = opc & MO_SIZE;
1094 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1096 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1097 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1098 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1099 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1100 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1101 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1102 tcg_out_goto(s, lb->raddr);
1105 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1106 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1107 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1109 TCGLabelQemuLdst *label = new_ldst_label(s);
1111 label->is_ld = is_ld;
1112 label->oi = oi;
1113 label->type = ext;
1114 label->datalo_reg = data_reg;
1115 label->addrlo_reg = addr_reg;
1116 label->raddr = raddr;
1117 label->label_ptr[0] = label_ptr;
1120 /* Load and compare a TLB entry, emitting the conditional jump to the
1121 slow path for the failure case, which will be patched later when finalizing
1122 the slow path. Generated code returns the host addend in X1,
1123 clobbers X0, X2, X3 and TMP. */
1124 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1125 tcg_insn_unit **label_ptr, int mem_index,
1126 bool is_read)
1128 int tlb_offset = is_read ?
1129 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1130 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1131 unsigned a_bits = get_alignment_bits(opc);
1132 unsigned s_bits = opc & MO_SIZE;
1133 unsigned a_mask = (1u << a_bits) - 1;
1134 unsigned s_mask = (1u << s_bits) - 1;
1135 TCGReg base = TCG_AREG0, x3;
1136 uint64_t tlb_mask;
1138 /* For aligned accesses, we check the first byte and include the alignment
1139 bits within the address. For unaligned access, we check that we don't
1140 cross pages using the address of the last byte of the access. */
1141 if (a_bits >= s_bits) {
1142 x3 = addr_reg;
1143 } else {
1144 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1145 TCG_REG_X3, addr_reg, s_mask - a_mask);
1146 x3 = TCG_REG_X3;
1148 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1150 /* Extract the TLB index from the address into X0.
1151 X0<CPU_TLB_BITS:0> =
1152 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1153 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1154 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1156 /* Store the page mask part of the address into X3. */
1157 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1158 TCG_REG_X3, x3, tlb_mask);
1160 /* Add any "high bits" from the tlb offset to the env address into X2,
1161 to take advantage of the LSL12 form of the ADDI instruction.
1162 X2 = env + (tlb_offset & 0xfff000) */
1163 if (tlb_offset & 0xfff000) {
1164 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1165 tlb_offset & 0xfff000);
1166 base = TCG_REG_X2;
1169 /* Merge the tlb index contribution into X2.
1170 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1171 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1172 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1174 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1175 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1176 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1177 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1179 /* Load the tlb addend. Do that early to avoid stalling.
1180 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1181 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1182 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1183 (is_read ? offsetof(CPUTLBEntry, addr_read)
1184 : offsetof(CPUTLBEntry, addr_write)));
1186 /* Perform the address comparison. */
1187 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1189 /* If not equal, we jump to the slow path. */
1190 *label_ptr = s->code_ptr;
1191 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1194 #endif /* CONFIG_SOFTMMU */
1196 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1197 TCGReg data_r, TCGReg addr_r,
1198 TCGType otype, TCGReg off_r)
1200 const TCGMemOp bswap = memop & MO_BSWAP;
1202 switch (memop & MO_SSIZE) {
1203 case MO_UB:
1204 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1205 break;
1206 case MO_SB:
1207 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1208 data_r, addr_r, otype, off_r);
1209 break;
1210 case MO_UW:
1211 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1212 if (bswap) {
1213 tcg_out_rev16(s, data_r, data_r);
1215 break;
1216 case MO_SW:
1217 if (bswap) {
1218 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1219 tcg_out_rev16(s, data_r, data_r);
1220 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1221 } else {
1222 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1223 data_r, addr_r, otype, off_r);
1225 break;
1226 case MO_UL:
1227 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1228 if (bswap) {
1229 tcg_out_rev32(s, data_r, data_r);
1231 break;
1232 case MO_SL:
1233 if (bswap) {
1234 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1235 tcg_out_rev32(s, data_r, data_r);
1236 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1237 } else {
1238 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1240 break;
1241 case MO_Q:
1242 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1243 if (bswap) {
1244 tcg_out_rev64(s, data_r, data_r);
1246 break;
1247 default:
1248 tcg_abort();
1252 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1253 TCGReg data_r, TCGReg addr_r,
1254 TCGType otype, TCGReg off_r)
1256 const TCGMemOp bswap = memop & MO_BSWAP;
1258 switch (memop & MO_SIZE) {
1259 case MO_8:
1260 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1261 break;
1262 case MO_16:
1263 if (bswap && data_r != TCG_REG_XZR) {
1264 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1265 data_r = TCG_REG_TMP;
1267 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1268 break;
1269 case MO_32:
1270 if (bswap && data_r != TCG_REG_XZR) {
1271 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1272 data_r = TCG_REG_TMP;
1274 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1275 break;
1276 case MO_64:
1277 if (bswap && data_r != TCG_REG_XZR) {
1278 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1279 data_r = TCG_REG_TMP;
1281 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1282 break;
1283 default:
1284 tcg_abort();
1288 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1289 TCGMemOpIdx oi, TCGType ext)
1291 TCGMemOp memop = get_memop(oi);
1292 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1293 #ifdef CONFIG_SOFTMMU
1294 unsigned mem_index = get_mmuidx(oi);
1295 tcg_insn_unit *label_ptr;
1297 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1298 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1299 TCG_REG_X1, otype, addr_reg);
1300 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1301 s->code_ptr, label_ptr);
1302 #else /* !CONFIG_SOFTMMU */
1303 if (USE_GUEST_BASE) {
1304 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1305 TCG_REG_GUEST_BASE, otype, addr_reg);
1306 } else {
1307 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1308 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1310 #endif /* CONFIG_SOFTMMU */
1313 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1314 TCGMemOpIdx oi)
1316 TCGMemOp memop = get_memop(oi);
1317 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1318 #ifdef CONFIG_SOFTMMU
1319 unsigned mem_index = get_mmuidx(oi);
1320 tcg_insn_unit *label_ptr;
1322 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1323 tcg_out_qemu_st_direct(s, memop, data_reg,
1324 TCG_REG_X1, otype, addr_reg);
1325 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1326 data_reg, addr_reg, s->code_ptr, label_ptr);
1327 #else /* !CONFIG_SOFTMMU */
1328 if (USE_GUEST_BASE) {
1329 tcg_out_qemu_st_direct(s, memop, data_reg,
1330 TCG_REG_GUEST_BASE, otype, addr_reg);
1331 } else {
1332 tcg_out_qemu_st_direct(s, memop, data_reg,
1333 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1335 #endif /* CONFIG_SOFTMMU */
1338 static tcg_insn_unit *tb_ret_addr;
1340 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1341 const TCGArg args[TCG_MAX_OP_ARGS],
1342 const int const_args[TCG_MAX_OP_ARGS])
1344 /* 99% of the time, we can signal the use of extension registers
1345 by looking to see if the opcode handles 64-bit data. */
1346 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1348 /* Hoist the loads of the most common arguments. */
1349 TCGArg a0 = args[0];
1350 TCGArg a1 = args[1];
1351 TCGArg a2 = args[2];
1352 int c2 = const_args[2];
1354 /* Some operands are defined with "rZ" constraint, a register or
1355 the zero register. These need not actually test args[I] == 0. */
1356 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1358 switch (opc) {
1359 case INDEX_op_exit_tb:
1360 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1361 tcg_out_goto(s, tb_ret_addr);
1362 break;
1364 case INDEX_op_goto_tb:
1365 #ifndef USE_DIRECT_JUMP
1366 #error "USE_DIRECT_JUMP required for aarch64"
1367 #endif
1368 /* consistency for USE_DIRECT_JUMP */
1369 tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
1370 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1371 /* actual branch destination will be patched by
1372 aarch64_tb_set_jmp_target later, beware retranslation. */
1373 tcg_out_goto_noaddr(s);
1374 s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
1375 break;
1377 case INDEX_op_br:
1378 tcg_out_goto_label(s, arg_label(a0));
1379 break;
1381 case INDEX_op_ld8u_i32:
1382 case INDEX_op_ld8u_i64:
1383 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1384 break;
1385 case INDEX_op_ld8s_i32:
1386 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1387 break;
1388 case INDEX_op_ld8s_i64:
1389 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1390 break;
1391 case INDEX_op_ld16u_i32:
1392 case INDEX_op_ld16u_i64:
1393 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1394 break;
1395 case INDEX_op_ld16s_i32:
1396 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1397 break;
1398 case INDEX_op_ld16s_i64:
1399 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1400 break;
1401 case INDEX_op_ld_i32:
1402 case INDEX_op_ld32u_i64:
1403 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1404 break;
1405 case INDEX_op_ld32s_i64:
1406 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1407 break;
1408 case INDEX_op_ld_i64:
1409 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1410 break;
1412 case INDEX_op_st8_i32:
1413 case INDEX_op_st8_i64:
1414 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1415 break;
1416 case INDEX_op_st16_i32:
1417 case INDEX_op_st16_i64:
1418 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1419 break;
1420 case INDEX_op_st_i32:
1421 case INDEX_op_st32_i64:
1422 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1423 break;
1424 case INDEX_op_st_i64:
1425 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1426 break;
1428 case INDEX_op_add_i32:
1429 a2 = (int32_t)a2;
1430 /* FALLTHRU */
1431 case INDEX_op_add_i64:
1432 if (c2) {
1433 tcg_out_addsubi(s, ext, a0, a1, a2);
1434 } else {
1435 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1437 break;
1439 case INDEX_op_sub_i32:
1440 a2 = (int32_t)a2;
1441 /* FALLTHRU */
1442 case INDEX_op_sub_i64:
1443 if (c2) {
1444 tcg_out_addsubi(s, ext, a0, a1, -a2);
1445 } else {
1446 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1448 break;
1450 case INDEX_op_neg_i64:
1451 case INDEX_op_neg_i32:
1452 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1453 break;
1455 case INDEX_op_and_i32:
1456 a2 = (int32_t)a2;
1457 /* FALLTHRU */
1458 case INDEX_op_and_i64:
1459 if (c2) {
1460 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1461 } else {
1462 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1464 break;
1466 case INDEX_op_andc_i32:
1467 a2 = (int32_t)a2;
1468 /* FALLTHRU */
1469 case INDEX_op_andc_i64:
1470 if (c2) {
1471 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1472 } else {
1473 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1475 break;
1477 case INDEX_op_or_i32:
1478 a2 = (int32_t)a2;
1479 /* FALLTHRU */
1480 case INDEX_op_or_i64:
1481 if (c2) {
1482 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1483 } else {
1484 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1486 break;
1488 case INDEX_op_orc_i32:
1489 a2 = (int32_t)a2;
1490 /* FALLTHRU */
1491 case INDEX_op_orc_i64:
1492 if (c2) {
1493 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1494 } else {
1495 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1497 break;
1499 case INDEX_op_xor_i32:
1500 a2 = (int32_t)a2;
1501 /* FALLTHRU */
1502 case INDEX_op_xor_i64:
1503 if (c2) {
1504 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1505 } else {
1506 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1508 break;
1510 case INDEX_op_eqv_i32:
1511 a2 = (int32_t)a2;
1512 /* FALLTHRU */
1513 case INDEX_op_eqv_i64:
1514 if (c2) {
1515 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1516 } else {
1517 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1519 break;
1521 case INDEX_op_not_i64:
1522 case INDEX_op_not_i32:
1523 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1524 break;
1526 case INDEX_op_mul_i64:
1527 case INDEX_op_mul_i32:
1528 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1529 break;
1531 case INDEX_op_div_i64:
1532 case INDEX_op_div_i32:
1533 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1534 break;
1535 case INDEX_op_divu_i64:
1536 case INDEX_op_divu_i32:
1537 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1538 break;
1540 case INDEX_op_rem_i64:
1541 case INDEX_op_rem_i32:
1542 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1543 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1544 break;
1545 case INDEX_op_remu_i64:
1546 case INDEX_op_remu_i32:
1547 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1548 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1549 break;
1551 case INDEX_op_shl_i64:
1552 case INDEX_op_shl_i32:
1553 if (c2) {
1554 tcg_out_shl(s, ext, a0, a1, a2);
1555 } else {
1556 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1558 break;
1560 case INDEX_op_shr_i64:
1561 case INDEX_op_shr_i32:
1562 if (c2) {
1563 tcg_out_shr(s, ext, a0, a1, a2);
1564 } else {
1565 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1567 break;
1569 case INDEX_op_sar_i64:
1570 case INDEX_op_sar_i32:
1571 if (c2) {
1572 tcg_out_sar(s, ext, a0, a1, a2);
1573 } else {
1574 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1576 break;
1578 case INDEX_op_rotr_i64:
1579 case INDEX_op_rotr_i32:
1580 if (c2) {
1581 tcg_out_rotr(s, ext, a0, a1, a2);
1582 } else {
1583 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1585 break;
1587 case INDEX_op_rotl_i64:
1588 case INDEX_op_rotl_i32:
1589 if (c2) {
1590 tcg_out_rotl(s, ext, a0, a1, a2);
1591 } else {
1592 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1593 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1595 break;
1597 case INDEX_op_clz_i64:
1598 case INDEX_op_clz_i32:
1599 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1600 break;
1601 case INDEX_op_ctz_i64:
1602 case INDEX_op_ctz_i32:
1603 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1604 break;
1606 case INDEX_op_brcond_i32:
1607 a1 = (int32_t)a1;
1608 /* FALLTHRU */
1609 case INDEX_op_brcond_i64:
1610 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1611 break;
1613 case INDEX_op_setcond_i32:
1614 a2 = (int32_t)a2;
1615 /* FALLTHRU */
1616 case INDEX_op_setcond_i64:
1617 tcg_out_cmp(s, ext, a1, a2, c2);
1618 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1619 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1620 TCG_REG_XZR, tcg_invert_cond(args[3]));
1621 break;
1623 case INDEX_op_movcond_i32:
1624 a2 = (int32_t)a2;
1625 /* FALLTHRU */
1626 case INDEX_op_movcond_i64:
1627 tcg_out_cmp(s, ext, a1, a2, c2);
1628 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1629 break;
1631 case INDEX_op_qemu_ld_i32:
1632 case INDEX_op_qemu_ld_i64:
1633 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1634 break;
1635 case INDEX_op_qemu_st_i32:
1636 case INDEX_op_qemu_st_i64:
1637 tcg_out_qemu_st(s, REG0(0), a1, a2);
1638 break;
1640 case INDEX_op_bswap64_i64:
1641 tcg_out_rev64(s, a0, a1);
1642 break;
1643 case INDEX_op_bswap32_i64:
1644 case INDEX_op_bswap32_i32:
1645 tcg_out_rev32(s, a0, a1);
1646 break;
1647 case INDEX_op_bswap16_i64:
1648 case INDEX_op_bswap16_i32:
1649 tcg_out_rev16(s, a0, a1);
1650 break;
1652 case INDEX_op_ext8s_i64:
1653 case INDEX_op_ext8s_i32:
1654 tcg_out_sxt(s, ext, MO_8, a0, a1);
1655 break;
1656 case INDEX_op_ext16s_i64:
1657 case INDEX_op_ext16s_i32:
1658 tcg_out_sxt(s, ext, MO_16, a0, a1);
1659 break;
1660 case INDEX_op_ext_i32_i64:
1661 case INDEX_op_ext32s_i64:
1662 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1663 break;
1664 case INDEX_op_ext8u_i64:
1665 case INDEX_op_ext8u_i32:
1666 tcg_out_uxt(s, MO_8, a0, a1);
1667 break;
1668 case INDEX_op_ext16u_i64:
1669 case INDEX_op_ext16u_i32:
1670 tcg_out_uxt(s, MO_16, a0, a1);
1671 break;
1672 case INDEX_op_extu_i32_i64:
1673 case INDEX_op_ext32u_i64:
1674 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1675 break;
1677 case INDEX_op_deposit_i64:
1678 case INDEX_op_deposit_i32:
1679 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1680 break;
1682 case INDEX_op_extract_i64:
1683 case INDEX_op_extract_i32:
1684 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1685 break;
1687 case INDEX_op_sextract_i64:
1688 case INDEX_op_sextract_i32:
1689 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1690 break;
1692 case INDEX_op_add2_i32:
1693 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1694 (int32_t)args[4], args[5], const_args[4],
1695 const_args[5], false);
1696 break;
1697 case INDEX_op_add2_i64:
1698 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1699 args[5], const_args[4], const_args[5], false);
1700 break;
1701 case INDEX_op_sub2_i32:
1702 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1703 (int32_t)args[4], args[5], const_args[4],
1704 const_args[5], true);
1705 break;
1706 case INDEX_op_sub2_i64:
1707 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1708 args[5], const_args[4], const_args[5], true);
1709 break;
1711 case INDEX_op_muluh_i64:
1712 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1713 break;
1714 case INDEX_op_mulsh_i64:
1715 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1716 break;
1718 case INDEX_op_mb:
1719 tcg_out_mb(s, a0);
1720 break;
1722 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1723 case INDEX_op_mov_i64:
1724 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1725 case INDEX_op_movi_i64:
1726 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1727 default:
1728 tcg_abort();
1731 #undef REG0
1734 static const TCGTargetOpDef aarch64_op_defs[] = {
1735 { INDEX_op_exit_tb, { } },
1736 { INDEX_op_goto_tb, { } },
1737 { INDEX_op_br, { } },
1739 { INDEX_op_ld8u_i32, { "r", "r" } },
1740 { INDEX_op_ld8s_i32, { "r", "r" } },
1741 { INDEX_op_ld16u_i32, { "r", "r" } },
1742 { INDEX_op_ld16s_i32, { "r", "r" } },
1743 { INDEX_op_ld_i32, { "r", "r" } },
1744 { INDEX_op_ld8u_i64, { "r", "r" } },
1745 { INDEX_op_ld8s_i64, { "r", "r" } },
1746 { INDEX_op_ld16u_i64, { "r", "r" } },
1747 { INDEX_op_ld16s_i64, { "r", "r" } },
1748 { INDEX_op_ld32u_i64, { "r", "r" } },
1749 { INDEX_op_ld32s_i64, { "r", "r" } },
1750 { INDEX_op_ld_i64, { "r", "r" } },
1752 { INDEX_op_st8_i32, { "rZ", "r" } },
1753 { INDEX_op_st16_i32, { "rZ", "r" } },
1754 { INDEX_op_st_i32, { "rZ", "r" } },
1755 { INDEX_op_st8_i64, { "rZ", "r" } },
1756 { INDEX_op_st16_i64, { "rZ", "r" } },
1757 { INDEX_op_st32_i64, { "rZ", "r" } },
1758 { INDEX_op_st_i64, { "rZ", "r" } },
1760 { INDEX_op_add_i32, { "r", "r", "rA" } },
1761 { INDEX_op_add_i64, { "r", "r", "rA" } },
1762 { INDEX_op_sub_i32, { "r", "r", "rA" } },
1763 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1764 { INDEX_op_mul_i32, { "r", "r", "r" } },
1765 { INDEX_op_mul_i64, { "r", "r", "r" } },
1766 { INDEX_op_div_i32, { "r", "r", "r" } },
1767 { INDEX_op_div_i64, { "r", "r", "r" } },
1768 { INDEX_op_divu_i32, { "r", "r", "r" } },
1769 { INDEX_op_divu_i64, { "r", "r", "r" } },
1770 { INDEX_op_rem_i32, { "r", "r", "r" } },
1771 { INDEX_op_rem_i64, { "r", "r", "r" } },
1772 { INDEX_op_remu_i32, { "r", "r", "r" } },
1773 { INDEX_op_remu_i64, { "r", "r", "r" } },
1774 { INDEX_op_and_i32, { "r", "r", "rL" } },
1775 { INDEX_op_and_i64, { "r", "r", "rL" } },
1776 { INDEX_op_or_i32, { "r", "r", "rL" } },
1777 { INDEX_op_or_i64, { "r", "r", "rL" } },
1778 { INDEX_op_xor_i32, { "r", "r", "rL" } },
1779 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1780 { INDEX_op_andc_i32, { "r", "r", "rL" } },
1781 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1782 { INDEX_op_orc_i32, { "r", "r", "rL" } },
1783 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1784 { INDEX_op_eqv_i32, { "r", "r", "rL" } },
1785 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1787 { INDEX_op_neg_i32, { "r", "r" } },
1788 { INDEX_op_neg_i64, { "r", "r" } },
1789 { INDEX_op_not_i32, { "r", "r" } },
1790 { INDEX_op_not_i64, { "r", "r" } },
1792 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1793 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1794 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1795 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1796 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1797 { INDEX_op_clz_i32, { "r", "r", "rAL" } },
1798 { INDEX_op_ctz_i32, { "r", "r", "rAL" } },
1799 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1800 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1801 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1802 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1803 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1804 { INDEX_op_clz_i64, { "r", "r", "rAL" } },
1805 { INDEX_op_ctz_i64, { "r", "r", "rAL" } },
1807 { INDEX_op_brcond_i32, { "r", "rA" } },
1808 { INDEX_op_brcond_i64, { "r", "rA" } },
1809 { INDEX_op_setcond_i32, { "r", "r", "rA" } },
1810 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1811 { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
1812 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1814 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1815 { INDEX_op_qemu_ld_i64, { "r", "l" } },
1816 { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1817 { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1819 { INDEX_op_bswap16_i32, { "r", "r" } },
1820 { INDEX_op_bswap32_i32, { "r", "r" } },
1821 { INDEX_op_bswap16_i64, { "r", "r" } },
1822 { INDEX_op_bswap32_i64, { "r", "r" } },
1823 { INDEX_op_bswap64_i64, { "r", "r" } },
1825 { INDEX_op_ext8s_i32, { "r", "r" } },
1826 { INDEX_op_ext16s_i32, { "r", "r" } },
1827 { INDEX_op_ext8u_i32, { "r", "r" } },
1828 { INDEX_op_ext16u_i32, { "r", "r" } },
1830 { INDEX_op_ext8s_i64, { "r", "r" } },
1831 { INDEX_op_ext16s_i64, { "r", "r" } },
1832 { INDEX_op_ext32s_i64, { "r", "r" } },
1833 { INDEX_op_ext8u_i64, { "r", "r" } },
1834 { INDEX_op_ext16u_i64, { "r", "r" } },
1835 { INDEX_op_ext32u_i64, { "r", "r" } },
1836 { INDEX_op_ext_i32_i64, { "r", "r" } },
1837 { INDEX_op_extu_i32_i64, { "r", "r" } },
1839 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1840 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1841 { INDEX_op_extract_i32, { "r", "r" } },
1842 { INDEX_op_extract_i64, { "r", "r" } },
1843 { INDEX_op_sextract_i32, { "r", "r" } },
1844 { INDEX_op_sextract_i64, { "r", "r" } },
1846 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1847 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1848 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1849 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1851 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1852 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1854 { INDEX_op_mb, { } },
1855 { -1 },
1858 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
1860 int i, n = ARRAY_SIZE(aarch64_op_defs);
1862 for (i = 0; i < n; ++i) {
1863 if (aarch64_op_defs[i].op == op) {
1864 return &aarch64_op_defs[i];
1867 return NULL;
1870 static void tcg_target_init(TCGContext *s)
1872 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1873 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1875 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1876 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1877 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1878 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1879 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1880 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1881 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1882 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1883 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1884 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1885 (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
1887 tcg_regset_clear(s->reserved_regs);
1888 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1889 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1890 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1891 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1894 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1895 #define PUSH_SIZE ((30 - 19 + 1) * 8)
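/* That is x19..x30 inclusive: 12 registers saved as 6 pairs, 96 bytes.  */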
1897 #define FRAME_SIZE \
1898 ((PUSH_SIZE \
1899 + TCG_STATIC_CALL_ARGS_SIZE \
1900 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1901 + TCG_TARGET_STACK_ALIGN - 1) \
1902 & ~(TCG_TARGET_STACK_ALIGN - 1))
1904 /* We're expecting a 2 byte uleb128 encoded value. */
1905 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1907 /* We're expecting to use a single ADDI insn. */
1908 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
1910 static void tcg_target_qemu_prologue(TCGContext *s)
1912 TCGReg r;
1914 /* Push (FP, LR) and allocate space for all saved registers. */
1915 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1916 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1918 /* Set up frame pointer for canonical unwinding. */
1919 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1921 /* Store callee-preserved regs x19..x28. */
1922 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1923 int ofs = (r - TCG_REG_X19 + 2) * 8;
1924 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1927 /* Make stack space for TCG locals. */
1928 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1929 FRAME_SIZE - PUSH_SIZE);
1931 /* Inform TCG about how to find TCG locals with register, offset, size. */
1932 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1933 CPU_TEMP_BUF_NLONGS * sizeof(long));
1935 #if !defined(CONFIG_SOFTMMU)
1936 if (USE_GUEST_BASE) {
1937 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
1938 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1940 #endif
1942 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1943 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1945 tb_ret_addr = s->code_ptr;
1947 /* Remove TCG locals stack space. */
1948 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1949 FRAME_SIZE - PUSH_SIZE);
1951 /* Restore registers x19..x28. */
1952 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1953 int ofs = (r - TCG_REG_X19 + 2) * 8;
1954 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1957 /* Pop (FP, LR), restore SP to previous frame. */
1958 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
1959 TCG_REG_SP, PUSH_SIZE, 0, 1);
1960 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
1963 typedef struct {
1964 DebugFrameHeader h;
1965 uint8_t fde_def_cfa[4];
1966 uint8_t fde_reg_ofs[24];
1967 } DebugFrame;
1969 #define ELF_HOST_MACHINE EM_AARCH64
1971 static const DebugFrame debug_frame = {
1972 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1973 .h.cie.id = -1,
1974 .h.cie.version = 1,
1975 .h.cie.code_align = 1,
1976 .h.cie.data_align = 0x78, /* sleb128 -8 */
1977 .h.cie.return_column = TCG_REG_LR,
1979 /* Total FDE size does not include the "len" member. */
1980 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1982 .fde_def_cfa = {
1983 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
1984 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
1985 (FRAME_SIZE >> 7)
1987 .fde_reg_ofs = {
1988 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
1989 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
1990 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
1991 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
1992 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
1993 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
1994 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
1995 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
1996 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
1997 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
1998 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
1999 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2003 void tcg_register_jit(void *buf, size_t buf_size)
2005 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));