[qemu/ar7.git] / tcg / aarch64 / tcg-target.c
1 /*
2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21 #ifndef NDEBUG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
28 #endif /* NDEBUG */
30 static const int tcg_target_reg_alloc_order[] = {
31 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
32 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
33 TCG_REG_X28, /* we will reserve this for guest_base if configured */
35 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
36 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
37 TCG_REG_X16, TCG_REG_X17,
39 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
40 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
42 /* X18 reserved by system */
43 /* X19 reserved for AREG0 */
44 /* X29 reserved as fp */
45 /* X30 reserved as temporary */
48 static const int tcg_target_call_iarg_regs[8] = {
49 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
50 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
52 static const int tcg_target_call_oarg_regs[1] = {
53 TCG_REG_X0
56 #define TCG_REG_TMP TCG_REG_X30
58 #ifndef CONFIG_SOFTMMU
59 /* Note that XZR cannot be encoded in the address base register slot,
60 as that actually encodes SP. So if we need to zero-extend the guest
61 address, via the address index register slot, we need to load even
62 a zero guest base into a register. */
63 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
64 #define TCG_REG_GUEST_BASE TCG_REG_X28
65 #endif
67 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
69 ptrdiff_t offset = target - code_ptr;
70 assert(offset == sextract64(offset, 0, 26));
71 /* read instruction, mask away previous PC_REL26 parameter contents,
72 set the proper offset, then write back the instruction. */
73 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
76 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
78 ptrdiff_t offset = target - code_ptr;
79 assert(offset == sextract64(offset, 0, 19));
80 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
83 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
84 intptr_t value, intptr_t addend)
86 assert(addend == 0);
87 switch (type) {
88 case R_AARCH64_JUMP26:
89 case R_AARCH64_CALL26:
90 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
91 break;
92 case R_AARCH64_CONDBR19:
93 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
94 break;
95 default:
96 tcg_abort();
100 #define TCG_CT_CONST_AIMM 0x100
101 #define TCG_CT_CONST_LIMM 0x200
102 #define TCG_CT_CONST_ZERO 0x400
103 #define TCG_CT_CONST_MONE 0x800
105 /* parse target specific constraints */
106 static int target_parse_constraint(TCGArgConstraint *ct,
107 const char **pct_str)
109 const char *ct_str = *pct_str;
111 switch (ct_str[0]) {
112 case 'r':
113 ct->ct |= TCG_CT_REG;
114 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
115 break;
116 case 'l': /* qemu_ld / qemu_st address, data_reg */
117 ct->ct |= TCG_CT_REG;
118 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
119 #ifdef CONFIG_SOFTMMU
120 /* x0 and x1 will be overwritten when reading the tlb entry,
121 and x2 and x3 are used for the helper args; better to avoid using them. */
122 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
123 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
124 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
125 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
126 #endif
127 break;
128 case 'A': /* Valid for arithmetic immediate (positive or negative). */
129 ct->ct |= TCG_CT_CONST_AIMM;
130 break;
131 case 'L': /* Valid for logical immediate. */
132 ct->ct |= TCG_CT_CONST_LIMM;
133 break;
134 case 'M': /* minus one */
135 ct->ct |= TCG_CT_CONST_MONE;
136 break;
137 case 'Z': /* zero */
138 ct->ct |= TCG_CT_CONST_ZERO;
139 break;
140 default:
141 return -1;
144 ct_str++;
145 *pct_str = ct_str;
146 return 0;
149 static inline bool is_aimm(uint64_t val)
151 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
154 static inline bool is_limm(uint64_t val)
156 /* Taking a simplified view of the logical immediates for now, ignoring
157 the replication that can happen across the field. Match bit patterns
158 of the forms
159 0....01....1
160 0..01..10..0
161 and their inverses. */
163 /* Make things easier below, by testing the form with msb clear. */
164 if ((int64_t)val < 0) {
165 val = ~val;
167 if (val == 0) {
168 return false;
170 val += val & -val;
171 return (val & (val - 1)) == 0;
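/* Worked examples of the test above (illustrative only, never called by the
   backend): adding the lowest set bit collapses the low run of ones, so a
   single remaining bit means the value contained at most one run. */
static inline void is_limm_examples(void)
{
    /* 0x0ff0 (form 0..01..10..0): 0x0ff0 + 0x0010 = 0x1000, one bit set. */
    assert(is_limm(0x0ff0));
    /* 0x0ff1 has two separate runs of ones: 0x0ff1 + 1 = 0x0ff2, rejected. */
    assert(!is_limm(0x0ff1));
    /* Values with the msb set are inverted first: ~0xffffffffffffff00ull
       is 0xff, a single run, which is accepted. */
    assert(is_limm(0xffffffffffffff00ull));
}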
174 static int tcg_target_const_match(tcg_target_long val, TCGType type,
175 const TCGArgConstraint *arg_ct)
177 int ct = arg_ct->ct;
179 if (ct & TCG_CT_CONST) {
180 return 1;
182 if (type == TCG_TYPE_I32) {
183 val = (int32_t)val;
185 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
186 return 1;
188 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
189 return 1;
191 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
192 return 1;
194 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
195 return 1;
198 return 0;
201 enum aarch64_cond_code {
202 COND_EQ = 0x0,
203 COND_NE = 0x1,
204 COND_CS = 0x2, /* Unsigned greater or equal */
205 COND_HS = COND_CS, /* ALIAS greater or equal */
206 COND_CC = 0x3, /* Unsigned less than */
207 COND_LO = COND_CC, /* ALIAS Lower */
208 COND_MI = 0x4, /* Negative */
209 COND_PL = 0x5, /* Zero or greater */
210 COND_VS = 0x6, /* Overflow */
211 COND_VC = 0x7, /* No overflow */
212 COND_HI = 0x8, /* Unsigned greater than */
213 COND_LS = 0x9, /* Unsigned less or equal */
214 COND_GE = 0xa,
215 COND_LT = 0xb,
216 COND_GT = 0xc,
217 COND_LE = 0xd,
218 COND_AL = 0xe,
219 COND_NV = 0xf, /* behaves like COND_AL here */
222 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
223 [TCG_COND_EQ] = COND_EQ,
224 [TCG_COND_NE] = COND_NE,
225 [TCG_COND_LT] = COND_LT,
226 [TCG_COND_GE] = COND_GE,
227 [TCG_COND_LE] = COND_LE,
228 [TCG_COND_GT] = COND_GT,
229 /* unsigned */
230 [TCG_COND_LTU] = COND_LO,
231 [TCG_COND_GTU] = COND_HI,
232 [TCG_COND_GEU] = COND_HS,
233 [TCG_COND_LEU] = COND_LS,
236 typedef enum {
237 LDST_ST = 0, /* store */
238 LDST_LD = 1, /* load */
239 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
240 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
241 } AArch64LdstType;
243 /* We encode the format of the insn into the beginning of the name, so that
244 we can have the preprocessor help "typecheck" the insn vs the output
245 function. Arm didn't provide us with nice names for the formats, so we
246 use the section number of the architecture reference manual in which the
247 instruction group is described. */
248 typedef enum {
249 /* Compare and branch (immediate). */
250 I3201_CBZ = 0x34000000,
251 I3201_CBNZ = 0x35000000,
253 /* Conditional branch (immediate). */
254 I3202_B_C = 0x54000000,
256 /* Unconditional branch (immediate). */
257 I3206_B = 0x14000000,
258 I3206_BL = 0x94000000,
260 /* Unconditional branch (register). */
261 I3207_BR = 0xd61f0000,
262 I3207_BLR = 0xd63f0000,
263 I3207_RET = 0xd65f0000,
265 /* Load/store register. Described here as 3.3.12, but the helper
266 that emits them can transform to 3.3.10 or 3.3.13. */
267 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
268 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
269 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
270 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
272 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
273 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
274 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
275 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
277 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
278 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
280 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
281 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
282 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
284 I3312_TO_I3310 = 0x00200800,
285 I3312_TO_I3313 = 0x01000000,
287 /* Load/store register pair instructions. */
288 I3314_LDP = 0x28400000,
289 I3314_STP = 0x28000000,
291 /* Add/subtract immediate instructions. */
292 I3401_ADDI = 0x11000000,
293 I3401_ADDSI = 0x31000000,
294 I3401_SUBI = 0x51000000,
295 I3401_SUBSI = 0x71000000,
297 /* Bitfield instructions. */
298 I3402_BFM = 0x33000000,
299 I3402_SBFM = 0x13000000,
300 I3402_UBFM = 0x53000000,
302 /* Extract instruction. */
303 I3403_EXTR = 0x13800000,
305 /* Logical immediate instructions. */
306 I3404_ANDI = 0x12000000,
307 I3404_ORRI = 0x32000000,
308 I3404_EORI = 0x52000000,
310 /* Move wide immediate instructions. */
311 I3405_MOVN = 0x12800000,
312 I3405_MOVZ = 0x52800000,
313 I3405_MOVK = 0x72800000,
315 /* PC relative addressing instructions. */
316 I3406_ADR = 0x10000000,
317 I3406_ADRP = 0x90000000,
319 /* Add/subtract shifted register instructions (without a shift). */
320 I3502_ADD = 0x0b000000,
321 I3502_ADDS = 0x2b000000,
322 I3502_SUB = 0x4b000000,
323 I3502_SUBS = 0x6b000000,
325 /* Add/subtract shifted register instructions (with a shift). */
326 I3502S_ADD_LSL = I3502_ADD,
328 /* Add/subtract with carry instructions. */
329 I3503_ADC = 0x1a000000,
330 I3503_SBC = 0x5a000000,
332 /* Conditional select instructions. */
333 I3506_CSEL = 0x1a800000,
334 I3506_CSINC = 0x1a800400,
336 /* Data-processing (1 source) instructions. */
337 I3507_REV16 = 0x5ac00400,
338 I3507_REV32 = 0x5ac00800,
339 I3507_REV64 = 0x5ac00c00,
341 /* Data-processing (2 source) instructions. */
342 I3508_LSLV = 0x1ac02000,
343 I3508_LSRV = 0x1ac02400,
344 I3508_ASRV = 0x1ac02800,
345 I3508_RORV = 0x1ac02c00,
346 I3508_SMULH = 0x9b407c00,
347 I3508_UMULH = 0x9bc07c00,
348 I3508_UDIV = 0x1ac00800,
349 I3508_SDIV = 0x1ac00c00,
351 /* Data-processing (3 source) instructions. */
352 I3509_MADD = 0x1b000000,
353 I3509_MSUB = 0x1b008000,
355 /* Logical shifted register instructions (without a shift). */
356 I3510_AND = 0x0a000000,
357 I3510_BIC = 0x0a200000,
358 I3510_ORR = 0x2a000000,
359 I3510_ORN = 0x2a200000,
360 I3510_EOR = 0x4a000000,
361 I3510_EON = 0x4a200000,
362 I3510_ANDS = 0x6a000000,
363 } AArch64Insn;
365 static inline uint32_t tcg_in32(TCGContext *s)
367 uint32_t v = *(uint32_t *)s->code_ptr;
368 return v;
371 /* Emit an opcode with "type-checking" of the format. */
372 #define tcg_out_insn(S, FMT, OP, ...) \
373 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
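/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm), so pairing an opcode
   with an emitter from the wrong format group fails to compile instead of
   silently mis-encoding. */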
375 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
376 TCGReg rt, int imm19)
378 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
381 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
382 TCGCond c, int imm19)
384 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
387 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
389 tcg_out32(s, insn | (imm26 & 0x03ffffff));
392 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
394 tcg_out32(s, insn | rn << 5);
397 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
398 TCGReg r1, TCGReg r2, TCGReg rn,
399 tcg_target_long ofs, bool pre, bool w)
401 insn |= 1u << 31; /* ext */
402 insn |= pre << 24;
403 insn |= w << 23;
405 assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
406 insn |= (ofs & (0x7f << 3)) << (15 - 3);
408 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
411 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
412 TCGReg rd, TCGReg rn, uint64_t aimm)
414 if (aimm > 0xfff) {
415 assert((aimm & 0xfff) == 0);
416 aimm >>= 12;
417 assert(aimm <= 0xfff);
418 aimm |= 1 << 12; /* apply LSL 12 */
420 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
423 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
424 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
425 that feed the DecodeBitMasks pseudo function. */
426 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
427 TCGReg rd, TCGReg rn, int n, int immr, int imms)
429 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
430 | rn << 5 | rd);
433 #define tcg_out_insn_3404 tcg_out_insn_3402
435 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
436 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
438 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
439 | rn << 5 | rd);
442 /* This function is used for the Move (wide immediate) instruction group.
443 Note that SHIFT is a full shift count, not the 2 bit HW field. */
444 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
445 TCGReg rd, uint16_t half, unsigned shift)
447 assert((shift & ~0x30) == 0);
448 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
451 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
452 TCGReg rd, int64_t disp)
454 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
457 /* This function is for 3.5.2 (Add/subtract shifted register), for
458 the rare occasion when we actually want to supply a shift amount. */
459 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
460 TCGType ext, TCGReg rd, TCGReg rn,
461 TCGReg rm, int imm6)
463 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
466 /* This function is for 3.5.2 (Add/subtract shifted register),
467 and 3.5.10 (Logical shifted register), for the vast majority of cases
468 when we don't want to apply a shift. Thus it can also be used for
469 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
470 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
471 TCGReg rd, TCGReg rn, TCGReg rm)
473 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
476 #define tcg_out_insn_3503 tcg_out_insn_3502
477 #define tcg_out_insn_3508 tcg_out_insn_3502
478 #define tcg_out_insn_3510 tcg_out_insn_3502
480 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
481 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
483 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
484 | tcg_cond_to_aarch64[c] << 12);
487 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
488 TCGReg rd, TCGReg rn)
490 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
493 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
494 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
496 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
499 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
500 TCGReg rd, TCGReg base, TCGType ext,
501 TCGReg regoff)
503 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
504 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
505 0x4000 | ext << 13 | base << 5 | rd);
508 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
509 TCGReg rd, TCGReg rn, intptr_t offset)
511 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
514 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
515 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
517 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
518 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
521 /* Register to register move using ORR (shifted register with no shift). */
522 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
524 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
527 /* Register to register move using ADDI (move to/from SP). */
528 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
530 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
533 /* This function is used for the Logical (immediate) instruction group.
534 The value of LIMM must satisfy IS_LIMM. See the comment above about
535 only supporting simplified logical immediates. */
536 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
537 TCGReg rd, TCGReg rn, uint64_t limm)
539 unsigned h, l, r, c;
541 assert(is_limm(limm));
543 h = clz64(limm);
544 l = ctz64(limm);
545 if (l == 0) {
546 r = 0; /* form 0....01....1 */
547 c = ctz64(~limm) - 1;
548 if (h == 0) {
549 r = clz64(~limm); /* form 1..10..01..1 */
550 c += r;
552 } else {
553 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
554 c = r - h - 1;
556 if (ext == TCG_TYPE_I32) {
557 r &= 31;
558 c &= 31;
561 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
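/* Worked example (illustrative): for limm = 0x0000000000000ff0 we get
   h = clz64 = 52 and l = ctz64 = 4.  Since l != 0 this is the rotated form,
   so r = 64 - l = 60 and c = r - h - 1 = 7, giving N:IMMR:IMMS = 1:60:7,
   i.e. a run of c + 1 = 8 ones rotated right by 60 within a 64-bit element,
   which DecodeBitMasks expands back to 0xff0. */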
564 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
565 tcg_target_long value)
567 AArch64Insn insn;
568 int i, wantinv, shift;
569 tcg_target_long svalue = value;
570 tcg_target_long ivalue = ~value;
571 tcg_target_long imask;
573 /* For 32-bit values, discard potential garbage in value. For 64-bit
574 values within [2**31, 2**32-1], we can create smaller sequences by
575 interpreting this as a negative 32-bit number, while ensuring that
576 the high 32 bits are cleared by setting SF=0. */
577 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
578 svalue = (int32_t)value;
579 value = (uint32_t)value;
580 ivalue = (uint32_t)ivalue;
581 type = TCG_TYPE_I32;
584 /* Speed things up by handling the common case of small positive
585 and negative values specially. */
586 if ((value & ~0xffffull) == 0) {
587 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
588 return;
589 } else if ((ivalue & ~0xffffull) == 0) {
590 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
591 return;
594 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
595 use the sign-extended value. That lets us match rotated values such
596 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
597 if (is_limm(svalue)) {
598 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
599 return;
602 /* Look for host pointer values within 4G of the PC. This happens
603 often when loading pointers to QEMU's own data structures. */
604 if (type == TCG_TYPE_I64) {
605 tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
606 if (disp == sextract64(disp, 0, 21)) {
607 tcg_out_insn(s, 3406, ADRP, rd, disp);
608 if (value & 0xfff) {
609 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
611 return;
615 /* Would it take fewer insns to begin with MOVN? For the value and its
616 inverse, count the number of 16-bit lanes that are 0. */
617 for (i = wantinv = imask = 0; i < 64; i += 16) {
618 tcg_target_long mask = 0xffffull << i;
619 if ((value & mask) == 0) {
620 wantinv -= 1;
622 if ((ivalue & mask) == 0) {
623 wantinv += 1;
624 imask |= mask;
628 /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
629 insn = I3405_MOVZ;
630 if (wantinv > 0) {
631 value = ivalue;
632 insn = I3405_MOVN;
635 /* Find the lowest lane that is not 0x0000. */
636 shift = ctz64(value) & (63 & -16);
637 tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
639 if (wantinv > 0) {
640 /* Re-invert the value, so MOVK sees non-inverted bits. */
641 value = ~value;
642 /* Clear out all the 0xffff lanes. */
643 value ^= imask;
645 /* Clear out the lane that we just set. */
646 value &= ~(0xffffUL << shift);
648 /* Iterate until all lanes have been set, and thus cleared from VALUE. */
649 while (value) {
650 shift = ctz64(value) & (63 & -16);
651 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
652 value &= ~(0xffffUL << shift);
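/* Illustrative traces of the lane logic above, assuming the value is not
   within 4GiB of the code buffer (which would take the ADRP path instead):
   - value = 0x12340000abcd0000: no 0xffff lanes, so wantinv <= 0 and we
     emit MOVZ rd, #0xabcd, lsl #16 followed by MOVK rd, #0x1234, lsl #48.
   - value = 0xffff5678ffffffff: three lanes are 0xffff, so wantinv > 0;
     we invert to 0x0000a98700000000 and a single MOVN rd, #0xa987, lsl #32
     reconstructs the whole value. */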
656 /* Define something more legible for general use. */
657 #define tcg_out_ldst_r tcg_out_insn_3310
659 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
660 TCGReg rd, TCGReg rn, intptr_t offset)
662 TCGMemOp size = (uint32_t)insn >> 30;
664 /* If the offset is naturally aligned and in range, then we can
665 use the scaled uimm12 encoding */
666 if (offset >= 0 && !(offset & ((1 << size) - 1))) {
667 uintptr_t scaled_uimm = offset >> size;
668 if (scaled_uimm <= 0xfff) {
669 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
670 return;
674 /* Small signed offsets can use the unscaled encoding. */
675 if (offset >= -256 && offset < 256) {
676 tcg_out_insn_3312(s, insn, rd, rn, offset);
677 return;
680 /* Worst-case scenario, move offset to temp register, use reg offset. */
681 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
682 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
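/* Examples of the three encodings chosen above (illustrative), for a
   64-bit load insn (size = 3):
   tcg_out_ldst(s, I3312_LDRX, rd, rn, 32760);  aligned, 32760 >> 3 = 4095,
                                                so scaled uimm12: ldr xd, [xn, #32760]
   tcg_out_ldst(s, I3312_LDRX, rd, rn, -16);    unscaled simm9:   ldur xd, [xn, #-16]
   tcg_out_ldst(s, I3312_LDRX, rd, rn, 65536);  out of range: movi into TCG_REG_TMP,
                                                then register-offset ldr xd, [xn, tmp] */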
685 static inline void tcg_out_mov(TCGContext *s,
686 TCGType type, TCGReg ret, TCGReg arg)
688 if (ret != arg) {
689 tcg_out_movr(s, type, ret, arg);
693 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
694 TCGReg arg1, intptr_t arg2)
696 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
697 arg, arg1, arg2);
700 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
701 TCGReg arg1, intptr_t arg2)
703 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
704 arg, arg1, arg2);
707 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
708 TCGReg rn, unsigned int a, unsigned int b)
710 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
713 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
714 TCGReg rn, unsigned int a, unsigned int b)
716 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
719 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
720 TCGReg rn, unsigned int a, unsigned int b)
722 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
725 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
726 TCGReg rn, TCGReg rm, unsigned int a)
728 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
731 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
732 TCGReg rd, TCGReg rn, unsigned int m)
734 int bits = ext ? 64 : 32;
735 int max = bits - 1;
736 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
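/* E.g. a 32-bit shift left by 8 becomes UBFM Wd, Wn, #24, #23, which is
   exactly the LSL #8 alias (illustrative). */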
739 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
740 TCGReg rd, TCGReg rn, unsigned int m)
742 int max = ext ? 63 : 31;
743 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
746 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
747 TCGReg rd, TCGReg rn, unsigned int m)
749 int max = ext ? 63 : 31;
750 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
753 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
754 TCGReg rd, TCGReg rn, unsigned int m)
756 int max = ext ? 63 : 31;
757 tcg_out_extr(s, ext, rd, rn, rn, m & max);
760 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
761 TCGReg rd, TCGReg rn, unsigned int m)
763 int bits = ext ? 64 : 32;
764 int max = bits - 1;
765 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
768 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
769 TCGReg rn, unsigned lsb, unsigned width)
771 unsigned size = ext ? 64 : 32;
772 unsigned a = (size - lsb) & (size - 1);
773 unsigned b = width - 1;
774 tcg_out_bfm(s, ext, rd, rn, a, b);
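/* E.g. depositing an 8-bit field at lsb 8 of a 32-bit value gives
   a = 24, b = 7, i.e. BFM Wd, Wn, #24, #7, the BFI Wd, Wn, #8, #8 alias
   (illustrative). */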
777 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
778 tcg_target_long b, bool const_b)
780 if (const_b) {
781 /* Using CMP or CMN aliases. */
782 if (b >= 0) {
783 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
784 } else {
785 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
787 } else {
788 /* Using CMP alias SUBS wzr, Wn, Wm */
789 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
793 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
795 ptrdiff_t offset = target - s->code_ptr;
796 assert(offset == sextract64(offset, 0, 26));
797 tcg_out_insn(s, 3206, B, offset);
800 static inline void tcg_out_goto_noaddr(TCGContext *s)
802 /* We pay attention here to not modify the branch target by reading from
803 the buffer. This ensures that caches and memory are kept coherent during
804 retranslation. Mask away possible garbage in the high bits for the
805 first translation, while keeping the offset bits for retranslation. */
806 uint32_t old = tcg_in32(s);
807 tcg_out_insn(s, 3206, B, old);
810 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
812 /* See comments in tcg_out_goto_noaddr. */
813 uint32_t old = tcg_in32(s) >> 5;
814 tcg_out_insn(s, 3202, B_C, c, old);
817 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
819 tcg_out_insn(s, 3207, BLR, reg);
822 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
824 ptrdiff_t offset = target - s->code_ptr;
825 if (offset == sextract64(offset, 0, 26)) {
826 tcg_out_insn(s, 3206, BL, offset);
827 } else {
828 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
829 tcg_out_callr(s, TCG_REG_TMP);
833 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
835 tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
836 tcg_insn_unit *target = (tcg_insn_unit *)addr;
838 reloc_pc26(code_ptr, target);
839 flush_icache_range(jmp_addr, jmp_addr + 4);
842 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
844 if (!l->has_value) {
845 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
846 tcg_out_goto_noaddr(s);
847 } else {
848 tcg_out_goto(s, l->u.value_ptr);
852 static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
853 TCGArg b, bool b_const, TCGLabel *l)
855 intptr_t offset;
856 bool need_cmp;
858 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
859 need_cmp = false;
860 } else {
861 need_cmp = true;
862 tcg_out_cmp(s, ext, a, b, b_const);
865 if (!l->has_value) {
866 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
867 offset = tcg_in32(s) >> 5;
868 } else {
869 offset = l->u.value_ptr - s->code_ptr;
870 assert(offset == sextract64(offset, 0, 19));
873 if (need_cmp) {
874 tcg_out_insn(s, 3202, B_C, c, offset);
875 } else if (c == TCG_COND_EQ) {
876 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
877 } else {
878 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
882 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
884 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
887 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
889 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
892 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
894 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
897 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
898 TCGReg rd, TCGReg rn)
900 /* Using ALIASes SXTB, SXTH, SXTW of SBFM Xd, Xn, #0, #7|15|31 */
901 int bits = (8 << s_bits) - 1;
902 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
905 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
906 TCGReg rd, TCGReg rn)
908 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
909 int bits = (8 << s_bits) - 1;
910 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
913 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
914 TCGReg rn, int64_t aimm)
916 if (aimm >= 0) {
917 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
918 } else {
919 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
923 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
924 TCGReg rh, TCGReg al, TCGReg ah,
925 tcg_target_long bl, tcg_target_long bh,
926 bool const_bl, bool const_bh, bool sub)
928 TCGReg orig_rl = rl;
929 AArch64Insn insn;
931 if (rl == ah || (!const_bh && rl == bh)) {
932 rl = TCG_REG_TMP;
935 if (const_bl) {
936 insn = I3401_ADDSI;
937 if ((bl < 0) ^ sub) {
938 insn = I3401_SUBSI;
939 bl = -bl;
941 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
942 } else {
943 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
946 insn = I3503_ADC;
947 if (const_bh) {
948 /* Note that the only two constants we support are 0 and -1, and
949 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
950 if ((bh != 0) ^ sub) {
951 insn = I3503_SBC;
953 bh = TCG_REG_XZR;
954 } else if (sub) {
955 insn = I3503_SBC;
957 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
959 tcg_out_mov(s, ext, orig_rl, rl);
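/* The constant handling above follows from SBC computing rn + ~rm + C:
   with rm = 0 that is rn + ~0 + C = rn - 1 + C, the same as ADC rn, #-1,
   so a constant high part of -1 can always be folded into the opposite
   opcode with XZR as the operand (illustrative note). */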
962 #ifdef CONFIG_SOFTMMU
963 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
964 * TCGMemOpIdx oi, uintptr_t ra)
966 static void * const qemu_ld_helpers[16] = {
967 [MO_UB] = helper_ret_ldub_mmu,
968 [MO_LEUW] = helper_le_lduw_mmu,
969 [MO_LEUL] = helper_le_ldul_mmu,
970 [MO_LEQ] = helper_le_ldq_mmu,
971 [MO_BEUW] = helper_be_lduw_mmu,
972 [MO_BEUL] = helper_be_ldul_mmu,
973 [MO_BEQ] = helper_be_ldq_mmu,
976 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
977 * uintxx_t val, TCGMemOpIdx oi,
978 * uintptr_t ra)
980 static void * const qemu_st_helpers[16] = {
981 [MO_UB] = helper_ret_stb_mmu,
982 [MO_LEUW] = helper_le_stw_mmu,
983 [MO_LEUL] = helper_le_stl_mmu,
984 [MO_LEQ] = helper_le_stq_mmu,
985 [MO_BEUW] = helper_be_stw_mmu,
986 [MO_BEUL] = helper_be_stl_mmu,
987 [MO_BEQ] = helper_be_stq_mmu,
990 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
992 ptrdiff_t offset = tcg_pcrel_diff(s, target);
993 assert(offset == sextract64(offset, 0, 21));
994 tcg_out_insn(s, 3406, ADR, rd, offset);
997 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
999 TCGMemOpIdx oi = lb->oi;
1000 TCGMemOp opc = get_memop(oi);
1001 TCGMemOp size = opc & MO_SIZE;
1003 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1005 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1006 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1007 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1008 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1009 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1010 if (opc & MO_SIGN) {
1011 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1012 } else {
1013 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1016 tcg_out_goto(s, lb->raddr);
1019 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1021 TCGMemOpIdx oi = lb->oi;
1022 TCGMemOp opc = get_memop(oi);
1023 TCGMemOp size = opc & MO_SIZE;
1025 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1027 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1028 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1029 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1030 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1031 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1032 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1033 tcg_out_goto(s, lb->raddr);
1036 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1037 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1038 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1040 TCGLabelQemuLdst *label = new_ldst_label(s);
1042 label->is_ld = is_ld;
1043 label->oi = oi;
1044 label->type = ext;
1045 label->datalo_reg = data_reg;
1046 label->addrlo_reg = addr_reg;
1047 label->raddr = raddr;
1048 label->label_ptr[0] = label_ptr;
1051 /* Load and compare a TLB entry, emitting the conditional jump to the
1052 slow path for the failure case, which will be patched later when finalizing
1053 the slow path. Generated code returns the host addend in X1,
1054 clobbers X0,X2,X3,TMP. */
1055 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1056 tcg_insn_unit **label_ptr, int mem_index,
1057 bool is_read)
1059 int tlb_offset = is_read ?
1060 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1061 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1062 int s_mask = (1 << (opc & MO_SIZE)) - 1;
1063 TCGReg base = TCG_AREG0, x3;
1064 uint64_t tlb_mask;
1066 /* For aligned accesses, we check the first byte and include the alignment
1067 bits within the address. For unaligned access, we check that we don't
1068 cross pages using the address of the last byte of the access. */
1069 if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) {
1070 tlb_mask = TARGET_PAGE_MASK | s_mask;
1071 x3 = addr_reg;
1072 } else {
1073 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1074 TCG_REG_X3, addr_reg, s_mask);
1075 tlb_mask = TARGET_PAGE_MASK;
1076 x3 = TCG_REG_X3;
1079 /* Extract the TLB index from the address into X0.
1080 X0<CPU_TLB_BITS:0> =
1081 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1082 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1083 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1085 /* Store the page mask part of the address into X3. */
1086 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1087 TCG_REG_X3, x3, tlb_mask);
1089 /* Add any "high bits" from the tlb offset to the env address into X2,
1090 to take advantage of the LSL12 form of the ADDI instruction.
1091 X2 = env + (tlb_offset & 0xfff000) */
1092 if (tlb_offset & 0xfff000) {
1093 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1094 tlb_offset & 0xfff000);
1095 base = TCG_REG_X2;
1098 /* Merge the tlb index contribution into X2.
1099 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1100 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1101 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1103 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1104 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1105 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1106 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1108 /* Load the tlb addend. Do that early to avoid stalling.
1109 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1110 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1111 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1112 (is_read ? offsetof(CPUTLBEntry, addr_read)
1113 : offsetof(CPUTLBEntry, addr_write)));
1115 /* Perform the address comparison. */
1116 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1118 /* If not equal, we jump to the slow path. */
1119 *label_ptr = s->code_ptr;
1120 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
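/* For reference, the code generated above is roughly the following,
   assuming a 64-bit guest, an aligned access, and a tlb_offset with a
   non-zero high part (illustrative sketch, not literal output):
       ubfx  x0, addr, #TARGET_PAGE_BITS, #(CPU_TLB_BITS + 1)   tlb index
       and   x3, addr, #(TARGET_PAGE_MASK | s_mask)             page bits
       add   x2, env, #(tlb_offset & 0xfff000)
       add   x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
       ldr   x0, [x2, #(tlb_offset & 0xfff)]                    comparator
       ldr   x1, [x2, #...addend offset...]                     host addend
       cmp   x0, x3
       b.ne  slow_path                                          patched later */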
1123 #endif /* CONFIG_SOFTMMU */
1125 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1126 TCGReg data_r, TCGReg addr_r,
1127 TCGType otype, TCGReg off_r)
1129 const TCGMemOp bswap = memop & MO_BSWAP;
1131 switch (memop & MO_SSIZE) {
1132 case MO_UB:
1133 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1134 break;
1135 case MO_SB:
1136 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1137 data_r, addr_r, otype, off_r);
1138 break;
1139 case MO_UW:
1140 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1141 if (bswap) {
1142 tcg_out_rev16(s, data_r, data_r);
1144 break;
1145 case MO_SW:
1146 if (bswap) {
1147 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1148 tcg_out_rev16(s, data_r, data_r);
1149 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1150 } else {
1151 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1152 data_r, addr_r, otype, off_r);
1154 break;
1155 case MO_UL:
1156 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1157 if (bswap) {
1158 tcg_out_rev32(s, data_r, data_r);
1160 break;
1161 case MO_SL:
1162 if (bswap) {
1163 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1164 tcg_out_rev32(s, data_r, data_r);
1165 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1166 } else {
1167 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1169 break;
1170 case MO_Q:
1171 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1172 if (bswap) {
1173 tcg_out_rev64(s, data_r, data_r);
1175 break;
1176 default:
1177 tcg_abort();
1181 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1182 TCGReg data_r, TCGReg addr_r,
1183 TCGType otype, TCGReg off_r)
1185 const TCGMemOp bswap = memop & MO_BSWAP;
1187 switch (memop & MO_SIZE) {
1188 case MO_8:
1189 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1190 break;
1191 case MO_16:
1192 if (bswap && data_r != TCG_REG_XZR) {
1193 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1194 data_r = TCG_REG_TMP;
1196 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1197 break;
1198 case MO_32:
1199 if (bswap && data_r != TCG_REG_XZR) {
1200 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1201 data_r = TCG_REG_TMP;
1203 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1204 break;
1205 case MO_64:
1206 if (bswap && data_r != TCG_REG_XZR) {
1207 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1208 data_r = TCG_REG_TMP;
1210 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1211 break;
1212 default:
1213 tcg_abort();
1217 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1218 TCGMemOpIdx oi, TCGType ext)
1220 TCGMemOp memop = get_memop(oi);
1221 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1222 #ifdef CONFIG_SOFTMMU
1223 unsigned mem_index = get_mmuidx(oi);
1224 tcg_insn_unit *label_ptr;
1226 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1227 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1228 TCG_REG_X1, otype, addr_reg);
1229 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1230 s->code_ptr, label_ptr);
1231 #else /* !CONFIG_SOFTMMU */
1232 if (USE_GUEST_BASE) {
1233 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1234 TCG_REG_GUEST_BASE, otype, addr_reg);
1235 } else {
1236 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1237 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1239 #endif /* CONFIG_SOFTMMU */
1242 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1243 TCGMemOpIdx oi)
1245 TCGMemOp memop = get_memop(oi);
1246 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1247 #ifdef CONFIG_SOFTMMU
1248 unsigned mem_index = get_mmuidx(oi);
1249 tcg_insn_unit *label_ptr;
1251 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1252 tcg_out_qemu_st_direct(s, memop, data_reg,
1253 TCG_REG_X1, otype, addr_reg);
1254 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1255 data_reg, addr_reg, s->code_ptr, label_ptr);
1256 #else /* !CONFIG_SOFTMMU */
1257 if (USE_GUEST_BASE) {
1258 tcg_out_qemu_st_direct(s, memop, data_reg,
1259 TCG_REG_GUEST_BASE, otype, addr_reg);
1260 } else {
1261 tcg_out_qemu_st_direct(s, memop, data_reg,
1262 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1264 #endif /* CONFIG_SOFTMMU */
1267 static tcg_insn_unit *tb_ret_addr;
1269 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1270 const TCGArg args[TCG_MAX_OP_ARGS],
1271 const int const_args[TCG_MAX_OP_ARGS])
1273 /* 99% of the time, we can signal the use of extension registers
1274 by looking to see if the opcode handles 64-bit data. */
1275 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1277 /* Hoist the loads of the most common arguments. */
1278 TCGArg a0 = args[0];
1279 TCGArg a1 = args[1];
1280 TCGArg a2 = args[2];
1281 int c2 = const_args[2];
1283 /* Some operands are defined with "rZ" constraint, a register or
1284 the zero register. These need not actually test args[I] == 0. */
1285 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1287 switch (opc) {
1288 case INDEX_op_exit_tb:
1289 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1290 tcg_out_goto(s, tb_ret_addr);
1291 break;
1293 case INDEX_op_goto_tb:
1294 #ifndef USE_DIRECT_JUMP
1295 #error "USE_DIRECT_JUMP required for aarch64"
1296 #endif
1297 assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1298 s->tb_jmp_offset[a0] = tcg_current_code_size(s);
1299 /* actual branch destination will be patched by
1300 aarch64_tb_set_jmp_target later, beware retranslation. */
1301 tcg_out_goto_noaddr(s);
1302 s->tb_next_offset[a0] = tcg_current_code_size(s);
1303 break;
1305 case INDEX_op_br:
1306 tcg_out_goto_label(s, arg_label(a0));
1307 break;
1309 case INDEX_op_ld8u_i32:
1310 case INDEX_op_ld8u_i64:
1311 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1312 break;
1313 case INDEX_op_ld8s_i32:
1314 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1315 break;
1316 case INDEX_op_ld8s_i64:
1317 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1318 break;
1319 case INDEX_op_ld16u_i32:
1320 case INDEX_op_ld16u_i64:
1321 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1322 break;
1323 case INDEX_op_ld16s_i32:
1324 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1325 break;
1326 case INDEX_op_ld16s_i64:
1327 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1328 break;
1329 case INDEX_op_ld_i32:
1330 case INDEX_op_ld32u_i64:
1331 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1332 break;
1333 case INDEX_op_ld32s_i64:
1334 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1335 break;
1336 case INDEX_op_ld_i64:
1337 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1338 break;
1340 case INDEX_op_st8_i32:
1341 case INDEX_op_st8_i64:
1342 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1343 break;
1344 case INDEX_op_st16_i32:
1345 case INDEX_op_st16_i64:
1346 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1347 break;
1348 case INDEX_op_st_i32:
1349 case INDEX_op_st32_i64:
1350 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1351 break;
1352 case INDEX_op_st_i64:
1353 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1354 break;
1356 case INDEX_op_add_i32:
1357 a2 = (int32_t)a2;
1358 /* FALLTHRU */
1359 case INDEX_op_add_i64:
1360 if (c2) {
1361 tcg_out_addsubi(s, ext, a0, a1, a2);
1362 } else {
1363 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1365 break;
1367 case INDEX_op_sub_i32:
1368 a2 = (int32_t)a2;
1369 /* FALLTHRU */
1370 case INDEX_op_sub_i64:
1371 if (c2) {
1372 tcg_out_addsubi(s, ext, a0, a1, -a2);
1373 } else {
1374 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1376 break;
1378 case INDEX_op_neg_i64:
1379 case INDEX_op_neg_i32:
1380 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1381 break;
1383 case INDEX_op_and_i32:
1384 a2 = (int32_t)a2;
1385 /* FALLTHRU */
1386 case INDEX_op_and_i64:
1387 if (c2) {
1388 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1389 } else {
1390 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1392 break;
1394 case INDEX_op_andc_i32:
1395 a2 = (int32_t)a2;
1396 /* FALLTHRU */
1397 case INDEX_op_andc_i64:
1398 if (c2) {
1399 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1400 } else {
1401 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1403 break;
1405 case INDEX_op_or_i32:
1406 a2 = (int32_t)a2;
1407 /* FALLTHRU */
1408 case INDEX_op_or_i64:
1409 if (c2) {
1410 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1411 } else {
1412 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1414 break;
1416 case INDEX_op_orc_i32:
1417 a2 = (int32_t)a2;
1418 /* FALLTHRU */
1419 case INDEX_op_orc_i64:
1420 if (c2) {
1421 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1422 } else {
1423 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1425 break;
1427 case INDEX_op_xor_i32:
1428 a2 = (int32_t)a2;
1429 /* FALLTHRU */
1430 case INDEX_op_xor_i64:
1431 if (c2) {
1432 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1433 } else {
1434 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1436 break;
1438 case INDEX_op_eqv_i32:
1439 a2 = (int32_t)a2;
1440 /* FALLTHRU */
1441 case INDEX_op_eqv_i64:
1442 if (c2) {
1443 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1444 } else {
1445 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1447 break;
1449 case INDEX_op_not_i64:
1450 case INDEX_op_not_i32:
1451 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1452 break;
1454 case INDEX_op_mul_i64:
1455 case INDEX_op_mul_i32:
1456 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1457 break;
1459 case INDEX_op_div_i64:
1460 case INDEX_op_div_i32:
1461 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1462 break;
1463 case INDEX_op_divu_i64:
1464 case INDEX_op_divu_i32:
1465 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1466 break;
1468 case INDEX_op_rem_i64:
1469 case INDEX_op_rem_i32:
1470 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1471 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1472 break;
1473 case INDEX_op_remu_i64:
1474 case INDEX_op_remu_i32:
1475 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1476 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1477 break;
1479 case INDEX_op_shl_i64:
1480 case INDEX_op_shl_i32:
1481 if (c2) {
1482 tcg_out_shl(s, ext, a0, a1, a2);
1483 } else {
1484 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1486 break;
1488 case INDEX_op_shr_i64:
1489 case INDEX_op_shr_i32:
1490 if (c2) {
1491 tcg_out_shr(s, ext, a0, a1, a2);
1492 } else {
1493 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1495 break;
1497 case INDEX_op_sar_i64:
1498 case INDEX_op_sar_i32:
1499 if (c2) {
1500 tcg_out_sar(s, ext, a0, a1, a2);
1501 } else {
1502 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1504 break;
1506 case INDEX_op_rotr_i64:
1507 case INDEX_op_rotr_i32:
1508 if (c2) {
1509 tcg_out_rotr(s, ext, a0, a1, a2);
1510 } else {
1511 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1513 break;
1515 case INDEX_op_rotl_i64:
1516 case INDEX_op_rotl_i32:
1517 if (c2) {
1518 tcg_out_rotl(s, ext, a0, a1, a2);
1519 } else {
1520 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1521 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1523 break;
1525 case INDEX_op_brcond_i32:
1526 a1 = (int32_t)a1;
1527 /* FALLTHRU */
1528 case INDEX_op_brcond_i64:
1529 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1530 break;
1532 case INDEX_op_setcond_i32:
1533 a2 = (int32_t)a2;
1534 /* FALLTHRU */
1535 case INDEX_op_setcond_i64:
1536 tcg_out_cmp(s, ext, a1, a2, c2);
1537 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1538 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1539 TCG_REG_XZR, tcg_invert_cond(args[3]));
1540 break;
1542 case INDEX_op_movcond_i32:
1543 a2 = (int32_t)a2;
1544 /* FALLTHRU */
1545 case INDEX_op_movcond_i64:
1546 tcg_out_cmp(s, ext, a1, a2, c2);
1547 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1548 break;
1550 case INDEX_op_qemu_ld_i32:
1551 case INDEX_op_qemu_ld_i64:
1552 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1553 break;
1554 case INDEX_op_qemu_st_i32:
1555 case INDEX_op_qemu_st_i64:
1556 tcg_out_qemu_st(s, REG0(0), a1, a2);
1557 break;
1559 case INDEX_op_bswap64_i64:
1560 tcg_out_rev64(s, a0, a1);
1561 break;
1562 case INDEX_op_bswap32_i64:
1563 case INDEX_op_bswap32_i32:
1564 tcg_out_rev32(s, a0, a1);
1565 break;
1566 case INDEX_op_bswap16_i64:
1567 case INDEX_op_bswap16_i32:
1568 tcg_out_rev16(s, a0, a1);
1569 break;
1571 case INDEX_op_ext8s_i64:
1572 case INDEX_op_ext8s_i32:
1573 tcg_out_sxt(s, ext, MO_8, a0, a1);
1574 break;
1575 case INDEX_op_ext16s_i64:
1576 case INDEX_op_ext16s_i32:
1577 tcg_out_sxt(s, ext, MO_16, a0, a1);
1578 break;
1579 case INDEX_op_ext_i32_i64:
1580 case INDEX_op_ext32s_i64:
1581 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1582 break;
1583 case INDEX_op_ext8u_i64:
1584 case INDEX_op_ext8u_i32:
1585 tcg_out_uxt(s, MO_8, a0, a1);
1586 break;
1587 case INDEX_op_ext16u_i64:
1588 case INDEX_op_ext16u_i32:
1589 tcg_out_uxt(s, MO_16, a0, a1);
1590 break;
1591 case INDEX_op_extu_i32_i64:
1592 case INDEX_op_ext32u_i64:
1593 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1594 break;
1596 case INDEX_op_deposit_i64:
1597 case INDEX_op_deposit_i32:
1598 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1599 break;
1601 case INDEX_op_add2_i32:
1602 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1603 (int32_t)args[4], args[5], const_args[4],
1604 const_args[5], false);
1605 break;
1606 case INDEX_op_add2_i64:
1607 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1608 args[5], const_args[4], const_args[5], false);
1609 break;
1610 case INDEX_op_sub2_i32:
1611 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1612 (int32_t)args[4], args[5], const_args[4],
1613 const_args[5], true);
1614 break;
1615 case INDEX_op_sub2_i64:
1616 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1617 args[5], const_args[4], const_args[5], true);
1618 break;
1620 case INDEX_op_muluh_i64:
1621 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1622 break;
1623 case INDEX_op_mulsh_i64:
1624 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1625 break;
1627 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1628 case INDEX_op_mov_i64:
1629 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1630 case INDEX_op_movi_i64:
1631 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1632 default:
1633 tcg_abort();
1636 #undef REG0
1639 static const TCGTargetOpDef aarch64_op_defs[] = {
1640 { INDEX_op_exit_tb, { } },
1641 { INDEX_op_goto_tb, { } },
1642 { INDEX_op_br, { } },
1644 { INDEX_op_ld8u_i32, { "r", "r" } },
1645 { INDEX_op_ld8s_i32, { "r", "r" } },
1646 { INDEX_op_ld16u_i32, { "r", "r" } },
1647 { INDEX_op_ld16s_i32, { "r", "r" } },
1648 { INDEX_op_ld_i32, { "r", "r" } },
1649 { INDEX_op_ld8u_i64, { "r", "r" } },
1650 { INDEX_op_ld8s_i64, { "r", "r" } },
1651 { INDEX_op_ld16u_i64, { "r", "r" } },
1652 { INDEX_op_ld16s_i64, { "r", "r" } },
1653 { INDEX_op_ld32u_i64, { "r", "r" } },
1654 { INDEX_op_ld32s_i64, { "r", "r" } },
1655 { INDEX_op_ld_i64, { "r", "r" } },
1657 { INDEX_op_st8_i32, { "rZ", "r" } },
1658 { INDEX_op_st16_i32, { "rZ", "r" } },
1659 { INDEX_op_st_i32, { "rZ", "r" } },
1660 { INDEX_op_st8_i64, { "rZ", "r" } },
1661 { INDEX_op_st16_i64, { "rZ", "r" } },
1662 { INDEX_op_st32_i64, { "rZ", "r" } },
1663 { INDEX_op_st_i64, { "rZ", "r" } },
1665 { INDEX_op_add_i32, { "r", "r", "rA" } },
1666 { INDEX_op_add_i64, { "r", "r", "rA" } },
1667 { INDEX_op_sub_i32, { "r", "r", "rA" } },
1668 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1669 { INDEX_op_mul_i32, { "r", "r", "r" } },
1670 { INDEX_op_mul_i64, { "r", "r", "r" } },
1671 { INDEX_op_div_i32, { "r", "r", "r" } },
1672 { INDEX_op_div_i64, { "r", "r", "r" } },
1673 { INDEX_op_divu_i32, { "r", "r", "r" } },
1674 { INDEX_op_divu_i64, { "r", "r", "r" } },
1675 { INDEX_op_rem_i32, { "r", "r", "r" } },
1676 { INDEX_op_rem_i64, { "r", "r", "r" } },
1677 { INDEX_op_remu_i32, { "r", "r", "r" } },
1678 { INDEX_op_remu_i64, { "r", "r", "r" } },
1679 { INDEX_op_and_i32, { "r", "r", "rL" } },
1680 { INDEX_op_and_i64, { "r", "r", "rL" } },
1681 { INDEX_op_or_i32, { "r", "r", "rL" } },
1682 { INDEX_op_or_i64, { "r", "r", "rL" } },
1683 { INDEX_op_xor_i32, { "r", "r", "rL" } },
1684 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1685 { INDEX_op_andc_i32, { "r", "r", "rL" } },
1686 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1687 { INDEX_op_orc_i32, { "r", "r", "rL" } },
1688 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1689 { INDEX_op_eqv_i32, { "r", "r", "rL" } },
1690 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1692 { INDEX_op_neg_i32, { "r", "r" } },
1693 { INDEX_op_neg_i64, { "r", "r" } },
1694 { INDEX_op_not_i32, { "r", "r" } },
1695 { INDEX_op_not_i64, { "r", "r" } },
1697 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1698 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1699 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1700 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1701 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1702 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1703 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1704 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1705 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1706 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1708 { INDEX_op_brcond_i32, { "r", "rA" } },
1709 { INDEX_op_brcond_i64, { "r", "rA" } },
1710 { INDEX_op_setcond_i32, { "r", "r", "rA" } },
1711 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1712 { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
1713 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1715 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1716 { INDEX_op_qemu_ld_i64, { "r", "l" } },
1717 { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1718 { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1720 { INDEX_op_bswap16_i32, { "r", "r" } },
1721 { INDEX_op_bswap32_i32, { "r", "r" } },
1722 { INDEX_op_bswap16_i64, { "r", "r" } },
1723 { INDEX_op_bswap32_i64, { "r", "r" } },
1724 { INDEX_op_bswap64_i64, { "r", "r" } },
1726 { INDEX_op_ext8s_i32, { "r", "r" } },
1727 { INDEX_op_ext16s_i32, { "r", "r" } },
1728 { INDEX_op_ext8u_i32, { "r", "r" } },
1729 { INDEX_op_ext16u_i32, { "r", "r" } },
1731 { INDEX_op_ext8s_i64, { "r", "r" } },
1732 { INDEX_op_ext16s_i64, { "r", "r" } },
1733 { INDEX_op_ext32s_i64, { "r", "r" } },
1734 { INDEX_op_ext8u_i64, { "r", "r" } },
1735 { INDEX_op_ext16u_i64, { "r", "r" } },
1736 { INDEX_op_ext32u_i64, { "r", "r" } },
1737 { INDEX_op_ext_i32_i64, { "r", "r" } },
1738 { INDEX_op_extu_i32_i64, { "r", "r" } },
1740 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1741 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1743 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1744 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1745 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1746 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1748 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1749 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1751 { -1 },
1754 static void tcg_target_init(TCGContext *s)
1756 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1757 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1759 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1760 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1761 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1762 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1763 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1764 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1765 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1766 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1767 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1768 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1769 (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
1771 tcg_regset_clear(s->reserved_regs);
1772 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1773 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1774 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1775 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1777 tcg_add_target_add_op_defs(aarch64_op_defs);
1780 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1781 #define PUSH_SIZE ((30 - 19 + 1) * 8)
1783 #define FRAME_SIZE \
1784 ((PUSH_SIZE \
1785 + TCG_STATIC_CALL_ARGS_SIZE \
1786 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1787 + TCG_TARGET_STACK_ALIGN - 1) \
1788 & ~(TCG_TARGET_STACK_ALIGN - 1))
1790 /* We're expecting a 2 byte uleb128 encoded value. */
1791 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1793 /* We're expecting to use a single ADDI insn. */
1794 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
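/* With the usual values of TCG_STATIC_CALL_ARGS_SIZE (128),
   CPU_TEMP_BUF_NLONGS (128) and TCG_TARGET_STACK_ALIGN (16), this works out
   to PUSH_SIZE = 96 and FRAME_SIZE = 96 + 128 + 1024 = 1248, so both
   build-time checks above hold comfortably (illustrative arithmetic; the
   actual constants come from tcg.h and tcg-target.h). */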
1796 static void tcg_target_qemu_prologue(TCGContext *s)
1798 TCGReg r;
1800 /* Push (FP, LR) and allocate space for all saved registers. */
1801 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1802 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1804 /* Set up frame pointer for canonical unwinding. */
1805 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1807 /* Store callee-preserved regs x19..x28. */
1808 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1809 int ofs = (r - TCG_REG_X19 + 2) * 8;
1810 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1813 /* Make stack space for TCG locals. */
1814 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1815 FRAME_SIZE - PUSH_SIZE);
1817 /* Inform TCG about how to find TCG locals with register, offset, size. */
1818 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1819 CPU_TEMP_BUF_NLONGS * sizeof(long));
1821 #if !defined(CONFIG_SOFTMMU)
1822 if (USE_GUEST_BASE) {
1823 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
1824 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1826 #endif
1828 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1829 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1831 tb_ret_addr = s->code_ptr;
1833 /* Remove TCG locals stack space. */
1834 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1835 FRAME_SIZE - PUSH_SIZE);
1837 /* Restore registers x19..x28. */
1838 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1839 int ofs = (r - TCG_REG_X19 + 2) * 8;
1840 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1843 /* Pop (FP, LR), restore SP to previous frame. */
1844 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
1845 TCG_REG_SP, PUSH_SIZE, 0, 1);
1846 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
1849 typedef struct {
1850 DebugFrameHeader h;
1851 uint8_t fde_def_cfa[4];
1852 uint8_t fde_reg_ofs[24];
1853 } DebugFrame;
1855 #define ELF_HOST_MACHINE EM_AARCH64
1857 static const DebugFrame debug_frame = {
1858 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1859 .h.cie.id = -1,
1860 .h.cie.version = 1,
1861 .h.cie.code_align = 1,
1862 .h.cie.data_align = 0x78, /* sleb128 -8 */
1863 .h.cie.return_column = TCG_REG_LR,
1865 /* Total FDE size does not include the "len" member. */
1866 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1868 .fde_def_cfa = {
1869 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
1870 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
1871 (FRAME_SIZE >> 7)
1873 .fde_reg_ofs = {
1874 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
1875 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
1876 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
1877 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
1878 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
1879 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
1880 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
1881 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
1882 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
1883 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
1884 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
1885 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
1889 void tcg_register_jit(void *buf, size_t buf_size)
1891 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));