tcg/aarch64/tcg-target.c
1 /*
2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "qemu/osdep.h"
14 #include "tcg-be-ldst.h"
15 #include "qemu/bitops.h"
17 /* We're going to re-use TCGType when setting the SF bit, which controls
18 the size of the operation performed. If we know the values match, it
19 makes things much cleaner. */
20 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
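/* For instance, with the values asserted above a TCGType can be dropped
   straight into the SF bit of an A64 data-processing instruction:

       tcg_out32(s, insn | ext << 31 | ...);

   emits the 32-bit form for TCG_TYPE_I32 (SF = 0) and the 64-bit form for
   TCG_TYPE_I64 (SF = 1), with no translation table needed.  */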
22 #ifndef NDEBUG
23 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
25 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
26 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
27 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
29 #endif /* NDEBUG */
31 static const int tcg_target_reg_alloc_order[] = {
32 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
33 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
34 TCG_REG_X28, /* we will reserve this for guest_base if configured */
36 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
37 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
38 TCG_REG_X16, TCG_REG_X17,
40 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
41 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
43 /* X18 reserved by system */
44 /* X19 reserved for AREG0 */
45 /* X29 reserved as fp */
46 /* X30 reserved as temporary */
49 static const int tcg_target_call_iarg_regs[8] = {
50 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
51 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
53 static const int tcg_target_call_oarg_regs[1] = {
54 TCG_REG_X0
57 #define TCG_REG_TMP TCG_REG_X30
59 #ifndef CONFIG_SOFTMMU
60 /* Note that XZR cannot be encoded in the address base register slot,
61    as that actually encodes SP. So if we need to zero-extend the guest
62 address, via the address index register slot, we need to load even
63 a zero guest base into a register. */
64 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
65 #define TCG_REG_GUEST_BASE TCG_REG_X28
66 #endif
68 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
70 ptrdiff_t offset = target - code_ptr;
71 assert(offset == sextract64(offset, 0, 26));
72 /* read instruction, mask away previous PC_REL26 parameter contents,
73 set the proper offset, then write back the instruction. */
74 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
77 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
79 ptrdiff_t offset = target - code_ptr;
80 assert(offset == sextract64(offset, 0, 19));
81 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
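/* Both relocations count the displacement in 32-bit instruction units
   (tcg_insn_unit), so the 26-bit branch form above reaches roughly +/-128MB
   and the 19-bit conditional-branch form roughly +/-1MB from the insn.  */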
84 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
85 intptr_t value, intptr_t addend)
87 assert(addend == 0);
88 switch (type) {
89 case R_AARCH64_JUMP26:
90 case R_AARCH64_CALL26:
91 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
92 break;
93 case R_AARCH64_CONDBR19:
94 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
95 break;
96 default:
97 tcg_abort();
101 #define TCG_CT_CONST_AIMM 0x100
102 #define TCG_CT_CONST_LIMM 0x200
103 #define TCG_CT_CONST_ZERO 0x400
104 #define TCG_CT_CONST_MONE 0x800
106 /* parse target specific constraints */
107 static int target_parse_constraint(TCGArgConstraint *ct,
108 const char **pct_str)
110 const char *ct_str = *pct_str;
112 switch (ct_str[0]) {
113 case 'r':
114 ct->ct |= TCG_CT_REG;
115 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
116 break;
117 case 'l': /* qemu_ld / qemu_st address, data_reg */
118 ct->ct |= TCG_CT_REG;
119 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
120 #ifdef CONFIG_SOFTMMU
121 /* x0 and x1 will be overwritten when reading the tlb entry,
122 and x2, and x3 for helper args, better to avoid using them. */
123 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
124 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
125 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
126 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
127 #endif
128 break;
129 case 'A': /* Valid for arithmetic immediate (positive or negative). */
130 ct->ct |= TCG_CT_CONST_AIMM;
131 break;
132 case 'L': /* Valid for logical immediate. */
133 ct->ct |= TCG_CT_CONST_LIMM;
134 break;
135 case 'M': /* minus one */
136 ct->ct |= TCG_CT_CONST_MONE;
137 break;
138 case 'Z': /* zero */
139 ct->ct |= TCG_CT_CONST_ZERO;
140 break;
141 default:
142 return -1;
145 ct_str++;
146 *pct_str = ct_str;
147 return 0;
150 static inline bool is_aimm(uint64_t val)
152 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
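/* That is, an arithmetic immediate is a 12-bit value optionally shifted
   left by 12: e.g. 0xabc and 0xabc000 are representable, but 0xabc0 is not,
   since the encoding provides no intermediate shift amounts.  */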
155 static inline bool is_limm(uint64_t val)
157 /* Taking a simplified view of the logical immediates for now, ignoring
158 the replication that can happen across the field. Match bit patterns
159 of the forms
160 0....01....1
161 0..01..10..0
162 and their inverses. */
164 /* Make things easier below, by testing the form with msb clear. */
165 if ((int64_t)val < 0) {
166 val = ~val;
168 if (val == 0) {
169 return false;
171 val += val & -val;
172 return (val & (val - 1)) == 0;
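/* Worked example: for val = 0x0ff0 the lowest set bit is 0x0010, so
   val += val & -val gives 0x1000, a power of two -> accepted (one run of
   ones).  For val = 0x0f0f the same step gives 0x0f10, which still has
   several bits set -> rejected (two separate runs).  */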
175 static int tcg_target_const_match(tcg_target_long val, TCGType type,
176 const TCGArgConstraint *arg_ct)
178 int ct = arg_ct->ct;
180 if (ct & TCG_CT_CONST) {
181 return 1;
183 if (type == TCG_TYPE_I32) {
184 val = (int32_t)val;
186 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
187 return 1;
189 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
190 return 1;
192 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
193 return 1;
195 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
196 return 1;
199 return 0;
202 enum aarch64_cond_code {
203 COND_EQ = 0x0,
204 COND_NE = 0x1,
205 COND_CS = 0x2, /* Unsigned greater or equal */
206 COND_HS = COND_CS, /* ALIAS greater or equal */
207 COND_CC = 0x3, /* Unsigned less than */
208 COND_LO = COND_CC, /* ALIAS Lower */
209 COND_MI = 0x4, /* Negative */
210 COND_PL = 0x5, /* Zero or greater */
211 COND_VS = 0x6, /* Overflow */
212 COND_VC = 0x7, /* No overflow */
213 COND_HI = 0x8, /* Unsigned greater than */
214 COND_LS = 0x9, /* Unsigned less or equal */
215 COND_GE = 0xa,
216 COND_LT = 0xb,
217 COND_GT = 0xc,
218 COND_LE = 0xd,
219 COND_AL = 0xe,
220 COND_NV = 0xf, /* behaves like COND_AL here */
223 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
224 [TCG_COND_EQ] = COND_EQ,
225 [TCG_COND_NE] = COND_NE,
226 [TCG_COND_LT] = COND_LT,
227 [TCG_COND_GE] = COND_GE,
228 [TCG_COND_LE] = COND_LE,
229 [TCG_COND_GT] = COND_GT,
230 /* unsigned */
231 [TCG_COND_LTU] = COND_LO,
232 [TCG_COND_GTU] = COND_HI,
233 [TCG_COND_GEU] = COND_HS,
234 [TCG_COND_LEU] = COND_LS,
237 typedef enum {
238 LDST_ST = 0, /* store */
239 LDST_LD = 1, /* load */
240 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
241 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
242 } AArch64LdstType;
244 /* We encode the format of the insn into the beginning of the name, so that
245 we can have the preprocessor help "typecheck" the insn vs the output
246 function. Arm didn't provide us with nice names for the formats, so we
247 use the section number of the architecture reference manual in which the
248 instruction group is described. */
249 typedef enum {
250 /* Compare and branch (immediate). */
251 I3201_CBZ = 0x34000000,
252 I3201_CBNZ = 0x35000000,
254 /* Conditional branch (immediate). */
255 I3202_B_C = 0x54000000,
257 /* Unconditional branch (immediate). */
258 I3206_B = 0x14000000,
259 I3206_BL = 0x94000000,
261 /* Unconditional branch (register). */
262 I3207_BR = 0xd61f0000,
263 I3207_BLR = 0xd63f0000,
264 I3207_RET = 0xd65f0000,
266 /* Load/store register. Described here as 3.3.12, but the helper
267 that emits them can transform to 3.3.10 or 3.3.13. */
268 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
269 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
270 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
271 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
273 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
274 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
275 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
276 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
278 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
279 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
281 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
282 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
283 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
285 I3312_TO_I3310 = 0x00200800,
286 I3312_TO_I3313 = 0x01000000,
288 /* Load/store register pair instructions. */
289 I3314_LDP = 0x28400000,
290 I3314_STP = 0x28000000,
292 /* Add/subtract immediate instructions. */
293 I3401_ADDI = 0x11000000,
294 I3401_ADDSI = 0x31000000,
295 I3401_SUBI = 0x51000000,
296 I3401_SUBSI = 0x71000000,
298 /* Bitfield instructions. */
299 I3402_BFM = 0x33000000,
300 I3402_SBFM = 0x13000000,
301 I3402_UBFM = 0x53000000,
303 /* Extract instruction. */
304 I3403_EXTR = 0x13800000,
306 /* Logical immediate instructions. */
307 I3404_ANDI = 0x12000000,
308 I3404_ORRI = 0x32000000,
309 I3404_EORI = 0x52000000,
311 /* Move wide immediate instructions. */
312 I3405_MOVN = 0x12800000,
313 I3405_MOVZ = 0x52800000,
314 I3405_MOVK = 0x72800000,
316 /* PC relative addressing instructions. */
317 I3406_ADR = 0x10000000,
318 I3406_ADRP = 0x90000000,
320 /* Add/subtract shifted register instructions (without a shift). */
321 I3502_ADD = 0x0b000000,
322 I3502_ADDS = 0x2b000000,
323 I3502_SUB = 0x4b000000,
324 I3502_SUBS = 0x6b000000,
326 /* Add/subtract shifted register instructions (with a shift). */
327 I3502S_ADD_LSL = I3502_ADD,
329 /* Add/subtract with carry instructions. */
330 I3503_ADC = 0x1a000000,
331 I3503_SBC = 0x5a000000,
333 /* Conditional select instructions. */
334 I3506_CSEL = 0x1a800000,
335 I3506_CSINC = 0x1a800400,
337 /* Data-processing (1 source) instructions. */
338 I3507_REV16 = 0x5ac00400,
339 I3507_REV32 = 0x5ac00800,
340 I3507_REV64 = 0x5ac00c00,
342 /* Data-processing (2 source) instructions. */
343 I3508_LSLV = 0x1ac02000,
344 I3508_LSRV = 0x1ac02400,
345 I3508_ASRV = 0x1ac02800,
346 I3508_RORV = 0x1ac02c00,
347 I3508_SMULH = 0x9b407c00,
348 I3508_UMULH = 0x9bc07c00,
349 I3508_UDIV = 0x1ac00800,
350 I3508_SDIV = 0x1ac00c00,
352 /* Data-processing (3 source) instructions. */
353 I3509_MADD = 0x1b000000,
354 I3509_MSUB = 0x1b008000,
356 /* Logical shifted register instructions (without a shift). */
357 I3510_AND = 0x0a000000,
358 I3510_BIC = 0x0a200000,
359 I3510_ORR = 0x2a000000,
360 I3510_ORN = 0x2a200000,
361 I3510_EOR = 0x4a000000,
362 I3510_EON = 0x4a200000,
363 I3510_ANDS = 0x6a000000,
364 } AArch64Insn;
366 static inline uint32_t tcg_in32(TCGContext *s)
368 uint32_t v = *(uint32_t *)s->code_ptr;
369 return v;
372 /* Emit an opcode with "type-checking" of the format. */
373 #define tcg_out_insn(S, FMT, OP, ...) \
374 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
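/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); pairing an opcode
   with the wrong format group produces an undeclared identifier and thus
   fails to compile.  */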
376 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
377 TCGReg rt, int imm19)
379 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
382 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
383 TCGCond c, int imm19)
385 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
388 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
390 tcg_out32(s, insn | (imm26 & 0x03ffffff));
393 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
395 tcg_out32(s, insn | rn << 5);
398 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
399 TCGReg r1, TCGReg r2, TCGReg rn,
400 tcg_target_long ofs, bool pre, bool w)
402 insn |= 1u << 31; /* ext */
403 insn |= pre << 24;
404 insn |= w << 23;
406 assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
407 insn |= (ofs & (0x7f << 3)) << (15 - 3);
409 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
412 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
413 TCGReg rd, TCGReg rn, uint64_t aimm)
415 if (aimm > 0xfff) {
416 assert((aimm & 0xfff) == 0);
417 aimm >>= 12;
418 assert(aimm <= 0xfff);
419 aimm |= 1 << 12; /* apply LSL 12 */
421 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
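/* E.g. aimm = 0x3000 does not fit in 12 bits, so it is shifted down to 3
   and bit 12 of the immediate field is set, selecting the "LSL #12" form;
   the emitted insn is equivalent to "add rd, rn, #3, lsl #12".  */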
424 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
425 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
426 that feed the DecodeBitMasks pseudo function. */
427 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
428 TCGReg rd, TCGReg rn, int n, int immr, int imms)
430 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
431 | rn << 5 | rd);
434 #define tcg_out_insn_3404 tcg_out_insn_3402
436 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
437 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
439 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
440 | rn << 5 | rd);
443 /* This function is used for the Move (wide immediate) instruction group.
444 Note that SHIFT is a full shift count, not the 2 bit HW field. */
445 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
446 TCGReg rd, uint16_t half, unsigned shift)
448 assert((shift & ~0x30) == 0);
449 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
452 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
453 TCGReg rd, int64_t disp)
455 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
458 /* This function is for 3.5.2 (Add/subtract shifted register), for the
459    rare occasions when we actually want to supply a shift amount. */
460 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
461 TCGType ext, TCGReg rd, TCGReg rn,
462 TCGReg rm, int imm6)
464 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
467 /* This function is for 3.5.2 (Add/subtract shifted register),
468    and 3.5.10 (Logical shifted register), for the vast majority of cases
469 when we don't want to apply a shift. Thus it can also be used for
470 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
471 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
472 TCGReg rd, TCGReg rn, TCGReg rm)
474 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
477 #define tcg_out_insn_3503 tcg_out_insn_3502
478 #define tcg_out_insn_3508 tcg_out_insn_3502
479 #define tcg_out_insn_3510 tcg_out_insn_3502
481 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
482 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
484 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
485 | tcg_cond_to_aarch64[c] << 12);
488 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
489 TCGReg rd, TCGReg rn)
491 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
494 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
495 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
497 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
500 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
501 TCGReg rd, TCGReg base, TCGType ext,
502 TCGReg regoff)
504 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
505 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
506 0x4000 | ext << 13 | base << 5 | rd);
509 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
510 TCGReg rd, TCGReg rn, intptr_t offset)
512 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
515 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
516 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
518 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
519 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
522 /* Register to register move using ORR (shifted register with no shift). */
523 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
525 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
528 /* Register to register move using ADDI (move to/from SP). */
529 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
531 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
534 /* This function is used for the Logical (immediate) instruction group.
535 The value of LIMM must satisfy IS_LIMM. See the comment above about
536 only supporting simplified logical immediates. */
537 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
538 TCGReg rd, TCGReg rn, uint64_t limm)
540 unsigned h, l, r, c;
542 assert(is_limm(limm));
544 h = clz64(limm);
545 l = ctz64(limm);
546 if (l == 0) {
547 r = 0; /* form 0....01....1 */
548 c = ctz64(~limm) - 1;
549 if (h == 0) {
550 r = clz64(~limm); /* form 1..10..01..1 */
551 c += r;
553 } else {
554 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
555 c = r - h - 1;
557 if (ext == TCG_TYPE_I32) {
558 r &= 31;
559 c &= 31;
562 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
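/* Worked example: limm = 0xff00 (64-bit) has l = 8 trailing zeros and
   h = 48 leading zeros, so r = 64 - 8 = 56 and c = r - h - 1 = 7, i.e.
   "8 ones rotated right by 56", which DecodeBitMasks expands back to
   0xff00.  */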
565 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
566 tcg_target_long value)
568 AArch64Insn insn;
569 int i, wantinv, shift;
570 tcg_target_long svalue = value;
571 tcg_target_long ivalue = ~value;
572 tcg_target_long imask;
574 /* For 32-bit values, discard potential garbage in value. For 64-bit
575 values within [2**31, 2**32-1], we can create smaller sequences by
576 interpreting this as a negative 32-bit number, while ensuring that
577 the high 32 bits are cleared by setting SF=0. */
578 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
579 svalue = (int32_t)value;
580 value = (uint32_t)value;
581 ivalue = (uint32_t)ivalue;
582 type = TCG_TYPE_I32;
585 /* Speed things up by handling the common case of small positive
586 and negative values specially. */
587 if ((value & ~0xffffull) == 0) {
588 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
589 return;
590 } else if ((ivalue & ~0xffffull) == 0) {
591 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
592 return;
595 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
596 use the sign-extended value. That lets us match rotated values such
597 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
598 if (is_limm(svalue)) {
599 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
600 return;
603 /* Look for host pointer values within 4G of the PC. This happens
604 often when loading pointers to QEMU's own data structures. */
605 if (type == TCG_TYPE_I64) {
606 tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
607 if (disp == sextract64(disp, 0, 21)) {
608 tcg_out_insn(s, 3406, ADRP, rd, disp);
609 if (value & 0xfff) {
610 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
612 return;
616 /* Would it take fewer insns to begin with MOVN? For the value and its
617 inverse, count the number of 16-bit lanes that are 0. */
618 for (i = wantinv = imask = 0; i < 64; i += 16) {
619 tcg_target_long mask = 0xffffull << i;
620 if ((value & mask) == 0) {
621 wantinv -= 1;
623 if ((ivalue & mask) == 0) {
624 wantinv += 1;
625 imask |= mask;
629 /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
630 insn = I3405_MOVZ;
631 if (wantinv > 0) {
632 value = ivalue;
633 insn = I3405_MOVN;
636 /* Find the lowest lane that is not 0x0000. */
637 shift = ctz64(value) & (63 & -16);
638 tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
640 if (wantinv > 0) {
641 /* Re-invert the value, so MOVK sees non-inverted bits. */
642 value = ~value;
643 /* Clear out all the 0xffff lanes. */
644 value ^= imask;
646 /* Clear out the lane that we just set. */
647 value &= ~(0xffffUL << shift);
649 /* Iterate until all lanes have been set, and thus cleared from VALUE. */
650 while (value) {
651 shift = ctz64(value) & (63 & -16);
652 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
653 value &= ~(0xffffUL << shift);
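/* Worked example: value = 0xffff5678ffff1234 has no zero lanes but two
   0xffff lanes, so wantinv ends up positive and the MOVN path is taken:

       movn rd, #0xedcb            -> rd = 0xffffffffffff1234
       movk rd, #0x5678, lsl #32   -> rd = 0xffff5678ffff1234

   Only the lanes that differ from 0xffff need a follow-up MOVK.  */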
657 /* Define something more legible for general use. */
658 #define tcg_out_ldst_r tcg_out_insn_3310
660 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
661 TCGReg rd, TCGReg rn, intptr_t offset)
663 TCGMemOp size = (uint32_t)insn >> 30;
665 /* If the offset is naturally aligned and in range, then we can
666 use the scaled uimm12 encoding */
667 if (offset >= 0 && !(offset & ((1 << size) - 1))) {
668 uintptr_t scaled_uimm = offset >> size;
669 if (scaled_uimm <= 0xfff) {
670 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
671 return;
675 /* Small signed offsets can use the unscaled encoding. */
676 if (offset >= -256 && offset < 256) {
677 tcg_out_insn_3312(s, insn, rd, rn, offset);
678 return;
681 /* Worst-case scenario, move offset to temp register, use reg offset. */
682 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
683 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
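/* Examples of the three forms chosen above: an LDRX at offset 8 uses the
   scaled uimm12 form (scaled_uimm = 1); offset -16 falls back to the
   unscaled signed 9-bit form; a large offset such as 0x123456 is first
   materialised into TCG_REG_TMP and uses the register-offset form.  */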
686 static inline void tcg_out_mov(TCGContext *s,
687 TCGType type, TCGReg ret, TCGReg arg)
689 if (ret != arg) {
690 tcg_out_movr(s, type, ret, arg);
694 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
695 TCGReg arg1, intptr_t arg2)
697 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
698 arg, arg1, arg2);
701 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
702 TCGReg arg1, intptr_t arg2)
704 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
705 arg, arg1, arg2);
708 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
709 TCGReg rn, unsigned int a, unsigned int b)
711 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
714 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
715 TCGReg rn, unsigned int a, unsigned int b)
717 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
720 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
721 TCGReg rn, unsigned int a, unsigned int b)
723 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
726 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
727 TCGReg rn, TCGReg rm, unsigned int a)
729 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
732 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
733 TCGReg rd, TCGReg rn, unsigned int m)
735 int bits = ext ? 64 : 32;
736 int max = bits - 1;
737 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
740 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
741 TCGReg rd, TCGReg rn, unsigned int m)
743 int max = ext ? 63 : 31;
744 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
747 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
748 TCGReg rd, TCGReg rn, unsigned int m)
750 int max = ext ? 63 : 31;
751 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
754 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
755 TCGReg rd, TCGReg rn, unsigned int m)
757 int max = ext ? 63 : 31;
758 tcg_out_extr(s, ext, rd, rn, rn, m & max);
761 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
762 TCGReg rd, TCGReg rn, unsigned int m)
764 int bits = ext ? 64 : 32;
765 int max = bits - 1;
766 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
769 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
770 TCGReg rn, unsigned lsb, unsigned width)
772 unsigned size = ext ? 64 : 32;
773 unsigned a = (size - lsb) & (size - 1);
774 unsigned b = width - 1;
775 tcg_out_bfm(s, ext, rd, rn, a, b);
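/* E.g. a 32-bit deposit at lsb = 8, width = 8 becomes
   tcg_out_bfm(s, ext, rd, rn, 24, 7), i.e. the BFI rd, rn, #8, #8 alias
   of BFM: immr = (32 - lsb) & 31, imms = width - 1.  */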
778 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
779 tcg_target_long b, bool const_b)
781 if (const_b) {
782 /* Using CMP or CMN aliases. */
783 if (b >= 0) {
784 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
785 } else {
786 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
788 } else {
789 /* Using CMP alias SUBS wzr, Wn, Wm */
790 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
794 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
796 ptrdiff_t offset = target - s->code_ptr;
797 assert(offset == sextract64(offset, 0, 26));
798 tcg_out_insn(s, 3206, B, offset);
801 static inline void tcg_out_goto_noaddr(TCGContext *s)
803     /* Take care here not to modify the branch target by reading from
804        the buffer. This ensures that caches and memory are kept coherent during
805 retranslation. Mask away possible garbage in the high bits for the
806 first translation, while keeping the offset bits for retranslation. */
807 uint32_t old = tcg_in32(s);
808 tcg_out_insn(s, 3206, B, old);
811 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
813 /* See comments in tcg_out_goto_noaddr. */
814 uint32_t old = tcg_in32(s) >> 5;
815 tcg_out_insn(s, 3202, B_C, c, old);
818 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
820 tcg_out_insn(s, 3207, BLR, reg);
823 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
825 ptrdiff_t offset = target - s->code_ptr;
826 if (offset == sextract64(offset, 0, 26)) {
827 tcg_out_insn(s, 3206, BL, offset);
828 } else {
829 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
830 tcg_out_callr(s, TCG_REG_TMP);
834 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
836 tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
837 tcg_insn_unit *target = (tcg_insn_unit *)addr;
839 reloc_pc26(code_ptr, target);
840 flush_icache_range(jmp_addr, jmp_addr + 4);
843 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
845 if (!l->has_value) {
846 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
847 tcg_out_goto_noaddr(s);
848 } else {
849 tcg_out_goto(s, l->u.value_ptr);
853 static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
854 TCGArg b, bool b_const, TCGLabel *l)
856 intptr_t offset;
857 bool need_cmp;
859 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
860 need_cmp = false;
861 } else {
862 need_cmp = true;
863 tcg_out_cmp(s, ext, a, b, b_const);
866 if (!l->has_value) {
867 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
868 offset = tcg_in32(s) >> 5;
869 } else {
870 offset = l->u.value_ptr - s->code_ptr;
871 assert(offset == sextract64(offset, 0, 19));
874 if (need_cmp) {
875 tcg_out_insn(s, 3202, B_C, c, offset);
876 } else if (c == TCG_COND_EQ) {
877 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
878 } else {
879 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
883 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
885 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
888 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
890 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
893 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
895 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
898 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
899 TCGReg rd, TCGReg rn)
901 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
902 int bits = (8 << s_bits) - 1;
903 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
906 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
907 TCGReg rd, TCGReg rn)
909 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
910 int bits = (8 << s_bits) - 1;
911 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
914 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
915 TCGReg rn, int64_t aimm)
917 if (aimm >= 0) {
918 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
919 } else {
920 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
924 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
925 TCGReg rh, TCGReg al, TCGReg ah,
926 tcg_target_long bl, tcg_target_long bh,
927 bool const_bl, bool const_bh, bool sub)
929 TCGReg orig_rl = rl;
930 AArch64Insn insn;
932 if (rl == ah || (!const_bh && rl == bh)) {
933 rl = TCG_REG_TMP;
936 if (const_bl) {
937 insn = I3401_ADDSI;
938 if ((bl < 0) ^ sub) {
939 insn = I3401_SUBSI;
940 bl = -bl;
942 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
943 } else {
944 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
947 insn = I3503_ADC;
948 if (const_bh) {
949 /* Note that the only two constants we support are 0 and -1, and
950 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
951 if ((bh != 0) ^ sub) {
952 insn = I3503_SBC;
954 bh = TCG_REG_XZR;
955 } else if (sub) {
956 insn = I3503_SBC;
958 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
960 tcg_out_mov(s, ext, orig_rl, rl);
963 #ifdef CONFIG_SOFTMMU
964 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
965 * TCGMemOpIdx oi, uintptr_t ra)
967 static void * const qemu_ld_helpers[16] = {
968 [MO_UB] = helper_ret_ldub_mmu,
969 [MO_LEUW] = helper_le_lduw_mmu,
970 [MO_LEUL] = helper_le_ldul_mmu,
971 [MO_LEQ] = helper_le_ldq_mmu,
972 [MO_BEUW] = helper_be_lduw_mmu,
973 [MO_BEUL] = helper_be_ldul_mmu,
974 [MO_BEQ] = helper_be_ldq_mmu,
977 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
978 * uintxx_t val, TCGMemOpIdx oi,
979 * uintptr_t ra)
981 static void * const qemu_st_helpers[16] = {
982 [MO_UB] = helper_ret_stb_mmu,
983 [MO_LEUW] = helper_le_stw_mmu,
984 [MO_LEUL] = helper_le_stl_mmu,
985 [MO_LEQ] = helper_le_stq_mmu,
986 [MO_BEUW] = helper_be_stw_mmu,
987 [MO_BEUL] = helper_be_stl_mmu,
988 [MO_BEQ] = helper_be_stq_mmu,
991 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
993 ptrdiff_t offset = tcg_pcrel_diff(s, target);
994 assert(offset == sextract64(offset, 0, 21));
995 tcg_out_insn(s, 3406, ADR, rd, offset);
998 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1000 TCGMemOpIdx oi = lb->oi;
1001 TCGMemOp opc = get_memop(oi);
1002 TCGMemOp size = opc & MO_SIZE;
1004 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1006 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1007 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1008 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1009 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1010 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1011 if (opc & MO_SIGN) {
1012 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1013 } else {
1014 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1017 tcg_out_goto(s, lb->raddr);
1020 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1022 TCGMemOpIdx oi = lb->oi;
1023 TCGMemOp opc = get_memop(oi);
1024 TCGMemOp size = opc & MO_SIZE;
1026 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1028 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1029 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1030 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1031 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1032 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1033 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1034 tcg_out_goto(s, lb->raddr);
1037 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1038 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1039 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1041 TCGLabelQemuLdst *label = new_ldst_label(s);
1043 label->is_ld = is_ld;
1044 label->oi = oi;
1045 label->type = ext;
1046 label->datalo_reg = data_reg;
1047 label->addrlo_reg = addr_reg;
1048 label->raddr = raddr;
1049 label->label_ptr[0] = label_ptr;
1052 /* Load and compare a TLB entry, emitting the conditional jump to the
1053 slow path for the failure case, which will be patched later when finalizing
1054 the slow path. Generated code returns the host addend in X1,
1055 clobbers X0,X2,X3,TMP. */
1056 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1057 tcg_insn_unit **label_ptr, int mem_index,
1058 bool is_read)
1060 int tlb_offset = is_read ?
1061 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1062 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1063 int s_mask = (1 << (opc & MO_SIZE)) - 1;
1064 TCGReg base = TCG_AREG0, x3;
1065 uint64_t tlb_mask;
1067 /* For aligned accesses, we check the first byte and include the alignment
1068 bits within the address. For unaligned access, we check that we don't
1069 cross pages using the address of the last byte of the access. */
1070 if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) {
1071 tlb_mask = TARGET_PAGE_MASK | s_mask;
1072 x3 = addr_reg;
1073 } else {
1074 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1075 TCG_REG_X3, addr_reg, s_mask);
1076 tlb_mask = TARGET_PAGE_MASK;
1077 x3 = TCG_REG_X3;
1080 /* Extract the TLB index from the address into X0.
1081 X0<CPU_TLB_BITS:0> =
1082 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1083 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1084 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1086 /* Store the page mask part of the address into X3. */
1087 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1088 TCG_REG_X3, x3, tlb_mask);
1090 /* Add any "high bits" from the tlb offset to the env address into X2,
1091 to take advantage of the LSL12 form of the ADDI instruction.
1092 X2 = env + (tlb_offset & 0xfff000) */
1093 if (tlb_offset & 0xfff000) {
1094 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1095 tlb_offset & 0xfff000);
1096 base = TCG_REG_X2;
1099 /* Merge the tlb index contribution into X2.
1100 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1101 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1102 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1104 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1105 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1106 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1107 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1109 /* Load the tlb addend. Do that early to avoid stalling.
1110 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1111 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1112 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1113 (is_read ? offsetof(CPUTLBEntry, addr_read)
1114 : offsetof(CPUTLBEntry, addr_write)));
1116 /* Perform the address comparison. */
1117 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1119 /* If not equal, we jump to the slow path. */
1120 *label_ptr = s->code_ptr;
1121 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
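/* Schematically, the fast path emitted above is:
       extract the TLB index from the address into x0
       and   x3, addr (or addr + s_mask), #tlb_mask
       add   x2, env, x0, lsl #CPU_TLB_ENTRY_BITS   (plus any high offset bits)
       ldr   x0, [x2, #addr_read/addr_write]        (tlb comparator)
       ldr   x1, [x2, #addend]                      (host addend)
       cmp   x0, x3
       b.ne  slow_path                              (patched when finalised)
   leaving the host addend in x1 for the fast-path access.  */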
1124 #endif /* CONFIG_SOFTMMU */
1126 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1127 TCGReg data_r, TCGReg addr_r,
1128 TCGType otype, TCGReg off_r)
1130 const TCGMemOp bswap = memop & MO_BSWAP;
1132 switch (memop & MO_SSIZE) {
1133 case MO_UB:
1134 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1135 break;
1136 case MO_SB:
1137 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1138 data_r, addr_r, otype, off_r);
1139 break;
1140 case MO_UW:
1141 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1142 if (bswap) {
1143 tcg_out_rev16(s, data_r, data_r);
1145 break;
1146 case MO_SW:
1147 if (bswap) {
1148 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1149 tcg_out_rev16(s, data_r, data_r);
1150 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1151 } else {
1152 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1153 data_r, addr_r, otype, off_r);
1155 break;
1156 case MO_UL:
1157 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1158 if (bswap) {
1159 tcg_out_rev32(s, data_r, data_r);
1161 break;
1162 case MO_SL:
1163 if (bswap) {
1164 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1165 tcg_out_rev32(s, data_r, data_r);
1166 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1167 } else {
1168 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1170 break;
1171 case MO_Q:
1172 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1173 if (bswap) {
1174 tcg_out_rev64(s, data_r, data_r);
1176 break;
1177 default:
1178 tcg_abort();
1182 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1183 TCGReg data_r, TCGReg addr_r,
1184 TCGType otype, TCGReg off_r)
1186 const TCGMemOp bswap = memop & MO_BSWAP;
1188 switch (memop & MO_SIZE) {
1189 case MO_8:
1190 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1191 break;
1192 case MO_16:
1193 if (bswap && data_r != TCG_REG_XZR) {
1194 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1195 data_r = TCG_REG_TMP;
1197 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1198 break;
1199 case MO_32:
1200 if (bswap && data_r != TCG_REG_XZR) {
1201 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1202 data_r = TCG_REG_TMP;
1204 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1205 break;
1206 case MO_64:
1207 if (bswap && data_r != TCG_REG_XZR) {
1208 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1209 data_r = TCG_REG_TMP;
1211 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1212 break;
1213 default:
1214 tcg_abort();
1218 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1219 TCGMemOpIdx oi, TCGType ext)
1221 TCGMemOp memop = get_memop(oi);
1222 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1223 #ifdef CONFIG_SOFTMMU
1224 unsigned mem_index = get_mmuidx(oi);
1225 tcg_insn_unit *label_ptr;
1227 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1228 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1229 TCG_REG_X1, otype, addr_reg);
1230 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1231 s->code_ptr, label_ptr);
1232 #else /* !CONFIG_SOFTMMU */
1233 if (USE_GUEST_BASE) {
1234 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1235 TCG_REG_GUEST_BASE, otype, addr_reg);
1236 } else {
1237 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1238 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1240 #endif /* CONFIG_SOFTMMU */
1243 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1244 TCGMemOpIdx oi)
1246 TCGMemOp memop = get_memop(oi);
1247 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1248 #ifdef CONFIG_SOFTMMU
1249 unsigned mem_index = get_mmuidx(oi);
1250 tcg_insn_unit *label_ptr;
1252 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1253 tcg_out_qemu_st_direct(s, memop, data_reg,
1254 TCG_REG_X1, otype, addr_reg);
1255 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1256 data_reg, addr_reg, s->code_ptr, label_ptr);
1257 #else /* !CONFIG_SOFTMMU */
1258 if (USE_GUEST_BASE) {
1259 tcg_out_qemu_st_direct(s, memop, data_reg,
1260 TCG_REG_GUEST_BASE, otype, addr_reg);
1261 } else {
1262 tcg_out_qemu_st_direct(s, memop, data_reg,
1263 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1265 #endif /* CONFIG_SOFTMMU */
1268 static tcg_insn_unit *tb_ret_addr;
1270 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1271 const TCGArg args[TCG_MAX_OP_ARGS],
1272 const int const_args[TCG_MAX_OP_ARGS])
1274 /* 99% of the time, we can signal the use of extension registers
1275 by looking to see if the opcode handles 64-bit data. */
1276 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1278 /* Hoist the loads of the most common arguments. */
1279 TCGArg a0 = args[0];
1280 TCGArg a1 = args[1];
1281 TCGArg a2 = args[2];
1282 int c2 = const_args[2];
1284 /* Some operands are defined with "rZ" constraint, a register or
1285 the zero register. These need not actually test args[I] == 0. */
1286 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1288 switch (opc) {
1289 case INDEX_op_exit_tb:
1290 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1291 tcg_out_goto(s, tb_ret_addr);
1292 break;
1294 case INDEX_op_goto_tb:
1295 #ifndef USE_DIRECT_JUMP
1296 #error "USE_DIRECT_JUMP required for aarch64"
1297 #endif
1298 assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1299 s->tb_jmp_offset[a0] = tcg_current_code_size(s);
1300 /* actual branch destination will be patched by
1301 aarch64_tb_set_jmp_target later, beware retranslation. */
1302 tcg_out_goto_noaddr(s);
1303 s->tb_next_offset[a0] = tcg_current_code_size(s);
1304 break;
1306 case INDEX_op_br:
1307 tcg_out_goto_label(s, arg_label(a0));
1308 break;
1310 case INDEX_op_ld8u_i32:
1311 case INDEX_op_ld8u_i64:
1312 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1313 break;
1314 case INDEX_op_ld8s_i32:
1315 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1316 break;
1317 case INDEX_op_ld8s_i64:
1318 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1319 break;
1320 case INDEX_op_ld16u_i32:
1321 case INDEX_op_ld16u_i64:
1322 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1323 break;
1324 case INDEX_op_ld16s_i32:
1325 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1326 break;
1327 case INDEX_op_ld16s_i64:
1328 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1329 break;
1330 case INDEX_op_ld_i32:
1331 case INDEX_op_ld32u_i64:
1332 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1333 break;
1334 case INDEX_op_ld32s_i64:
1335 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1336 break;
1337 case INDEX_op_ld_i64:
1338 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1339 break;
1341 case INDEX_op_st8_i32:
1342 case INDEX_op_st8_i64:
1343 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1344 break;
1345 case INDEX_op_st16_i32:
1346 case INDEX_op_st16_i64:
1347 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1348 break;
1349 case INDEX_op_st_i32:
1350 case INDEX_op_st32_i64:
1351 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1352 break;
1353 case INDEX_op_st_i64:
1354 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1355 break;
1357 case INDEX_op_add_i32:
1358 a2 = (int32_t)a2;
1359 /* FALLTHRU */
1360 case INDEX_op_add_i64:
1361 if (c2) {
1362 tcg_out_addsubi(s, ext, a0, a1, a2);
1363 } else {
1364 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1366 break;
1368 case INDEX_op_sub_i32:
1369 a2 = (int32_t)a2;
1370 /* FALLTHRU */
1371 case INDEX_op_sub_i64:
1372 if (c2) {
1373 tcg_out_addsubi(s, ext, a0, a1, -a2);
1374 } else {
1375 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1377 break;
1379 case INDEX_op_neg_i64:
1380 case INDEX_op_neg_i32:
1381 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1382 break;
1384 case INDEX_op_and_i32:
1385 a2 = (int32_t)a2;
1386 /* FALLTHRU */
1387 case INDEX_op_and_i64:
1388 if (c2) {
1389 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1390 } else {
1391 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1393 break;
1395 case INDEX_op_andc_i32:
1396 a2 = (int32_t)a2;
1397 /* FALLTHRU */
1398 case INDEX_op_andc_i64:
1399 if (c2) {
1400 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1401 } else {
1402 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1404 break;
1406 case INDEX_op_or_i32:
1407 a2 = (int32_t)a2;
1408 /* FALLTHRU */
1409 case INDEX_op_or_i64:
1410 if (c2) {
1411 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1412 } else {
1413 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1415 break;
1417 case INDEX_op_orc_i32:
1418 a2 = (int32_t)a2;
1419 /* FALLTHRU */
1420 case INDEX_op_orc_i64:
1421 if (c2) {
1422 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1423 } else {
1424 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1426 break;
1428 case INDEX_op_xor_i32:
1429 a2 = (int32_t)a2;
1430 /* FALLTHRU */
1431 case INDEX_op_xor_i64:
1432 if (c2) {
1433 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1434 } else {
1435 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1437 break;
1439 case INDEX_op_eqv_i32:
1440 a2 = (int32_t)a2;
1441 /* FALLTHRU */
1442 case INDEX_op_eqv_i64:
1443 if (c2) {
1444 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1445 } else {
1446 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1448 break;
1450 case INDEX_op_not_i64:
1451 case INDEX_op_not_i32:
1452 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1453 break;
1455 case INDEX_op_mul_i64:
1456 case INDEX_op_mul_i32:
1457 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1458 break;
1460 case INDEX_op_div_i64:
1461 case INDEX_op_div_i32:
1462 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1463 break;
1464 case INDEX_op_divu_i64:
1465 case INDEX_op_divu_i32:
1466 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1467 break;
1469 case INDEX_op_rem_i64:
1470 case INDEX_op_rem_i32:
1471 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1472 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1473 break;
1474 case INDEX_op_remu_i64:
1475 case INDEX_op_remu_i32:
1476 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1477 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1478 break;
1480 case INDEX_op_shl_i64:
1481 case INDEX_op_shl_i32:
1482 if (c2) {
1483 tcg_out_shl(s, ext, a0, a1, a2);
1484 } else {
1485 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1487 break;
1489 case INDEX_op_shr_i64:
1490 case INDEX_op_shr_i32:
1491 if (c2) {
1492 tcg_out_shr(s, ext, a0, a1, a2);
1493 } else {
1494 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1496 break;
1498 case INDEX_op_sar_i64:
1499 case INDEX_op_sar_i32:
1500 if (c2) {
1501 tcg_out_sar(s, ext, a0, a1, a2);
1502 } else {
1503 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1505 break;
1507 case INDEX_op_rotr_i64:
1508 case INDEX_op_rotr_i32:
1509 if (c2) {
1510 tcg_out_rotr(s, ext, a0, a1, a2);
1511 } else {
1512 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1514 break;
1516 case INDEX_op_rotl_i64:
1517 case INDEX_op_rotl_i32:
1518 if (c2) {
1519 tcg_out_rotl(s, ext, a0, a1, a2);
1520 } else {
1521 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1522 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1524 break;
1526 case INDEX_op_brcond_i32:
1527 a1 = (int32_t)a1;
1528 /* FALLTHRU */
1529 case INDEX_op_brcond_i64:
1530 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1531 break;
1533 case INDEX_op_setcond_i32:
1534 a2 = (int32_t)a2;
1535 /* FALLTHRU */
1536 case INDEX_op_setcond_i64:
1537 tcg_out_cmp(s, ext, a1, a2, c2);
1538 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1539 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1540 TCG_REG_XZR, tcg_invert_cond(args[3]));
1541 break;
1543 case INDEX_op_movcond_i32:
1544 a2 = (int32_t)a2;
1545 /* FALLTHRU */
1546 case INDEX_op_movcond_i64:
1547 tcg_out_cmp(s, ext, a1, a2, c2);
1548 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1549 break;
1551 case INDEX_op_qemu_ld_i32:
1552 case INDEX_op_qemu_ld_i64:
1553 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1554 break;
1555 case INDEX_op_qemu_st_i32:
1556 case INDEX_op_qemu_st_i64:
1557 tcg_out_qemu_st(s, REG0(0), a1, a2);
1558 break;
1560 case INDEX_op_bswap64_i64:
1561 tcg_out_rev64(s, a0, a1);
1562 break;
1563 case INDEX_op_bswap32_i64:
1564 case INDEX_op_bswap32_i32:
1565 tcg_out_rev32(s, a0, a1);
1566 break;
1567 case INDEX_op_bswap16_i64:
1568 case INDEX_op_bswap16_i32:
1569 tcg_out_rev16(s, a0, a1);
1570 break;
1572 case INDEX_op_ext8s_i64:
1573 case INDEX_op_ext8s_i32:
1574 tcg_out_sxt(s, ext, MO_8, a0, a1);
1575 break;
1576 case INDEX_op_ext16s_i64:
1577 case INDEX_op_ext16s_i32:
1578 tcg_out_sxt(s, ext, MO_16, a0, a1);
1579 break;
1580 case INDEX_op_ext_i32_i64:
1581 case INDEX_op_ext32s_i64:
1582 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1583 break;
1584 case INDEX_op_ext8u_i64:
1585 case INDEX_op_ext8u_i32:
1586 tcg_out_uxt(s, MO_8, a0, a1);
1587 break;
1588 case INDEX_op_ext16u_i64:
1589 case INDEX_op_ext16u_i32:
1590 tcg_out_uxt(s, MO_16, a0, a1);
1591 break;
1592 case INDEX_op_extu_i32_i64:
1593 case INDEX_op_ext32u_i64:
1594 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1595 break;
1597 case INDEX_op_deposit_i64:
1598 case INDEX_op_deposit_i32:
1599 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1600 break;
1602 case INDEX_op_add2_i32:
1603 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1604 (int32_t)args[4], args[5], const_args[4],
1605 const_args[5], false);
1606 break;
1607 case INDEX_op_add2_i64:
1608 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1609 args[5], const_args[4], const_args[5], false);
1610 break;
1611 case INDEX_op_sub2_i32:
1612 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1613 (int32_t)args[4], args[5], const_args[4],
1614 const_args[5], true);
1615 break;
1616 case INDEX_op_sub2_i64:
1617 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1618 args[5], const_args[4], const_args[5], true);
1619 break;
1621 case INDEX_op_muluh_i64:
1622 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1623 break;
1624 case INDEX_op_mulsh_i64:
1625 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1626 break;
1628 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1629 case INDEX_op_mov_i64:
1630 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1631 case INDEX_op_movi_i64:
1632 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1633 default:
1634 tcg_abort();
1637 #undef REG0
1640 static const TCGTargetOpDef aarch64_op_defs[] = {
1641 { INDEX_op_exit_tb, { } },
1642 { INDEX_op_goto_tb, { } },
1643 { INDEX_op_br, { } },
1645 { INDEX_op_ld8u_i32, { "r", "r" } },
1646 { INDEX_op_ld8s_i32, { "r", "r" } },
1647 { INDEX_op_ld16u_i32, { "r", "r" } },
1648 { INDEX_op_ld16s_i32, { "r", "r" } },
1649 { INDEX_op_ld_i32, { "r", "r" } },
1650 { INDEX_op_ld8u_i64, { "r", "r" } },
1651 { INDEX_op_ld8s_i64, { "r", "r" } },
1652 { INDEX_op_ld16u_i64, { "r", "r" } },
1653 { INDEX_op_ld16s_i64, { "r", "r" } },
1654 { INDEX_op_ld32u_i64, { "r", "r" } },
1655 { INDEX_op_ld32s_i64, { "r", "r" } },
1656 { INDEX_op_ld_i64, { "r", "r" } },
1658 { INDEX_op_st8_i32, { "rZ", "r" } },
1659 { INDEX_op_st16_i32, { "rZ", "r" } },
1660 { INDEX_op_st_i32, { "rZ", "r" } },
1661 { INDEX_op_st8_i64, { "rZ", "r" } },
1662 { INDEX_op_st16_i64, { "rZ", "r" } },
1663 { INDEX_op_st32_i64, { "rZ", "r" } },
1664 { INDEX_op_st_i64, { "rZ", "r" } },
1666 { INDEX_op_add_i32, { "r", "r", "rA" } },
1667 { INDEX_op_add_i64, { "r", "r", "rA" } },
1668 { INDEX_op_sub_i32, { "r", "r", "rA" } },
1669 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1670 { INDEX_op_mul_i32, { "r", "r", "r" } },
1671 { INDEX_op_mul_i64, { "r", "r", "r" } },
1672 { INDEX_op_div_i32, { "r", "r", "r" } },
1673 { INDEX_op_div_i64, { "r", "r", "r" } },
1674 { INDEX_op_divu_i32, { "r", "r", "r" } },
1675 { INDEX_op_divu_i64, { "r", "r", "r" } },
1676 { INDEX_op_rem_i32, { "r", "r", "r" } },
1677 { INDEX_op_rem_i64, { "r", "r", "r" } },
1678 { INDEX_op_remu_i32, { "r", "r", "r" } },
1679 { INDEX_op_remu_i64, { "r", "r", "r" } },
1680 { INDEX_op_and_i32, { "r", "r", "rL" } },
1681 { INDEX_op_and_i64, { "r", "r", "rL" } },
1682 { INDEX_op_or_i32, { "r", "r", "rL" } },
1683 { INDEX_op_or_i64, { "r", "r", "rL" } },
1684 { INDEX_op_xor_i32, { "r", "r", "rL" } },
1685 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1686 { INDEX_op_andc_i32, { "r", "r", "rL" } },
1687 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1688 { INDEX_op_orc_i32, { "r", "r", "rL" } },
1689 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1690 { INDEX_op_eqv_i32, { "r", "r", "rL" } },
1691 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1693 { INDEX_op_neg_i32, { "r", "r" } },
1694 { INDEX_op_neg_i64, { "r", "r" } },
1695 { INDEX_op_not_i32, { "r", "r" } },
1696 { INDEX_op_not_i64, { "r", "r" } },
1698 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1699 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1700 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1701 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1702 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1703 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1704 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1705 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1706 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1707 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1709 { INDEX_op_brcond_i32, { "r", "rA" } },
1710 { INDEX_op_brcond_i64, { "r", "rA" } },
1711 { INDEX_op_setcond_i32, { "r", "r", "rA" } },
1712 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1713 { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
1714 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1716 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1717 { INDEX_op_qemu_ld_i64, { "r", "l" } },
1718 { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1719 { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1721 { INDEX_op_bswap16_i32, { "r", "r" } },
1722 { INDEX_op_bswap32_i32, { "r", "r" } },
1723 { INDEX_op_bswap16_i64, { "r", "r" } },
1724 { INDEX_op_bswap32_i64, { "r", "r" } },
1725 { INDEX_op_bswap64_i64, { "r", "r" } },
1727 { INDEX_op_ext8s_i32, { "r", "r" } },
1728 { INDEX_op_ext16s_i32, { "r", "r" } },
1729 { INDEX_op_ext8u_i32, { "r", "r" } },
1730 { INDEX_op_ext16u_i32, { "r", "r" } },
1732 { INDEX_op_ext8s_i64, { "r", "r" } },
1733 { INDEX_op_ext16s_i64, { "r", "r" } },
1734 { INDEX_op_ext32s_i64, { "r", "r" } },
1735 { INDEX_op_ext8u_i64, { "r", "r" } },
1736 { INDEX_op_ext16u_i64, { "r", "r" } },
1737 { INDEX_op_ext32u_i64, { "r", "r" } },
1738 { INDEX_op_ext_i32_i64, { "r", "r" } },
1739 { INDEX_op_extu_i32_i64, { "r", "r" } },
1741 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1742 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1744 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1745 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1746 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1747 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1749 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1750 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1752 { -1 },
1755 static void tcg_target_init(TCGContext *s)
1757 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1758 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1760 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1761 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1762 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1763 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1764 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1765 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1766 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1767 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1768 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1769 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1770 (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
1772 tcg_regset_clear(s->reserved_regs);
1773 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1774 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1775 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1776 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1778 tcg_add_target_add_op_defs(aarch64_op_defs);
1781 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1782 #define PUSH_SIZE ((30 - 19 + 1) * 8)
1784 #define FRAME_SIZE \
1785 ((PUSH_SIZE \
1786 + TCG_STATIC_CALL_ARGS_SIZE \
1787 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1788 + TCG_TARGET_STACK_ALIGN - 1) \
1789 & ~(TCG_TARGET_STACK_ALIGN - 1))
1791 /* We're expecting a 2 byte uleb128 encoded value. */
1792 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1794 /* We're expecting to use a single ADDI insn. */
1795 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
1797 static void tcg_target_qemu_prologue(TCGContext *s)
1799 TCGReg r;
1801 /* Push (FP, LR) and allocate space for all saved registers. */
1802 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1803 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1805 /* Set up frame pointer for canonical unwinding. */
1806 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1808 /* Store callee-preserved regs x19..x28. */
1809 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1810 int ofs = (r - TCG_REG_X19 + 2) * 8;
1811 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1814 /* Make stack space for TCG locals. */
1815 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1816 FRAME_SIZE - PUSH_SIZE);
1818 /* Inform TCG about how to find TCG locals with register, offset, size. */
1819 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1820 CPU_TEMP_BUF_NLONGS * sizeof(long));
1822 #if !defined(CONFIG_SOFTMMU)
1823 if (USE_GUEST_BASE) {
1824 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
1825 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1827 #endif
1829 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1830 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1832 tb_ret_addr = s->code_ptr;
1834 /* Remove TCG locals stack space. */
1835 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1836 FRAME_SIZE - PUSH_SIZE);
1838 /* Restore registers x19..x28. */
1839 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1840 int ofs = (r - TCG_REG_X19 + 2) * 8;
1841 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1844 /* Pop (FP, LR), restore SP to previous frame. */
1845 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
1846 TCG_REG_SP, PUSH_SIZE, 0, 1);
1847 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
1850 typedef struct {
1851 DebugFrameHeader h;
1852 uint8_t fde_def_cfa[4];
1853 uint8_t fde_reg_ofs[24];
1854 } DebugFrame;
1856 #define ELF_HOST_MACHINE EM_AARCH64
1858 static const DebugFrame debug_frame = {
1859 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1860 .h.cie.id = -1,
1861 .h.cie.version = 1,
1862 .h.cie.code_align = 1,
1863 .h.cie.data_align = 0x78, /* sleb128 -8 */
1864 .h.cie.return_column = TCG_REG_LR,
1866 /* Total FDE size does not include the "len" member. */
1867 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1869 .fde_def_cfa = {
1870 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
1871 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
1872 (FRAME_SIZE >> 7)
1874 .fde_reg_ofs = {
1875 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
1876 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
1877 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
1878 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
1879 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
1880 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
1881 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
1882 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
1883 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
1884     0x80 + 19, 10,              /* DW_CFA_offset, x19, -80 */
1885 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
1886 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
1890 void tcg_register_jit(void *buf, size_t buf_size)
1892 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));