nbd: Rely on block layer to break up large requests
[qemu/ar7.git] / tcg / aarch64 / tcg-target.inc.c
blob08b2d031aa32260bd1c6ecc42747736005f97979
1 /*
2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
28 #endif /* CONFIG_DEBUG_TCG */
30 static const int tcg_target_reg_alloc_order[] = {
31 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
32 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
33 TCG_REG_X28, /* we will reserve this for guest_base if configured */
35 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
36 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
37 TCG_REG_X16, TCG_REG_X17,
39 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
40 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
42 /* X18 reserved by system */
43 /* X19 reserved for AREG0 */
44 /* X29 reserved as fp */
45 /* X30 reserved as temporary */
48 static const int tcg_target_call_iarg_regs[8] = {
49 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
50 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
52 static const int tcg_target_call_oarg_regs[1] = {
53 TCG_REG_X0
56 #define TCG_REG_TMP TCG_REG_X30
58 #ifndef CONFIG_SOFTMMU
59 /* Note that XZR cannot be encoded in the address base register slot,
60 as that actually encodes SP. So if we need to zero-extend the guest
61 address, via the address index register slot, we need to load even
62 a zero guest base into a register. */
63 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
64 #define TCG_REG_GUEST_BASE TCG_REG_X28
65 #endif
67 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
69 ptrdiff_t offset = target - code_ptr;
70 tcg_debug_assert(offset == sextract64(offset, 0, 26));
71 /* read instruction, mask away previous PC_REL26 parameter contents,
72 set the proper offset, then write back the instruction. */
73 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
76 static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
77 tcg_insn_unit *target)
79 ptrdiff_t offset = target - code_ptr;
80 tcg_insn_unit insn;
81 tcg_debug_assert(offset == sextract64(offset, 0, 26));
82 /* read instruction, mask away previous PC_REL26 parameter contents,
83 set the proper offset, then write back the instruction. */
84 insn = atomic_read(code_ptr);
85 atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
88 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
90 ptrdiff_t offset = target - code_ptr;
91 tcg_debug_assert(offset == sextract64(offset, 0, 19));
92 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
95 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
96 intptr_t value, intptr_t addend)
98 tcg_debug_assert(addend == 0);
99 switch (type) {
100 case R_AARCH64_JUMP26:
101 case R_AARCH64_CALL26:
102 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
103 break;
104 case R_AARCH64_CONDBR19:
105 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
106 break;
107 default:
108 tcg_abort();
112 #define TCG_CT_CONST_AIMM 0x100
113 #define TCG_CT_CONST_LIMM 0x200
114 #define TCG_CT_CONST_ZERO 0x400
115 #define TCG_CT_CONST_MONE 0x800
117 /* parse target specific constraints */
118 static int target_parse_constraint(TCGArgConstraint *ct,
119 const char **pct_str)
121 const char *ct_str = *pct_str;
123 switch (ct_str[0]) {
124 case 'r':
125 ct->ct |= TCG_CT_REG;
126 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
127 break;
128 case 'l': /* qemu_ld / qemu_st address, data_reg */
129 ct->ct |= TCG_CT_REG;
130 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
131 #ifdef CONFIG_SOFTMMU
132 /* x0 and x1 will be overwritten when reading the tlb entry,
133 and x2, and x3 for helper args, better to avoid using them. */
134 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
135 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
136 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
137 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
138 #endif
139 break;
140 case 'A': /* Valid for arithmetic immediate (positive or negative). */
141 ct->ct |= TCG_CT_CONST_AIMM;
142 break;
143 case 'L': /* Valid for logical immediate. */
144 ct->ct |= TCG_CT_CONST_LIMM;
145 break;
146 case 'M': /* minus one */
147 ct->ct |= TCG_CT_CONST_MONE;
148 break;
149 case 'Z': /* zero */
150 ct->ct |= TCG_CT_CONST_ZERO;
151 break;
152 default:
153 return -1;
156 ct_str++;
157 *pct_str = ct_str;
158 return 0;
/* Return true if VAL has no bits set outside bits [11:0], or no bits set
   outside bits [23:12]. */
static inline bool is_aimm(uint64_t val)
{
    const uint64_t low12 = 0xfff;
    const uint64_t high12 = 0xfff000;

    if ((val & ~low12) == 0) {
        return true;
    }
    return (val & ~high12) == 0;
}
/* Return true if VAL is accepted as a logical immediate.

   This takes a simplified view of the logical immediates, ignoring the
   replication that can happen across the field.  The bit patterns
   matched are a single contiguous run of ones, i.e.
       0....01....1
       0..01..10..0
   and the inverses of those forms.  All-zeros and all-ones are
   rejected. */
static inline bool is_limm(uint64_t val)
{
    uint64_t lowest_set, carried;

    /* Work on the form whose most significant bit is clear. */
    if (val >> 63) {
        val = ~val;
    }
    if (!val) {
        return false;
    }

    /* Adding the lowest set bit collapses a contiguous run of ones into
       a single bit; anything else leaves more than one bit set. */
    lowest_set = val & -val;
    carried = val + lowest_set;
    return (carried & (carried - 1)) == 0;
}
186 static int tcg_target_const_match(tcg_target_long val, TCGType type,
187 const TCGArgConstraint *arg_ct)
189 int ct = arg_ct->ct;
191 if (ct & TCG_CT_CONST) {
192 return 1;
194 if (type == TCG_TYPE_I32) {
195 val = (int32_t)val;
197 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
198 return 1;
200 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
201 return 1;
203 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
204 return 1;
206 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
207 return 1;
210 return 0;
213 enum aarch64_cond_code {
214 COND_EQ = 0x0,
215 COND_NE = 0x1,
216 COND_CS = 0x2, /* Unsigned greater or equal */
217 COND_HS = COND_CS, /* ALIAS greater or equal */
218 COND_CC = 0x3, /* Unsigned less than */
219 COND_LO = COND_CC, /* ALIAS Lower */
220 COND_MI = 0x4, /* Negative */
221 COND_PL = 0x5, /* Zero or greater */
222 COND_VS = 0x6, /* Overflow */
223 COND_VC = 0x7, /* No overflow */
224 COND_HI = 0x8, /* Unsigned greater than */
225 COND_LS = 0x9, /* Unsigned less or equal */
226 COND_GE = 0xa,
227 COND_LT = 0xb,
228 COND_GT = 0xc,
229 COND_LE = 0xd,
230 COND_AL = 0xe,
231 COND_NV = 0xf, /* behaves like COND_AL here */
234 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
235 [TCG_COND_EQ] = COND_EQ,
236 [TCG_COND_NE] = COND_NE,
237 [TCG_COND_LT] = COND_LT,
238 [TCG_COND_GE] = COND_GE,
239 [TCG_COND_LE] = COND_LE,
240 [TCG_COND_GT] = COND_GT,
241 /* unsigned */
242 [TCG_COND_LTU] = COND_LO,
243 [TCG_COND_GTU] = COND_HI,
244 [TCG_COND_GEU] = COND_HS,
245 [TCG_COND_LEU] = COND_LS,
248 typedef enum {
249 LDST_ST = 0, /* store */
250 LDST_LD = 1, /* load */
251 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
252 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
253 } AArch64LdstType;
255 /* We encode the format of the insn into the beginning of the name, so that
256 we can have the preprocessor help "typecheck" the insn vs the output
257 function. Arm didn't provide us with nice names for the formats, so we
258 use the section number of the architecture reference manual in which the
259 instruction group is described. */
260 typedef enum {
261 /* Compare and branch (immediate). */
262 I3201_CBZ = 0x34000000,
263 I3201_CBNZ = 0x35000000,
265 /* Conditional branch (immediate). */
266 I3202_B_C = 0x54000000,
268 /* Unconditional branch (immediate). */
269 I3206_B = 0x14000000,
270 I3206_BL = 0x94000000,
272 /* Unconditional branch (register). */
273 I3207_BR = 0xd61f0000,
274 I3207_BLR = 0xd63f0000,
275 I3207_RET = 0xd65f0000,
277 /* Load/store register. Described here as 3.3.12, but the helper
278 that emits them can transform to 3.3.10 or 3.3.13. */
279 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
280 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
281 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
282 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
284 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
285 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
286 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
287 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
289 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
290 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
292 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
293 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
294 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
296 I3312_TO_I3310 = 0x00200800,
297 I3312_TO_I3313 = 0x01000000,
299 /* Load/store register pair instructions. */
300 I3314_LDP = 0x28400000,
301 I3314_STP = 0x28000000,
303 /* Add/subtract immediate instructions. */
304 I3401_ADDI = 0x11000000,
305 I3401_ADDSI = 0x31000000,
306 I3401_SUBI = 0x51000000,
307 I3401_SUBSI = 0x71000000,
309 /* Bitfield instructions. */
310 I3402_BFM = 0x33000000,
311 I3402_SBFM = 0x13000000,
312 I3402_UBFM = 0x53000000,
314 /* Extract instruction. */
315 I3403_EXTR = 0x13800000,
317 /* Logical immediate instructions. */
318 I3404_ANDI = 0x12000000,
319 I3404_ORRI = 0x32000000,
320 I3404_EORI = 0x52000000,
322 /* Move wide immediate instructions. */
323 I3405_MOVN = 0x12800000,
324 I3405_MOVZ = 0x52800000,
325 I3405_MOVK = 0x72800000,
327 /* PC relative addressing instructions. */
328 I3406_ADR = 0x10000000,
329 I3406_ADRP = 0x90000000,
331 /* Add/subtract shifted register instructions (without a shift). */
332 I3502_ADD = 0x0b000000,
333 I3502_ADDS = 0x2b000000,
334 I3502_SUB = 0x4b000000,
335 I3502_SUBS = 0x6b000000,
337 /* Add/subtract shifted register instructions (with a shift). */
338 I3502S_ADD_LSL = I3502_ADD,
340 /* Add/subtract with carry instructions. */
341 I3503_ADC = 0x1a000000,
342 I3503_SBC = 0x5a000000,
344 /* Conditional select instructions. */
345 I3506_CSEL = 0x1a800000,
346 I3506_CSINC = 0x1a800400,
348 /* Data-processing (1 source) instructions. */
349 I3507_REV16 = 0x5ac00400,
350 I3507_REV32 = 0x5ac00800,
351 I3507_REV64 = 0x5ac00c00,
353 /* Data-processing (2 source) instructions. */
354 I3508_LSLV = 0x1ac02000,
355 I3508_LSRV = 0x1ac02400,
356 I3508_ASRV = 0x1ac02800,
357 I3508_RORV = 0x1ac02c00,
358 I3508_SMULH = 0x9b407c00,
359 I3508_UMULH = 0x9bc07c00,
360 I3508_UDIV = 0x1ac00800,
361 I3508_SDIV = 0x1ac00c00,
363 /* Data-processing (3 source) instructions. */
364 I3509_MADD = 0x1b000000,
365 I3509_MSUB = 0x1b008000,
367 /* Logical shifted register instructions (without a shift). */
368 I3510_AND = 0x0a000000,
369 I3510_BIC = 0x0a200000,
370 I3510_ORR = 0x2a000000,
371 I3510_ORN = 0x2a200000,
372 I3510_EOR = 0x4a000000,
373 I3510_EON = 0x4a200000,
374 I3510_ANDS = 0x6a000000,
375 } AArch64Insn;
377 static inline uint32_t tcg_in32(TCGContext *s)
379 uint32_t v = *(uint32_t *)s->code_ptr;
380 return v;
383 /* Emit an opcode with "type-checking" of the format. */
384 #define tcg_out_insn(S, FMT, OP, ...) \
385 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
387 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
388 TCGReg rt, int imm19)
390 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
393 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
394 TCGCond c, int imm19)
396 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
399 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
401 tcg_out32(s, insn | (imm26 & 0x03ffffff));
404 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
406 tcg_out32(s, insn | rn << 5);
409 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
410 TCGReg r1, TCGReg r2, TCGReg rn,
411 tcg_target_long ofs, bool pre, bool w)
413 insn |= 1u << 31; /* ext */
414 insn |= pre << 24;
415 insn |= w << 23;
417 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
418 insn |= (ofs & (0x7f << 3)) << (15 - 3);
420 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
423 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
424 TCGReg rd, TCGReg rn, uint64_t aimm)
426 if (aimm > 0xfff) {
427 tcg_debug_assert((aimm & 0xfff) == 0);
428 aimm >>= 12;
429 tcg_debug_assert(aimm <= 0xfff);
430 aimm |= 1 << 12; /* apply LSL 12 */
432 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
435 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
436 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
437 that feed the DecodeBitMasks pseudo function. */
438 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
439 TCGReg rd, TCGReg rn, int n, int immr, int imms)
441 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
442 | rn << 5 | rd);
445 #define tcg_out_insn_3404 tcg_out_insn_3402
447 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
448 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
450 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
451 | rn << 5 | rd);
454 /* This function is used for the Move (wide immediate) instruction group.
455 Note that SHIFT is a full shift count, not the 2 bit HW field. */
456 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
457 TCGReg rd, uint16_t half, unsigned shift)
459 tcg_debug_assert((shift & ~0x30) == 0);
460 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
463 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
464 TCGReg rd, int64_t disp)
466 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
469 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
470 the rare occasion when we actually want to supply a shift amount. */
471 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
472 TCGType ext, TCGReg rd, TCGReg rn,
473 TCGReg rm, int imm6)
475 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
478 /* This function is for 3.5.2 (Add/subtract shifted register),
479 and 3.5.10 (Logical shifted register), for the vast majorty of cases
480 when we don't want to apply a shift. Thus it can also be used for
481 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
482 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
483 TCGReg rd, TCGReg rn, TCGReg rm)
485 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
488 #define tcg_out_insn_3503 tcg_out_insn_3502
489 #define tcg_out_insn_3508 tcg_out_insn_3502
490 #define tcg_out_insn_3510 tcg_out_insn_3502
492 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
493 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
495 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
496 | tcg_cond_to_aarch64[c] << 12);
499 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
500 TCGReg rd, TCGReg rn)
502 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
505 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
506 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
508 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
511 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
512 TCGReg rd, TCGReg base, TCGType ext,
513 TCGReg regoff)
515 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
516 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
517 0x4000 | ext << 13 | base << 5 | rd);
520 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
521 TCGReg rd, TCGReg rn, intptr_t offset)
523 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
526 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
527 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
529 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
530 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
533 /* Register to register move using ORR (shifted register with no shift). */
534 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
536 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
539 /* Register to register move using ADDI (move to/from SP). */
540 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
542 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
545 /* This function is used for the Logical (immediate) instruction group.
546 The value of LIMM must satisfy IS_LIMM. See the comment above about
547 only supporting simplified logical immediates. */
548 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
549 TCGReg rd, TCGReg rn, uint64_t limm)
551 unsigned h, l, r, c;
553 tcg_debug_assert(is_limm(limm));
555 h = clz64(limm);
556 l = ctz64(limm);
557 if (l == 0) {
558 r = 0; /* form 0....01....1 */
559 c = ctz64(~limm) - 1;
560 if (h == 0) {
561 r = clz64(~limm); /* form 1..10..01..1 */
562 c += r;
564 } else {
565 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
566 c = r - h - 1;
568 if (ext == TCG_TYPE_I32) {
569 r &= 31;
570 c &= 31;
573 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
576 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
577 tcg_target_long value)
579 AArch64Insn insn;
580 int i, wantinv, shift;
581 tcg_target_long svalue = value;
582 tcg_target_long ivalue = ~value;
583 tcg_target_long imask;
585 /* For 32-bit values, discard potential garbage in value. For 64-bit
586 values within [2**31, 2**32-1], we can create smaller sequences by
587 interpreting this as a negative 32-bit number, while ensuring that
588 the high 32 bits are cleared by setting SF=0. */
589 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
590 svalue = (int32_t)value;
591 value = (uint32_t)value;
592 ivalue = (uint32_t)ivalue;
593 type = TCG_TYPE_I32;
596 /* Speed things up by handling the common case of small positive
597 and negative values specially. */
598 if ((value & ~0xffffull) == 0) {
599 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
600 return;
601 } else if ((ivalue & ~0xffffull) == 0) {
602 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
603 return;
606 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
607 use the sign-extended value. That lets us match rotated values such
608 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
609 if (is_limm(svalue)) {
610 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
611 return;
614 /* Look for host pointer values within 4G of the PC. This happens
615 often when loading pointers to QEMU's own data structures. */
616 if (type == TCG_TYPE_I64) {
617 tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
618 if (disp == sextract64(disp, 0, 21)) {
619 tcg_out_insn(s, 3406, ADRP, rd, disp);
620 if (value & 0xfff) {
621 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
623 return;
627 /* Would it take fewer insns to begin with MOVN? For the value and its
628 inverse, count the number of 16-bit lanes that are 0. */
629 for (i = wantinv = imask = 0; i < 64; i += 16) {
630 tcg_target_long mask = 0xffffull << i;
631 if ((value & mask) == 0) {
632 wantinv -= 1;
634 if ((ivalue & mask) == 0) {
635 wantinv += 1;
636 imask |= mask;
640 /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
641 insn = I3405_MOVZ;
642 if (wantinv > 0) {
643 value = ivalue;
644 insn = I3405_MOVN;
647 /* Find the lowest lane that is not 0x0000. */
648 shift = ctz64(value) & (63 & -16);
649 tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
651 if (wantinv > 0) {
652 /* Re-invert the value, so MOVK sees non-inverted bits. */
653 value = ~value;
654 /* Clear out all the 0xffff lanes. */
655 value ^= imask;
657 /* Clear out the lane that we just set. */
658 value &= ~(0xffffUL << shift);
660 /* Iterate until all lanes have been set, and thus cleared from VALUE. */
661 while (value) {
662 shift = ctz64(value) & (63 & -16);
663 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
664 value &= ~(0xffffUL << shift);
668 /* Define something more legible for general use. */
669 #define tcg_out_ldst_r tcg_out_insn_3310
671 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
672 TCGReg rd, TCGReg rn, intptr_t offset)
674 TCGMemOp size = (uint32_t)insn >> 30;
676 /* If the offset is naturally aligned and in range, then we can
677 use the scaled uimm12 encoding */
678 if (offset >= 0 && !(offset & ((1 << size) - 1))) {
679 uintptr_t scaled_uimm = offset >> size;
680 if (scaled_uimm <= 0xfff) {
681 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
682 return;
686 /* Small signed offsets can use the unscaled encoding. */
687 if (offset >= -256 && offset < 256) {
688 tcg_out_insn_3312(s, insn, rd, rn, offset);
689 return;
692 /* Worst-case scenario, move offset to temp register, use reg offset. */
693 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
694 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
697 static inline void tcg_out_mov(TCGContext *s,
698 TCGType type, TCGReg ret, TCGReg arg)
700 if (ret != arg) {
701 tcg_out_movr(s, type, ret, arg);
705 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
706 TCGReg arg1, intptr_t arg2)
708 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
709 arg, arg1, arg2);
712 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
713 TCGReg arg1, intptr_t arg2)
715 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
716 arg, arg1, arg2);
719 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
720 TCGReg base, intptr_t ofs)
722 if (val == 0) {
723 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
724 return true;
726 return false;
729 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
730 TCGReg rn, unsigned int a, unsigned int b)
732 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
735 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
736 TCGReg rn, unsigned int a, unsigned int b)
738 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
741 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
742 TCGReg rn, unsigned int a, unsigned int b)
744 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
747 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
748 TCGReg rn, TCGReg rm, unsigned int a)
750 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
753 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
754 TCGReg rd, TCGReg rn, unsigned int m)
756 int bits = ext ? 64 : 32;
757 int max = bits - 1;
758 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
761 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
762 TCGReg rd, TCGReg rn, unsigned int m)
764 int max = ext ? 63 : 31;
765 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
768 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
769 TCGReg rd, TCGReg rn, unsigned int m)
771 int max = ext ? 63 : 31;
772 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
775 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
776 TCGReg rd, TCGReg rn, unsigned int m)
778 int max = ext ? 63 : 31;
779 tcg_out_extr(s, ext, rd, rn, rn, m & max);
782 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
783 TCGReg rd, TCGReg rn, unsigned int m)
785 int bits = ext ? 64 : 32;
786 int max = bits - 1;
787 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
790 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
791 TCGReg rn, unsigned lsb, unsigned width)
793 unsigned size = ext ? 64 : 32;
794 unsigned a = (size - lsb) & (size - 1);
795 unsigned b = width - 1;
796 tcg_out_bfm(s, ext, rd, rn, a, b);
799 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
800 tcg_target_long b, bool const_b)
802 if (const_b) {
803 /* Using CMP or CMN aliases. */
804 if (b >= 0) {
805 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
806 } else {
807 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
809 } else {
810 /* Using CMP alias SUBS wzr, Wn, Wm */
811 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
815 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
817 ptrdiff_t offset = target - s->code_ptr;
818 tcg_debug_assert(offset == sextract64(offset, 0, 26));
819 tcg_out_insn(s, 3206, B, offset);
822 static inline void tcg_out_goto_noaddr(TCGContext *s)
824 /* We pay attention here to not modify the branch target by reading from
825 the buffer. This ensure that caches and memory are kept coherent during
826 retranslation. Mask away possible garbage in the high bits for the
827 first translation, while keeping the offset bits for retranslation. */
828 uint32_t old = tcg_in32(s);
829 tcg_out_insn(s, 3206, B, old);
832 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
834 /* See comments in tcg_out_goto_noaddr. */
835 uint32_t old = tcg_in32(s) >> 5;
836 tcg_out_insn(s, 3202, B_C, c, old);
839 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
841 tcg_out_insn(s, 3207, BLR, reg);
844 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
846 ptrdiff_t offset = target - s->code_ptr;
847 if (offset == sextract64(offset, 0, 26)) {
848 tcg_out_insn(s, 3206, BL, offset);
849 } else {
850 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
851 tcg_out_callr(s, TCG_REG_TMP);
855 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
857 tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
858 tcg_insn_unit *target = (tcg_insn_unit *)addr;
860 reloc_pc26_atomic(code_ptr, target);
861 flush_icache_range(jmp_addr, jmp_addr + 4);
864 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
866 if (!l->has_value) {
867 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
868 tcg_out_goto_noaddr(s);
869 } else {
870 tcg_out_goto(s, l->u.value_ptr);
874 static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
875 TCGArg b, bool b_const, TCGLabel *l)
877 intptr_t offset;
878 bool need_cmp;
880 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
881 need_cmp = false;
882 } else {
883 need_cmp = true;
884 tcg_out_cmp(s, ext, a, b, b_const);
887 if (!l->has_value) {
888 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
889 offset = tcg_in32(s) >> 5;
890 } else {
891 offset = l->u.value_ptr - s->code_ptr;
892 tcg_debug_assert(offset == sextract64(offset, 0, 19));
895 if (need_cmp) {
896 tcg_out_insn(s, 3202, B_C, c, offset);
897 } else if (c == TCG_COND_EQ) {
898 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
899 } else {
900 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
904 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
906 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
909 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
911 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
914 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
916 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
919 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
920 TCGReg rd, TCGReg rn)
922 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
923 int bits = (8 << s_bits) - 1;
924 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
927 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
928 TCGReg rd, TCGReg rn)
930 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
931 int bits = (8 << s_bits) - 1;
932 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
935 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
936 TCGReg rn, int64_t aimm)
938 if (aimm >= 0) {
939 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
940 } else {
941 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
945 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
946 TCGReg rh, TCGReg al, TCGReg ah,
947 tcg_target_long bl, tcg_target_long bh,
948 bool const_bl, bool const_bh, bool sub)
950 TCGReg orig_rl = rl;
951 AArch64Insn insn;
953 if (rl == ah || (!const_bh && rl == bh)) {
954 rl = TCG_REG_TMP;
957 if (const_bl) {
958 insn = I3401_ADDSI;
959 if ((bl < 0) ^ sub) {
960 insn = I3401_SUBSI;
961 bl = -bl;
963 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
964 } else {
965 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
968 insn = I3503_ADC;
969 if (const_bh) {
970 /* Note that the only two constants we support are 0 and -1, and
971 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
972 if ((bh != 0) ^ sub) {
973 insn = I3503_SBC;
975 bh = TCG_REG_XZR;
976 } else if (sub) {
977 insn = I3503_SBC;
979 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
981 tcg_out_mov(s, ext, orig_rl, rl);
984 #ifdef CONFIG_SOFTMMU
985 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
986 * TCGMemOpIdx oi, uintptr_t ra)
988 static void * const qemu_ld_helpers[16] = {
989 [MO_UB] = helper_ret_ldub_mmu,
990 [MO_LEUW] = helper_le_lduw_mmu,
991 [MO_LEUL] = helper_le_ldul_mmu,
992 [MO_LEQ] = helper_le_ldq_mmu,
993 [MO_BEUW] = helper_be_lduw_mmu,
994 [MO_BEUL] = helper_be_ldul_mmu,
995 [MO_BEQ] = helper_be_ldq_mmu,
998 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
999 * uintxx_t val, TCGMemOpIdx oi,
1000 * uintptr_t ra)
1002 static void * const qemu_st_helpers[16] = {
1003 [MO_UB] = helper_ret_stb_mmu,
1004 [MO_LEUW] = helper_le_stw_mmu,
1005 [MO_LEUL] = helper_le_stl_mmu,
1006 [MO_LEQ] = helper_le_stq_mmu,
1007 [MO_BEUW] = helper_be_stw_mmu,
1008 [MO_BEUL] = helper_be_stl_mmu,
1009 [MO_BEQ] = helper_be_stq_mmu,
1012 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1014 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1015 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1016 tcg_out_insn(s, 3406, ADR, rd, offset);
1019 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1021 TCGMemOpIdx oi = lb->oi;
1022 TCGMemOp opc = get_memop(oi);
1023 TCGMemOp size = opc & MO_SIZE;
1025 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1027 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1028 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1029 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1030 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1031 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1032 if (opc & MO_SIGN) {
1033 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1034 } else {
1035 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1038 tcg_out_goto(s, lb->raddr);
1041 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1043 TCGMemOpIdx oi = lb->oi;
1044 TCGMemOp opc = get_memop(oi);
1045 TCGMemOp size = opc & MO_SIZE;
1047 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1049 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1050 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1051 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1052 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1053 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1054 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1055 tcg_out_goto(s, lb->raddr);
1058 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1059 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1060 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1062 TCGLabelQemuLdst *label = new_ldst_label(s);
1064 label->is_ld = is_ld;
1065 label->oi = oi;
1066 label->type = ext;
1067 label->datalo_reg = data_reg;
1068 label->addrlo_reg = addr_reg;
1069 label->raddr = raddr;
1070 label->label_ptr[0] = label_ptr;
1073 /* Load and compare a TLB entry, emitting the conditional jump to the
1074 slow path for the failure case, which will be patched later when finalizing
1075 the slow path. Generated code returns the host addend in X1,
1076 clobbers X0,X2,X3,TMP. */
1077 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1078 tcg_insn_unit **label_ptr, int mem_index,
1079 bool is_read)
1081 int tlb_offset = is_read ?
1082 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1083 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1084 int a_bits = get_alignment_bits(opc);
1085 TCGReg base = TCG_AREG0, x3;
1086 uint64_t tlb_mask;
1088 /* For aligned accesses, we check the first byte and include the alignment
1089 bits within the address. For unaligned access, we check that we don't
1090 cross pages using the address of the last byte of the access. */
1091 if (a_bits >= 0) {
1092 /* A byte access or an alignment check required */
1093 tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1);
1094 x3 = addr_reg;
1095 } else {
1096 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1097 TCG_REG_X3, addr_reg, (1 << (opc & MO_SIZE)) - 1);
1098 tlb_mask = TARGET_PAGE_MASK;
1099 x3 = TCG_REG_X3;
1102 /* Extract the TLB index from the address into X0.
1103 X0<CPU_TLB_BITS:0> =
1104 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1105 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1106 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1108 /* Store the page mask part of the address into X3. */
1109 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1110 TCG_REG_X3, x3, tlb_mask);
1112 /* Add any "high bits" from the tlb offset to the env address into X2,
1113 to take advantage of the LSL12 form of the ADDI instruction.
1114 X2 = env + (tlb_offset & 0xfff000) */
1115 if (tlb_offset & 0xfff000) {
1116 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1117 tlb_offset & 0xfff000);
1118 base = TCG_REG_X2;
1121 /* Merge the tlb index contribution into X2.
1122 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1123 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1124 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1126 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1127 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1128 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1129 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1131 /* Load the tlb addend. Do that early to avoid stalling.
1132 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1133 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1134 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1135 (is_read ? offsetof(CPUTLBEntry, addr_read)
1136 : offsetof(CPUTLBEntry, addr_write)));
1138 /* Perform the address comparison. */
1139 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1141 /* If not equal, we jump to the slow path. */
1142 *label_ptr = s->code_ptr;
1143 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1146 #endif /* CONFIG_SOFTMMU */
1148 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1149 TCGReg data_r, TCGReg addr_r,
1150 TCGType otype, TCGReg off_r)
1152 const TCGMemOp bswap = memop & MO_BSWAP;
1154 switch (memop & MO_SSIZE) {
1155 case MO_UB:
1156 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1157 break;
1158 case MO_SB:
1159 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1160 data_r, addr_r, otype, off_r);
1161 break;
1162 case MO_UW:
1163 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1164 if (bswap) {
1165 tcg_out_rev16(s, data_r, data_r);
1167 break;
1168 case MO_SW:
1169 if (bswap) {
1170 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1171 tcg_out_rev16(s, data_r, data_r);
1172 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1173 } else {
1174 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1175 data_r, addr_r, otype, off_r);
1177 break;
1178 case MO_UL:
1179 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1180 if (bswap) {
1181 tcg_out_rev32(s, data_r, data_r);
1183 break;
1184 case MO_SL:
1185 if (bswap) {
1186 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1187 tcg_out_rev32(s, data_r, data_r);
1188 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1189 } else {
1190 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1192 break;
1193 case MO_Q:
1194 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1195 if (bswap) {
1196 tcg_out_rev64(s, data_r, data_r);
1198 break;
1199 default:
1200 tcg_abort();
1204 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1205 TCGReg data_r, TCGReg addr_r,
1206 TCGType otype, TCGReg off_r)
1208 const TCGMemOp bswap = memop & MO_BSWAP;
1210 switch (memop & MO_SIZE) {
1211 case MO_8:
1212 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1213 break;
1214 case MO_16:
1215 if (bswap && data_r != TCG_REG_XZR) {
1216 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1217 data_r = TCG_REG_TMP;
1219 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1220 break;
1221 case MO_32:
1222 if (bswap && data_r != TCG_REG_XZR) {
1223 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1224 data_r = TCG_REG_TMP;
1226 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1227 break;
1228 case MO_64:
1229 if (bswap && data_r != TCG_REG_XZR) {
1230 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1231 data_r = TCG_REG_TMP;
1233 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1234 break;
1235 default:
1236 tcg_abort();
1240 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1241 TCGMemOpIdx oi, TCGType ext)
1243 TCGMemOp memop = get_memop(oi);
1244 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1245 #ifdef CONFIG_SOFTMMU
1246 unsigned mem_index = get_mmuidx(oi);
1247 tcg_insn_unit *label_ptr;
1249 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1250 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1251 TCG_REG_X1, otype, addr_reg);
1252 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1253 s->code_ptr, label_ptr);
1254 #else /* !CONFIG_SOFTMMU */
1255 if (USE_GUEST_BASE) {
1256 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1257 TCG_REG_GUEST_BASE, otype, addr_reg);
1258 } else {
1259 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1260 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1262 #endif /* CONFIG_SOFTMMU */
1265 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1266 TCGMemOpIdx oi)
1268 TCGMemOp memop = get_memop(oi);
1269 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1270 #ifdef CONFIG_SOFTMMU
1271 unsigned mem_index = get_mmuidx(oi);
1272 tcg_insn_unit *label_ptr;
1274 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1275 tcg_out_qemu_st_direct(s, memop, data_reg,
1276 TCG_REG_X1, otype, addr_reg);
1277 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1278 data_reg, addr_reg, s->code_ptr, label_ptr);
1279 #else /* !CONFIG_SOFTMMU */
1280 if (USE_GUEST_BASE) {
1281 tcg_out_qemu_st_direct(s, memop, data_reg,
1282 TCG_REG_GUEST_BASE, otype, addr_reg);
1283 } else {
1284 tcg_out_qemu_st_direct(s, memop, data_reg,
1285 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1287 #endif /* CONFIG_SOFTMMU */
1290 static tcg_insn_unit *tb_ret_addr;
1292 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1293 const TCGArg args[TCG_MAX_OP_ARGS],
1294 const int const_args[TCG_MAX_OP_ARGS])
1296 /* 99% of the time, we can signal the use of extension registers
1297 by looking to see if the opcode handles 64-bit data. */
1298 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1300 /* Hoist the loads of the most common arguments. */
1301 TCGArg a0 = args[0];
1302 TCGArg a1 = args[1];
1303 TCGArg a2 = args[2];
1304 int c2 = const_args[2];
1306 /* Some operands are defined with "rZ" constraint, a register or
1307 the zero register. These need not actually test args[I] == 0. */
1308 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1310 switch (opc) {
1311 case INDEX_op_exit_tb:
1312 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1313 tcg_out_goto(s, tb_ret_addr);
1314 break;
1316 case INDEX_op_goto_tb:
1317 #ifndef USE_DIRECT_JUMP
1318 #error "USE_DIRECT_JUMP required for aarch64"
1319 #endif
1320 /* consistency for USE_DIRECT_JUMP */
1321 tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
1322 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1323 /* actual branch destination will be patched by
1324 aarch64_tb_set_jmp_target later, beware retranslation. */
1325 tcg_out_goto_noaddr(s);
1326 s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
1327 break;
1329 case INDEX_op_br:
1330 tcg_out_goto_label(s, arg_label(a0));
1331 break;
1333 case INDEX_op_ld8u_i32:
1334 case INDEX_op_ld8u_i64:
1335 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1336 break;
1337 case INDEX_op_ld8s_i32:
1338 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1339 break;
1340 case INDEX_op_ld8s_i64:
1341 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1342 break;
1343 case INDEX_op_ld16u_i32:
1344 case INDEX_op_ld16u_i64:
1345 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1346 break;
1347 case INDEX_op_ld16s_i32:
1348 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1349 break;
1350 case INDEX_op_ld16s_i64:
1351 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1352 break;
1353 case INDEX_op_ld_i32:
1354 case INDEX_op_ld32u_i64:
1355 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1356 break;
1357 case INDEX_op_ld32s_i64:
1358 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1359 break;
1360 case INDEX_op_ld_i64:
1361 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1362 break;
1364 case INDEX_op_st8_i32:
1365 case INDEX_op_st8_i64:
1366 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1367 break;
1368 case INDEX_op_st16_i32:
1369 case INDEX_op_st16_i64:
1370 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1371 break;
1372 case INDEX_op_st_i32:
1373 case INDEX_op_st32_i64:
1374 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1375 break;
1376 case INDEX_op_st_i64:
1377 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1378 break;
1380 case INDEX_op_add_i32:
1381 a2 = (int32_t)a2;
1382 /* FALLTHRU */
1383 case INDEX_op_add_i64:
1384 if (c2) {
1385 tcg_out_addsubi(s, ext, a0, a1, a2);
1386 } else {
1387 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1389 break;
1391 case INDEX_op_sub_i32:
1392 a2 = (int32_t)a2;
1393 /* FALLTHRU */
1394 case INDEX_op_sub_i64:
1395 if (c2) {
1396 tcg_out_addsubi(s, ext, a0, a1, -a2);
1397 } else {
1398 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1400 break;
1402 case INDEX_op_neg_i64:
1403 case INDEX_op_neg_i32:
1404 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1405 break;
1407 case INDEX_op_and_i32:
1408 a2 = (int32_t)a2;
1409 /* FALLTHRU */
1410 case INDEX_op_and_i64:
1411 if (c2) {
1412 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1413 } else {
1414 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1416 break;
1418 case INDEX_op_andc_i32:
1419 a2 = (int32_t)a2;
1420 /* FALLTHRU */
1421 case INDEX_op_andc_i64:
1422 if (c2) {
1423 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1424 } else {
1425 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1427 break;
1429 case INDEX_op_or_i32:
1430 a2 = (int32_t)a2;
1431 /* FALLTHRU */
1432 case INDEX_op_or_i64:
1433 if (c2) {
1434 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1435 } else {
1436 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1438 break;
1440 case INDEX_op_orc_i32:
1441 a2 = (int32_t)a2;
1442 /* FALLTHRU */
1443 case INDEX_op_orc_i64:
1444 if (c2) {
1445 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1446 } else {
1447 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1449 break;
1451 case INDEX_op_xor_i32:
1452 a2 = (int32_t)a2;
1453 /* FALLTHRU */
1454 case INDEX_op_xor_i64:
1455 if (c2) {
1456 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1457 } else {
1458 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1460 break;
1462 case INDEX_op_eqv_i32:
1463 a2 = (int32_t)a2;
1464 /* FALLTHRU */
1465 case INDEX_op_eqv_i64:
1466 if (c2) {
1467 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1468 } else {
1469 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1471 break;
1473 case INDEX_op_not_i64:
1474 case INDEX_op_not_i32:
1475 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1476 break;
1478 case INDEX_op_mul_i64:
1479 case INDEX_op_mul_i32:
1480 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1481 break;
1483 case INDEX_op_div_i64:
1484 case INDEX_op_div_i32:
1485 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1486 break;
1487 case INDEX_op_divu_i64:
1488 case INDEX_op_divu_i32:
1489 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1490 break;
1492 case INDEX_op_rem_i64:
1493 case INDEX_op_rem_i32:
1494 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1495 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1496 break;
1497 case INDEX_op_remu_i64:
1498 case INDEX_op_remu_i32:
1499 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1500 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1501 break;
1503 case INDEX_op_shl_i64:
1504 case INDEX_op_shl_i32:
1505 if (c2) {
1506 tcg_out_shl(s, ext, a0, a1, a2);
1507 } else {
1508 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1510 break;
1512 case INDEX_op_shr_i64:
1513 case INDEX_op_shr_i32:
1514 if (c2) {
1515 tcg_out_shr(s, ext, a0, a1, a2);
1516 } else {
1517 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1519 break;
1521 case INDEX_op_sar_i64:
1522 case INDEX_op_sar_i32:
1523 if (c2) {
1524 tcg_out_sar(s, ext, a0, a1, a2);
1525 } else {
1526 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1528 break;
1530 case INDEX_op_rotr_i64:
1531 case INDEX_op_rotr_i32:
1532 if (c2) {
1533 tcg_out_rotr(s, ext, a0, a1, a2);
1534 } else {
1535 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1537 break;
1539 case INDEX_op_rotl_i64:
1540 case INDEX_op_rotl_i32:
1541 if (c2) {
1542 tcg_out_rotl(s, ext, a0, a1, a2);
1543 } else {
1544 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1545 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1547 break;
1549 case INDEX_op_brcond_i32:
1550 a1 = (int32_t)a1;
1551 /* FALLTHRU */
1552 case INDEX_op_brcond_i64:
1553 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1554 break;
1556 case INDEX_op_setcond_i32:
1557 a2 = (int32_t)a2;
1558 /* FALLTHRU */
1559 case INDEX_op_setcond_i64:
1560 tcg_out_cmp(s, ext, a1, a2, c2);
1561 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1562 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1563 TCG_REG_XZR, tcg_invert_cond(args[3]));
1564 break;
1566 case INDEX_op_movcond_i32:
1567 a2 = (int32_t)a2;
1568 /* FALLTHRU */
1569 case INDEX_op_movcond_i64:
1570 tcg_out_cmp(s, ext, a1, a2, c2);
1571 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1572 break;
1574 case INDEX_op_qemu_ld_i32:
1575 case INDEX_op_qemu_ld_i64:
1576 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1577 break;
1578 case INDEX_op_qemu_st_i32:
1579 case INDEX_op_qemu_st_i64:
1580 tcg_out_qemu_st(s, REG0(0), a1, a2);
1581 break;
1583 case INDEX_op_bswap64_i64:
1584 tcg_out_rev64(s, a0, a1);
1585 break;
1586 case INDEX_op_bswap32_i64:
1587 case INDEX_op_bswap32_i32:
1588 tcg_out_rev32(s, a0, a1);
1589 break;
1590 case INDEX_op_bswap16_i64:
1591 case INDEX_op_bswap16_i32:
1592 tcg_out_rev16(s, a0, a1);
1593 break;
1595 case INDEX_op_ext8s_i64:
1596 case INDEX_op_ext8s_i32:
1597 tcg_out_sxt(s, ext, MO_8, a0, a1);
1598 break;
1599 case INDEX_op_ext16s_i64:
1600 case INDEX_op_ext16s_i32:
1601 tcg_out_sxt(s, ext, MO_16, a0, a1);
1602 break;
1603 case INDEX_op_ext_i32_i64:
1604 case INDEX_op_ext32s_i64:
1605 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1606 break;
1607 case INDEX_op_ext8u_i64:
1608 case INDEX_op_ext8u_i32:
1609 tcg_out_uxt(s, MO_8, a0, a1);
1610 break;
1611 case INDEX_op_ext16u_i64:
1612 case INDEX_op_ext16u_i32:
1613 tcg_out_uxt(s, MO_16, a0, a1);
1614 break;
1615 case INDEX_op_extu_i32_i64:
1616 case INDEX_op_ext32u_i64:
1617 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1618 break;
1620 case INDEX_op_deposit_i64:
1621 case INDEX_op_deposit_i32:
1622 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1623 break;
1625 case INDEX_op_add2_i32:
1626 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1627 (int32_t)args[4], args[5], const_args[4],
1628 const_args[5], false);
1629 break;
1630 case INDEX_op_add2_i64:
1631 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1632 args[5], const_args[4], const_args[5], false);
1633 break;
1634 case INDEX_op_sub2_i32:
1635 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1636 (int32_t)args[4], args[5], const_args[4],
1637 const_args[5], true);
1638 break;
1639 case INDEX_op_sub2_i64:
1640 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1641 args[5], const_args[4], const_args[5], true);
1642 break;
1644 case INDEX_op_muluh_i64:
1645 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1646 break;
1647 case INDEX_op_mulsh_i64:
1648 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1649 break;
1651 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1652 case INDEX_op_mov_i64:
1653 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1654 case INDEX_op_movi_i64:
1655 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1656 default:
1657 tcg_abort();
1660 #undef REG0
1663 static const TCGTargetOpDef aarch64_op_defs[] = {
1664 { INDEX_op_exit_tb, { } },
1665 { INDEX_op_goto_tb, { } },
1666 { INDEX_op_br, { } },
1668 { INDEX_op_ld8u_i32, { "r", "r" } },
1669 { INDEX_op_ld8s_i32, { "r", "r" } },
1670 { INDEX_op_ld16u_i32, { "r", "r" } },
1671 { INDEX_op_ld16s_i32, { "r", "r" } },
1672 { INDEX_op_ld_i32, { "r", "r" } },
1673 { INDEX_op_ld8u_i64, { "r", "r" } },
1674 { INDEX_op_ld8s_i64, { "r", "r" } },
1675 { INDEX_op_ld16u_i64, { "r", "r" } },
1676 { INDEX_op_ld16s_i64, { "r", "r" } },
1677 { INDEX_op_ld32u_i64, { "r", "r" } },
1678 { INDEX_op_ld32s_i64, { "r", "r" } },
1679 { INDEX_op_ld_i64, { "r", "r" } },
1681 { INDEX_op_st8_i32, { "rZ", "r" } },
1682 { INDEX_op_st16_i32, { "rZ", "r" } },
1683 { INDEX_op_st_i32, { "rZ", "r" } },
1684 { INDEX_op_st8_i64, { "rZ", "r" } },
1685 { INDEX_op_st16_i64, { "rZ", "r" } },
1686 { INDEX_op_st32_i64, { "rZ", "r" } },
1687 { INDEX_op_st_i64, { "rZ", "r" } },
1689 { INDEX_op_add_i32, { "r", "r", "rA" } },
1690 { INDEX_op_add_i64, { "r", "r", "rA" } },
1691 { INDEX_op_sub_i32, { "r", "r", "rA" } },
1692 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1693 { INDEX_op_mul_i32, { "r", "r", "r" } },
1694 { INDEX_op_mul_i64, { "r", "r", "r" } },
1695 { INDEX_op_div_i32, { "r", "r", "r" } },
1696 { INDEX_op_div_i64, { "r", "r", "r" } },
1697 { INDEX_op_divu_i32, { "r", "r", "r" } },
1698 { INDEX_op_divu_i64, { "r", "r", "r" } },
1699 { INDEX_op_rem_i32, { "r", "r", "r" } },
1700 { INDEX_op_rem_i64, { "r", "r", "r" } },
1701 { INDEX_op_remu_i32, { "r", "r", "r" } },
1702 { INDEX_op_remu_i64, { "r", "r", "r" } },
1703 { INDEX_op_and_i32, { "r", "r", "rL" } },
1704 { INDEX_op_and_i64, { "r", "r", "rL" } },
1705 { INDEX_op_or_i32, { "r", "r", "rL" } },
1706 { INDEX_op_or_i64, { "r", "r", "rL" } },
1707 { INDEX_op_xor_i32, { "r", "r", "rL" } },
1708 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1709 { INDEX_op_andc_i32, { "r", "r", "rL" } },
1710 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1711 { INDEX_op_orc_i32, { "r", "r", "rL" } },
1712 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1713 { INDEX_op_eqv_i32, { "r", "r", "rL" } },
1714 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1716 { INDEX_op_neg_i32, { "r", "r" } },
1717 { INDEX_op_neg_i64, { "r", "r" } },
1718 { INDEX_op_not_i32, { "r", "r" } },
1719 { INDEX_op_not_i64, { "r", "r" } },
1721 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1722 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1723 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1724 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1725 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1726 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1727 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1728 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1729 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1730 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1732 { INDEX_op_brcond_i32, { "r", "rA" } },
1733 { INDEX_op_brcond_i64, { "r", "rA" } },
1734 { INDEX_op_setcond_i32, { "r", "r", "rA" } },
1735 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1736 { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
1737 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1739 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1740 { INDEX_op_qemu_ld_i64, { "r", "l" } },
1741 { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1742 { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1744 { INDEX_op_bswap16_i32, { "r", "r" } },
1745 { INDEX_op_bswap32_i32, { "r", "r" } },
1746 { INDEX_op_bswap16_i64, { "r", "r" } },
1747 { INDEX_op_bswap32_i64, { "r", "r" } },
1748 { INDEX_op_bswap64_i64, { "r", "r" } },
1750 { INDEX_op_ext8s_i32, { "r", "r" } },
1751 { INDEX_op_ext16s_i32, { "r", "r" } },
1752 { INDEX_op_ext8u_i32, { "r", "r" } },
1753 { INDEX_op_ext16u_i32, { "r", "r" } },
1755 { INDEX_op_ext8s_i64, { "r", "r" } },
1756 { INDEX_op_ext16s_i64, { "r", "r" } },
1757 { INDEX_op_ext32s_i64, { "r", "r" } },
1758 { INDEX_op_ext8u_i64, { "r", "r" } },
1759 { INDEX_op_ext16u_i64, { "r", "r" } },
1760 { INDEX_op_ext32u_i64, { "r", "r" } },
1761 { INDEX_op_ext_i32_i64, { "r", "r" } },
1762 { INDEX_op_extu_i32_i64, { "r", "r" } },
1764 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1765 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1767 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1768 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1769 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1770 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1772 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1773 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1775 { -1 },
1778 static void tcg_target_init(TCGContext *s)
1780 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1781 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1783 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1784 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1785 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1786 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1787 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1788 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1789 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1790 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1791 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1792 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1793 (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
1795 tcg_regset_clear(s->reserved_regs);
1796 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1797 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1798 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1799 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1801 tcg_add_target_add_op_defs(aarch64_op_defs);
1804 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1805 #define PUSH_SIZE ((30 - 19 + 1) * 8)
1807 #define FRAME_SIZE \
1808 ((PUSH_SIZE \
1809 + TCG_STATIC_CALL_ARGS_SIZE \
1810 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1811 + TCG_TARGET_STACK_ALIGN - 1) \
1812 & ~(TCG_TARGET_STACK_ALIGN - 1))
1814 /* We're expecting a 2 byte uleb128 encoded value. */
1815 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1817 /* We're expecting to use a single ADDI insn. */
1818 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
1820 static void tcg_target_qemu_prologue(TCGContext *s)
1822 TCGReg r;
1824 /* Push (FP, LR) and allocate space for all saved registers. */
1825 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1826 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1828 /* Set up frame pointer for canonical unwinding. */
1829 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1831 /* Store callee-preserved regs x19..x28. */
1832 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1833 int ofs = (r - TCG_REG_X19 + 2) * 8;
1834 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1837 /* Make stack space for TCG locals. */
1838 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1839 FRAME_SIZE - PUSH_SIZE);
1841 /* Inform TCG about how to find TCG locals with register, offset, size. */
1842 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1843 CPU_TEMP_BUF_NLONGS * sizeof(long));
1845 #if !defined(CONFIG_SOFTMMU)
1846 if (USE_GUEST_BASE) {
1847 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
1848 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1850 #endif
1852 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1853 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1855 tb_ret_addr = s->code_ptr;
1857 /* Remove TCG locals stack space. */
1858 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1859 FRAME_SIZE - PUSH_SIZE);
1861 /* Restore registers x19..x28. */
1862 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1863 int ofs = (r - TCG_REG_X19 + 2) * 8;
1864 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1867 /* Pop (FP, LR), restore SP to previous frame. */
1868 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
1869 TCG_REG_SP, PUSH_SIZE, 0, 1);
1870 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
1873 typedef struct {
1874 DebugFrameHeader h;
1875 uint8_t fde_def_cfa[4];
1876 uint8_t fde_reg_ofs[24];
1877 } DebugFrame;
1879 #define ELF_HOST_MACHINE EM_AARCH64
1881 static const DebugFrame debug_frame = {
1882 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1883 .h.cie.id = -1,
1884 .h.cie.version = 1,
1885 .h.cie.code_align = 1,
1886 .h.cie.data_align = 0x78, /* sleb128 -8 */
1887 .h.cie.return_column = TCG_REG_LR,
1889 /* Total FDE size does not include the "len" member. */
1890 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1892 .fde_def_cfa = {
1893 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
1894 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
1895 (FRAME_SIZE >> 7)
1897 .fde_reg_ofs = {
1898 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
1899 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
1900 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
1901 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
1902 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
1903 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
1904 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
1905 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
1906 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
1907 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */
1908 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
1909 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
1913 void tcg_register_jit(void *buf, size_t buf_size)
1915 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));