tcg/aarch64: Implement tlb lookup fast path
tcg/aarch64/tcg-target.c
1 /*
2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "qemu/bitops.h"
15 #ifndef NDEBUG
16 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
17 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
18 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
19 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
20 "%x24", "%x25", "%x26", "%x27", "%x28",
21 "%fp", /* frame pointer */
22 "%lr", /* link register */
23 "%sp", /* stack pointer */
25 #endif /* NDEBUG */
27 #ifdef TARGET_WORDS_BIGENDIAN
28 #define TCG_LDST_BSWAP 1
29 #else
30 #define TCG_LDST_BSWAP 0
31 #endif
33 static const int tcg_target_reg_alloc_order[] = {
34 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
35 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
36 TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
38 TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12,
39 TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
40 TCG_REG_X16, TCG_REG_X17,
42 TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47 TCG_REG_X8, /* will not use, see tcg_target_init */
50 static const int tcg_target_call_iarg_regs[8] = {
51 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
52 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
54 static const int tcg_target_call_oarg_regs[1] = {
55 TCG_REG_X0
58 #define TCG_REG_TMP TCG_REG_X8
60 #ifndef CONFIG_SOFTMMU
61 # if defined(CONFIG_USE_GUEST_BASE)
62 # define TCG_REG_GUEST_BASE TCG_REG_X28
63 # else
64 # define TCG_REG_GUEST_BASE TCG_REG_XZR
65 # endif
66 #endif
68 static inline void reloc_pc26(void *code_ptr, tcg_target_long target)
70 tcg_target_long offset; uint32_t insn;
71 offset = (target - (tcg_target_long)code_ptr) / 4;
72 /* read instruction, mask away previous PC_REL26 parameter contents,
73 set the proper offset, then write back the instruction. */
74 insn = *(uint32_t *)code_ptr;
75 insn = deposit32(insn, 0, 26, offset);
76 *(uint32_t *)code_ptr = insn;
79 static inline void reloc_pc19(void *code_ptr, tcg_target_long target)
81 tcg_target_long offset; uint32_t insn;
82 offset = (target - (tcg_target_long)code_ptr) / 4;
83 /* read instruction, mask away previous PC_REL19 parameter contents,
84 set the proper offset, then write back the instruction. */
85 insn = *(uint32_t *)code_ptr;
86 insn = deposit32(insn, 5, 19, offset);
87 *(uint32_t *)code_ptr = insn;
90 static inline void patch_reloc(uint8_t *code_ptr, int type,
91 tcg_target_long value, tcg_target_long addend)
93 value += addend;
95 switch (type) {
96 case R_AARCH64_JUMP26:
97 case R_AARCH64_CALL26:
98 reloc_pc26(code_ptr, value);
99 break;
100 case R_AARCH64_CONDBR19:
101 reloc_pc19(code_ptr, value);
102 break;
104 default:
105 tcg_abort();
109 /* parse target specific constraints */
110 static int target_parse_constraint(TCGArgConstraint *ct,
111 const char **pct_str)
113 const char *ct_str = *pct_str;
115 switch (ct_str[0]) {
116 case 'r':
117 ct->ct |= TCG_CT_REG;
118 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
119 break;
120 case 'l': /* qemu_ld / qemu_st address, data_reg */
121 ct->ct |= TCG_CT_REG;
122 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
123 #ifdef CONFIG_SOFTMMU
124 /* x0 and x1 will be overwritten when reading the tlb entry,
125 and x2 and x3 are needed for the helper args, so better avoid them. */
126 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
127 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
128 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
129 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
130 #endif
131 break;
132 default:
133 return -1;
136 ct_str++;
137 *pct_str = ct_str;
138 return 0;
141 static inline int tcg_target_const_match(tcg_target_long val,
142 const TCGArgConstraint *arg_ct)
144 int ct = arg_ct->ct;
146 if (ct & TCG_CT_CONST) {
147 return 1;
150 return 0;
153 enum aarch64_cond_code {
154 COND_EQ = 0x0,
155 COND_NE = 0x1,
156 COND_CS = 0x2, /* Unsigned greater or equal */
157 COND_HS = COND_CS, /* ALIAS greater or equal */
158 COND_CC = 0x3, /* Unsigned less than */
159 COND_LO = COND_CC, /* ALIAS Lower */
160 COND_MI = 0x4, /* Negative */
161 COND_PL = 0x5, /* Zero or greater */
162 COND_VS = 0x6, /* Overflow */
163 COND_VC = 0x7, /* No overflow */
164 COND_HI = 0x8, /* Unsigned greater than */
165 COND_LS = 0x9, /* Unsigned less or equal */
166 COND_GE = 0xa,
167 COND_LT = 0xb,
168 COND_GT = 0xc,
169 COND_LE = 0xd,
170 COND_AL = 0xe,
171 COND_NV = 0xf, /* behaves like COND_AL here */
174 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
175 [TCG_COND_EQ] = COND_EQ,
176 [TCG_COND_NE] = COND_NE,
177 [TCG_COND_LT] = COND_LT,
178 [TCG_COND_GE] = COND_GE,
179 [TCG_COND_LE] = COND_LE,
180 [TCG_COND_GT] = COND_GT,
181 /* unsigned */
182 [TCG_COND_LTU] = COND_LO,
183 [TCG_COND_GTU] = COND_HI,
184 [TCG_COND_GEU] = COND_HS,
185 [TCG_COND_LEU] = COND_LS,
188 /* opcodes for LDR / STR instructions with base + simm9 addressing */
189 enum aarch64_ldst_op_data { /* size of the data moved */
190 LDST_8 = 0x38,
191 LDST_16 = 0x78,
192 LDST_32 = 0xb8,
193 LDST_64 = 0xf8,
195 enum aarch64_ldst_op_type { /* type of operation */
196 LDST_ST = 0x0, /* store */
197 LDST_LD = 0x4, /* load */
198 LDST_LD_S_X = 0x8, /* load and sign-extend into Xt */
199 LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */
202 enum aarch64_arith_opc {
203 ARITH_AND = 0x0a,
204 ARITH_ADD = 0x0b,
205 ARITH_OR = 0x2a,
206 ARITH_ADDS = 0x2b,
207 ARITH_XOR = 0x4a,
208 ARITH_SUB = 0x4b,
209 ARITH_ANDS = 0x6a,
210 ARITH_SUBS = 0x6b,
213 enum aarch64_srr_opc {
214 SRR_SHL = 0x0,
215 SRR_SHR = 0x4,
216 SRR_SAR = 0x8,
217 SRR_ROR = 0xc
220 static inline enum aarch64_ldst_op_data
221 aarch64_ldst_get_data(TCGOpcode tcg_op)
223 switch (tcg_op) {
224 case INDEX_op_ld8u_i32:
225 case INDEX_op_ld8s_i32:
226 case INDEX_op_ld8u_i64:
227 case INDEX_op_ld8s_i64:
228 case INDEX_op_st8_i32:
229 case INDEX_op_st8_i64:
230 return LDST_8;
232 case INDEX_op_ld16u_i32:
233 case INDEX_op_ld16s_i32:
234 case INDEX_op_ld16u_i64:
235 case INDEX_op_ld16s_i64:
236 case INDEX_op_st16_i32:
237 case INDEX_op_st16_i64:
238 return LDST_16;
240 case INDEX_op_ld_i32:
241 case INDEX_op_st_i32:
242 case INDEX_op_ld32u_i64:
243 case INDEX_op_ld32s_i64:
244 case INDEX_op_st32_i64:
245 return LDST_32;
247 case INDEX_op_ld_i64:
248 case INDEX_op_st_i64:
249 return LDST_64;
251 default:
252 tcg_abort();
256 static inline enum aarch64_ldst_op_type
257 aarch64_ldst_get_type(TCGOpcode tcg_op)
259 switch (tcg_op) {
260 case INDEX_op_st8_i32:
261 case INDEX_op_st16_i32:
262 case INDEX_op_st8_i64:
263 case INDEX_op_st16_i64:
264 case INDEX_op_st_i32:
265 case INDEX_op_st32_i64:
266 case INDEX_op_st_i64:
267 return LDST_ST;
269 case INDEX_op_ld8u_i32:
270 case INDEX_op_ld16u_i32:
271 case INDEX_op_ld8u_i64:
272 case INDEX_op_ld16u_i64:
273 case INDEX_op_ld_i32:
274 case INDEX_op_ld32u_i64:
275 case INDEX_op_ld_i64:
276 return LDST_LD;
278 case INDEX_op_ld8s_i32:
279 case INDEX_op_ld16s_i32:
280 return LDST_LD_S_W;
282 case INDEX_op_ld8s_i64:
283 case INDEX_op_ld16s_i64:
284 case INDEX_op_ld32s_i64:
285 return LDST_LD_S_X;
287 default:
288 tcg_abort();
292 static inline uint32_t tcg_in32(TCGContext *s)
294 uint32_t v = *(uint32_t *)s->code_ptr;
295 return v;
298 static inline void tcg_out_ldst_9(TCGContext *s,
299 enum aarch64_ldst_op_data op_data,
300 enum aarch64_ldst_op_type op_type,
301 TCGReg rd, TCGReg rn, tcg_target_long offset)
303 /* use LDUR/STUR: base register plus 9bit signed unscaled offset */
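    /* e.g. offset -8 becomes off = 248 with the mod sign bit set,
       together forming the 9-bit two's-complement imm9 value for -8 */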
304 unsigned int mod, off;
306 if (offset < 0) {
307 off = (256 + offset);
308 mod = 0x1;
309 } else {
310 off = offset;
311 mod = 0x0;
314 mod |= op_type;
315 tcg_out32(s, op_data << 24 | mod << 20 | off << 12 | rn << 5 | rd);
318 /* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
319 static inline void tcg_out_ldst_12(TCGContext *s,
320 enum aarch64_ldst_op_data op_data,
321 enum aarch64_ldst_op_type op_type,
322 TCGReg rd, TCGReg rn,
323 tcg_target_ulong scaled_uimm)
325 tcg_out32(s, (op_data | 1) << 24
326 | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
329 static inline void tcg_out_movr(TCGContext *s, int ext, TCGReg rd, TCGReg src)
331 /* register to register move using MOV (shifted register with no shift) */
332 /* using MOV 0x2a0003e0 | (shift).. */
333 unsigned int base = ext ? 0xaa0003e0 : 0x2a0003e0;
334 tcg_out32(s, base | src << 16 | rd);
337 static inline void tcg_out_movi_aux(TCGContext *s,
338 TCGReg rd, uint64_t value)
340 uint32_t half, base, shift, movk = 0;
341 /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
342 /* using MOVZ 0x52800000 | extended reg.. */
343 base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
344 /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
345 first MOVZ with the half-word immediate skipping the zeros, with a shift
346 (LSL) equal to this number. Then morph all next instructions into MOVKs.
347 Zero the processed half-word in the value, continue until empty.
348 We build the final result 16bits at a time with up to 4 instructions,
349 but do not emit instructions for 16bit zero holes. */
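    /* Illustrative example: tcg_out_movi_aux(s, rd, 0x0000123400005678)
       emits MOVZ rd, #0x5678 (shift 0) followed by MOVK rd, #0x1234, LSL #32;
       the all-zero halfword at bits 16..31 is skipped. */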
350 do {
351 shift = ctz64(value) & (63 & -16);
352 half = (value >> shift) & 0xffff;
353 tcg_out32(s, base | movk | shift << 17 | half << 5 | rd);
354 movk = 0x20000000; /* morph next MOVZs into MOVKs */
355 value &= ~(0xffffUL << shift);
356 } while (value);
359 static inline void tcg_out_movi(TCGContext *s, TCGType type,
360 TCGReg rd, tcg_target_long value)
362 if (type == TCG_TYPE_I64) {
363 tcg_out_movi_aux(s, rd, value);
364 } else {
365 tcg_out_movi_aux(s, rd, value & 0xffffffff);
369 static inline void tcg_out_ldst_r(TCGContext *s,
370 enum aarch64_ldst_op_data op_data,
371 enum aarch64_ldst_op_type op_type,
372 TCGReg rd, TCGReg base, TCGReg regoff)
374 /* load/store between register and memory using base + 64bit register offset */
375 /* e.g. STR Wt, [Xn, Xm] 0xb8600800|(regoff << 16)|(base << 5)|rd */
376 /* the 0x6000 selects the LSL #0 ("no extend") option field */
377 tcg_out32(s, 0x00206800
378 | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
381 /* handle a load/store with an arbitrary offset, picking the best addressing form */
382 static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
383 enum aarch64_ldst_op_type type,
384 TCGReg rd, TCGReg rn, tcg_target_long offset)
386 if (offset >= -256 && offset < 256) {
387 tcg_out_ldst_9(s, data, type, rd, rn, offset);
388 return;
391 if (offset >= 256) {
392 /* if the offset is naturally aligned and in range,
393 then we can use the scaled uimm12 encoding */
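        /* the top two bits of the op_data values (0x38..0xf8) encode
           log2 of the access size, recovered here as data >> 6 */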
394 unsigned int s_bits = data >> 6;
395 if (!(offset & ((1 << s_bits) - 1))) {
396 tcg_target_ulong scaled_uimm = offset >> s_bits;
397 if (scaled_uimm <= 0xfff) {
398 tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm);
399 return;
404 /* worst-case scenario, move offset to temp register, use reg offset */
405 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
406 tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
409 /* mov alias implemented with add immediate, useful to move to/from SP */
410 static inline void tcg_out_movr_sp(TCGContext *s, int ext, TCGReg rd, TCGReg rn)
412 /* using ADD 0x11000000 | (ext) | rn << 5 | rd */
413 unsigned int base = ext ? 0x91000000 : 0x11000000;
414 tcg_out32(s, base | rn << 5 | rd);
417 static inline void tcg_out_mov(TCGContext *s,
418 TCGType type, TCGReg ret, TCGReg arg)
420 if (ret != arg) {
421 tcg_out_movr(s, type == TCG_TYPE_I64, ret, arg);
425 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
426 TCGReg arg1, tcg_target_long arg2)
428 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD,
429 arg, arg1, arg2);
432 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
433 TCGReg arg1, tcg_target_long arg2)
435 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST,
436 arg, arg1, arg2);
439 static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc,
440 int ext, TCGReg rd, TCGReg rn, TCGReg rm,
441 int shift_imm)
443 /* Using shifted register arithmetic operations */
444 /* if extended register operation (64bit) just OR with 0x80 << 24 */
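    /* shift_imm convention used below: 0 means no shift of rm, a positive
       value means LSR #shift_imm, a negative value means LSL #(-shift_imm) */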
445 unsigned int shift, base = ext ? (0x80 | opc) << 24 : opc << 24;
446 if (shift_imm == 0) {
447 shift = 0;
448 } else if (shift_imm > 0) {
449 shift = shift_imm << 10 | 1 << 22;
450 } else /* (shift_imm < 0) */ {
451 shift = (-shift_imm) << 10;
453 tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
456 static inline void tcg_out_mul(TCGContext *s, int ext,
457 TCGReg rd, TCGReg rn, TCGReg rm)
459 /* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */
460 unsigned int base = ext ? 0x9b007c00 : 0x1b007c00;
461 tcg_out32(s, base | rm << 16 | rn << 5 | rd);
464 static inline void tcg_out_shiftrot_reg(TCGContext *s,
465 enum aarch64_srr_opc opc, int ext,
466 TCGReg rd, TCGReg rn, TCGReg rm)
468 /* using 2-source data processing instructions 0x1ac02000 */
469 unsigned int base = ext ? 0x9ac02000 : 0x1ac02000;
470 tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd);
473 static inline void tcg_out_ubfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
474 unsigned int a, unsigned int b)
476 /* Using UBFM 0x53000000 Wd, Wn, a, b */
477 unsigned int base = ext ? 0xd3400000 : 0x53000000;
478 tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
481 static inline void tcg_out_sbfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
482 unsigned int a, unsigned int b)
484 /* Using SBFM 0x13000000 Wd, Wn, a, b */
485 unsigned int base = ext ? 0x93400000 : 0x13000000;
486 tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
489 static inline void tcg_out_extr(TCGContext *s, int ext, TCGReg rd,
490 TCGReg rn, TCGReg rm, unsigned int a)
492 /* Using EXTR 0x13800000 Wd, Wn, Wm, a */
493 unsigned int base = ext ? 0x93c00000 : 0x13800000;
494 tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd);
497 static inline void tcg_out_shl(TCGContext *s, int ext,
498 TCGReg rd, TCGReg rn, unsigned int m)
500 int bits, max;
501 bits = ext ? 64 : 32;
502 max = bits - 1;
503 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
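    /* e.g. a 32bit shl by 3 becomes UBFM rd, rn, #29, #28 (the LSL alias) */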
506 static inline void tcg_out_shr(TCGContext *s, int ext,
507 TCGReg rd, TCGReg rn, unsigned int m)
509 int max = ext ? 63 : 31;
510 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
513 static inline void tcg_out_sar(TCGContext *s, int ext,
514 TCGReg rd, TCGReg rn, unsigned int m)
516 int max = ext ? 63 : 31;
517 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
520 static inline void tcg_out_rotr(TCGContext *s, int ext,
521 TCGReg rd, TCGReg rn, unsigned int m)
523 int max = ext ? 63 : 31;
524 tcg_out_extr(s, ext, rd, rn, rn, m & max);
527 static inline void tcg_out_rotl(TCGContext *s, int ext,
528 TCGReg rd, TCGReg rn, unsigned int m)
530 int bits, max;
531 bits = ext ? 64 : 32;
532 max = bits - 1;
533 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
536 static inline void tcg_out_cmp(TCGContext *s, int ext, TCGReg rn, TCGReg rm,
537 int shift_imm)
539 /* Using CMP alias SUBS wzr, Wn, Wm */
540 tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, rn, rm, shift_imm);
543 static inline void tcg_out_cset(TCGContext *s, int ext, TCGReg rd, TCGCond c)
545 /* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */
546 unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0;
547 tcg_out32(s, base | tcg_cond_to_aarch64[tcg_invert_cond(c)] << 12 | rd);
550 static inline void tcg_out_goto(TCGContext *s, tcg_target_long target)
552 tcg_target_long offset;
553 offset = (target - (tcg_target_long)s->code_ptr) / 4;
555 if (offset < -0x02000000 || offset >= 0x02000000) {
556 /* out of 26bit range */
557 tcg_abort();
560 tcg_out32(s, 0x14000000 | (offset & 0x03ffffff));
563 static inline void tcg_out_goto_noaddr(TCGContext *s)
565 /* We pay attention here not to modify the branch target by
566 reading from the buffer. This ensures that caches and memory are
567 kept coherent during retranslation.
568 Mask away possible garbage in the high bits for the first translation,
569 while keeping the offset bits for retranslation. */
570 uint32_t insn;
571 insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000;
572 tcg_out32(s, insn);
575 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
577 /* see comments in tcg_out_goto_noaddr */
578 uint32_t insn;
579 insn = tcg_in32(s) & (0x07ffff << 5);
580 insn |= 0x54000000 | tcg_cond_to_aarch64[c];
581 tcg_out32(s, insn);
584 static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c,
585 tcg_target_long target)
587 tcg_target_long offset;
588 offset = (target - (tcg_target_long)s->code_ptr) / 4;
590 if (offset < -0x40000 || offset >= 0x40000) {
591 /* out of 19bit range */
592 tcg_abort();
595 offset &= 0x7ffff;
596 tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5);
599 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
601 tcg_out32(s, 0xd63f0000 | reg << 5);
604 static inline void tcg_out_gotor(TCGContext *s, TCGReg reg)
606 tcg_out32(s, 0xd61f0000 | reg << 5);
609 static inline void tcg_out_call(TCGContext *s, tcg_target_long target)
611 tcg_target_long offset;
613 offset = (target - (tcg_target_long)s->code_ptr) / 4;
615 if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */
616 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
617 tcg_out_callr(s, TCG_REG_TMP);
618 } else {
619 tcg_out32(s, 0x94000000 | (offset & 0x03ffffff));
623 /* encode a logical immediate, mapping the user parameter
624 M (length of the run of set bits) to the S field as S = M - 1 */
625 static inline unsigned int
626 aarch64_limm(unsigned int m, unsigned int r)
628 assert(m > 0);
629 return r << 16 | (m - 1) << 10;
632 /* test a register against an immediate bit pattern made of
633 M set bits rotated right by R.
634 Examples:
635 to test a 32/64 reg against 0x00000007, pass M = 3, R = 0.
636 to test a 32/64 reg against 0x000000ff, pass M = 8, R = 0.
637 to test a 32bit reg against 0xff000000, pass M = 8, R = 8.
638 to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8.
640 static inline void tcg_out_tst(TCGContext *s, int ext, TCGReg rn,
641 unsigned int m, unsigned int r)
643 /* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */
644 unsigned int base = ext ? 0xf240001f : 0x7200001f;
645 tcg_out32(s, base | aarch64_limm(m, r) | rn << 5);
648 /* and a register with a bit pattern, similarly to TST, no flags change */
649 static inline void tcg_out_andi(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
650 unsigned int m, unsigned int r)
652 /* using AND 0x12000000 */
653 unsigned int base = ext ? 0x92400000 : 0x12000000;
654 tcg_out32(s, base | aarch64_limm(m, r) | rn << 5 | rd);
657 static inline void tcg_out_ret(TCGContext *s)
659 /* emit RET { LR } */
660 tcg_out32(s, 0xd65f03c0);
663 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
665 tcg_target_long target, offset;
666 target = (tcg_target_long)addr;
667 offset = (target - (tcg_target_long)jmp_addr) / 4;
669 if (offset < -0x02000000 || offset >= 0x02000000) {
670 /* out of 26bit range */
671 tcg_abort();
674 patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
675 flush_icache_range(jmp_addr, jmp_addr + 4);
678 static inline void tcg_out_goto_label(TCGContext *s, int label_index)
680 TCGLabel *l = &s->labels[label_index];
682 if (!l->has_value) {
683 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
684 tcg_out_goto_noaddr(s);
685 } else {
686 tcg_out_goto(s, l->u.value);
690 static inline void tcg_out_goto_label_cond(TCGContext *s,
691 TCGCond c, int label_index)
693 TCGLabel *l = &s->labels[label_index];
695 if (!l->has_value) {
696 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label_index, 0);
697 tcg_out_goto_cond_noaddr(s, c);
698 } else {
699 tcg_out_goto_cond(s, c, l->u.value);
703 static inline void tcg_out_rev(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
705 /* using REV 0x5ac00800 */
706 unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
707 tcg_out32(s, base | rm << 5 | rd);
710 static inline void tcg_out_rev16(TCGContext *s, int ext, TCGReg rd, TCGReg rm)
712 /* using REV16 0x5ac00400 */
713 unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
714 tcg_out32(s, base | rm << 5 | rd);
717 static inline void tcg_out_sxt(TCGContext *s, int ext, int s_bits,
718 TCGReg rd, TCGReg rn)
720 /* using ALIASes SXTB 0x13001c00, SXTH 0x13003c00, SXTW 0x93407c00
721 of SBFM Xd, Xn, #0, #7|15|31 */
722 int bits = 8 * (1 << s_bits) - 1;
723 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
726 static inline void tcg_out_uxt(TCGContext *s, int s_bits,
727 TCGReg rd, TCGReg rn)
729 /* using ALIASes UXTB 0x53001c00, UXTH 0x53003c00
730 of UBFM Wd, Wn, #0, #7|15 */
731 int bits = 8 * (1 << s_bits) - 1;
732 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
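    /* the 32bit (W register) form is always sufficient here: writing a
       W register zero-extends the result into the full X register */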
735 static inline void tcg_out_addi(TCGContext *s, int ext,
736 TCGReg rd, TCGReg rn, unsigned int aimm)
738 /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
739 /* using ADD 0x11000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
740 unsigned int base = ext ? 0x91000000 : 0x11000000;
742 if (aimm <= 0xfff) {
743 aimm <<= 10;
744 } else {
745 /* only a shift left by 12 is available; assert the value is representable */
746 assert(!(aimm & 0xfff));
747 assert(aimm <= 0xfff000);
748 base |= 1 << 22; /* apply LSL 12 */
749 aimm >>= 2;
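        /* aimm >>= 2 folds the >>12 scaling for LSL 12 together with the
           <<10 placement into the imm12 field */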
752 tcg_out32(s, base | aimm | (rn << 5) | rd);
755 static inline void tcg_out_subi(TCGContext *s, int ext,
756 TCGReg rd, TCGReg rn, unsigned int aimm)
758 /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
759 /* using SUB 0x51000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
760 unsigned int base = ext ? 0xd1000000 : 0x51000000;
762 if (aimm <= 0xfff) {
763 aimm <<= 10;
764 } else {
765 /* only a shift left by 12 is available; assert the value is representable */
766 assert(!(aimm & 0xfff));
767 assert(aimm <= 0xfff000);
768 base |= 1 << 22; /* apply LSL 12 */
769 aimm >>= 2;
772 tcg_out32(s, base | aimm | (rn << 5) | rd);
775 static inline void tcg_out_nop(TCGContext *s)
777 tcg_out32(s, 0xd503201f);
780 #ifdef CONFIG_SOFTMMU
781 #include "exec/softmmu_defs.h"
783 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
784 int mmu_idx) */
785 static const void * const qemu_ld_helpers[4] = {
786 helper_ldb_mmu,
787 helper_ldw_mmu,
788 helper_ldl_mmu,
789 helper_ldq_mmu,
792 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
793 uintxx_t val, int mmu_idx) */
794 static const void * const qemu_st_helpers[4] = {
795 helper_stb_mmu,
796 helper_stw_mmu,
797 helper_stl_mmu,
798 helper_stq_mmu,
801 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
803 reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
804 tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
805 tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
806 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
807 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
808 (tcg_target_long)qemu_ld_helpers[lb->opc & 3]);
809 tcg_out_callr(s, TCG_REG_TMP);
810 if (lb->opc & 0x04) {
811 tcg_out_sxt(s, 1, lb->opc & 3, lb->datalo_reg, TCG_REG_X0);
812 } else {
813 tcg_out_movr(s, 1, lb->datalo_reg, TCG_REG_X0);
816 tcg_out_goto(s, (tcg_target_long)lb->raddr);
819 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
821 reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
823 tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
824 tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
825 tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg);
826 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
827 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
828 (tcg_target_long)qemu_st_helpers[lb->opc & 3]);
829 tcg_out_callr(s, TCG_REG_TMP);
831 tcg_out_nop(s);
832 tcg_out_goto(s, (tcg_target_long)lb->raddr);
835 void tcg_out_tb_finalize(TCGContext *s)
837 int i;
838 for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
839 TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i];
840 if (label->is_ld) {
841 tcg_out_qemu_ld_slow_path(s, label);
842 } else {
843 tcg_out_qemu_st_slow_path(s, label);
848 static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
849 TCGReg data_reg, TCGReg addr_reg,
850 int mem_index,
851 uint8_t *raddr, uint8_t *label_ptr)
853 int idx;
854 TCGLabelQemuLdst *label;
856 if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
857 tcg_abort();
860 idx = s->nb_qemu_ldst_labels++;
861 label = &s->qemu_ldst_labels[idx];
862 label->is_ld = is_ld;
863 label->opc = opc;
864 label->datalo_reg = data_reg;
865 label->addrlo_reg = addr_reg;
866 label->mem_index = mem_index;
867 label->raddr = raddr;
868 label->label_ptr[0] = label_ptr;
871 /* Load and compare a TLB entry, emitting the conditional jump to the
872 slow path for the failure case, which will be patched later when finalizing
873 the slow path. Generated code returns the host addend in X1,
874 clobbers X0,X2,X3,TMP. */
875 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
876 int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
878 TCGReg base = TCG_AREG0;
879 int tlb_offset = is_read ?
880 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
881 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
882 /* Extract the TLB index from the address into X0.
883 X0<CPU_TLB_BITS:0> =
884 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
885 tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
886 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
887 /* Store the page mask part of the address and the low s_bits into X3.
888 Later this allows checking for equality and alignment at the same time.
889 X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
890 tcg_out_andi(s, (TARGET_LONG_BITS == 64), TCG_REG_X3, addr_reg,
891 (TARGET_LONG_BITS - TARGET_PAGE_BITS) + s_bits,
892 (TARGET_LONG_BITS - TARGET_PAGE_BITS));
893 /* Add any "high bits" from the tlb offset to the env address into X2,
894 to take advantage of the LSL12 form of the addi instruction.
895 X2 = env + (tlb_offset & 0xfff000) */
896 tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
897 /* Merge the tlb index contribution into X2.
898 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
899 tcg_out_arith(s, ARITH_ADD, 1, TCG_REG_X2, TCG_REG_X2,
900 TCG_REG_X0, -CPU_TLB_ENTRY_BITS);
901 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
902 X0 = load [X2 + (tlb_offset & 0x000fff)] */
903 tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
904 LDST_LD, TCG_REG_X0, TCG_REG_X2,
905 (tlb_offset & 0xfff));
906 /* Load the tlb addend. Do that early to avoid stalling.
907 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
908 tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
909 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
910 (is_read ? offsetof(CPUTLBEntry, addr_read)
911 : offsetof(CPUTLBEntry, addr_write)));
912 /* Perform the address comparison. */
913 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
914 *label_ptr = s->code_ptr;
915 /* If not equal, we jump to the slow path. */
916 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
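    /* Roughly, the fast path emitted above is:
         ubfm x0, addr, #TARGET_PAGE_BITS, #(TARGET_PAGE_BITS + CPU_TLB_BITS)
         and  x3, addr, #(PAGE_MASK | ((1 << s_bits) - 1))
         add  x2, env, #(tlb_offset & 0xfff000)
         add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
         ldr  x0, [x2, #(tlb_offset & 0xfff)]               ; tlb comparator
         ldr  x1, [x2, #(tlb_offset & 0xfff) + addend off]  ; host addend
         cmp  x0, x3
         b.ne slow_path                                     ; patched later
       leaving the host addend in x1 for the direct load/store. */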
919 #endif /* CONFIG_SOFTMMU */
921 static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r,
922 TCGReg addr_r, TCGReg off_r)
924 switch (opc) {
925 case 0:
926 tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r);
927 break;
928 case 0 | 4:
929 tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r);
930 break;
931 case 1:
932 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
933 if (TCG_LDST_BSWAP) {
934 tcg_out_rev16(s, 0, data_r, data_r);
936 break;
937 case 1 | 4:
938 if (TCG_LDST_BSWAP) {
939 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
940 tcg_out_rev16(s, 0, data_r, data_r);
941 tcg_out_sxt(s, 1, 1, data_r, data_r);
942 } else {
943 tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r);
945 break;
946 case 2:
947 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
948 if (TCG_LDST_BSWAP) {
949 tcg_out_rev(s, 0, data_r, data_r);
951 break;
952 case 2 | 4:
953 if (TCG_LDST_BSWAP) {
954 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
955 tcg_out_rev(s, 0, data_r, data_r);
956 tcg_out_sxt(s, 1, 2, data_r, data_r);
957 } else {
958 tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r);
960 break;
961 case 3:
962 tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r);
963 if (TCG_LDST_BSWAP) {
964 tcg_out_rev(s, 1, data_r, data_r);
966 break;
967 default:
968 tcg_abort();
972 static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r,
973 TCGReg addr_r, TCGReg off_r)
975 switch (opc) {
976 case 0:
977 tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r);
978 break;
979 case 1:
980 if (TCG_LDST_BSWAP) {
981 tcg_out_rev16(s, 0, TCG_REG_TMP, data_r);
982 tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r);
983 } else {
984 tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r);
986 break;
987 case 2:
988 if (TCG_LDST_BSWAP) {
989 tcg_out_rev(s, 0, TCG_REG_TMP, data_r);
990 tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r);
991 } else {
992 tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r);
994 break;
995 case 3:
996 if (TCG_LDST_BSWAP) {
997 tcg_out_rev(s, 1, TCG_REG_TMP, data_r);
998 tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r);
999 } else {
1000 tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r);
1002 break;
1003 default:
1004 tcg_abort();
1008 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
1010 TCGReg addr_reg, data_reg;
1011 #ifdef CONFIG_SOFTMMU
1012 int mem_index, s_bits;
1013 uint8_t *label_ptr;
1014 #endif
1015 data_reg = args[0];
1016 addr_reg = args[1];
1018 #ifdef CONFIG_SOFTMMU
1019 mem_index = args[2];
1020 s_bits = opc & 3;
1021 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
1022 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1023 add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg,
1024 mem_index, s->code_ptr, label_ptr);
1025 #else /* !CONFIG_SOFTMMU */
1026 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg,
1027 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1028 #endif /* CONFIG_SOFTMMU */
1031 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
1033 TCGReg addr_reg, data_reg;
1034 #ifdef CONFIG_SOFTMMU
1035 int mem_index, s_bits;
1036 uint8_t *label_ptr;
1037 #endif
1038 data_reg = args[0];
1039 addr_reg = args[1];
1041 #ifdef CONFIG_SOFTMMU
1042 mem_index = args[2];
1043 s_bits = opc & 3;
1045 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
1046 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1047 add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg,
1048 mem_index, s->code_ptr, label_ptr);
1049 #else /* !CONFIG_SOFTMMU */
1050 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg,
1051 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1052 #endif /* CONFIG_SOFTMMU */
1055 static uint8_t *tb_ret_addr;
1057 /* callee stack use example:
1058 stp x29, x30, [sp,#-32]!
1059 mov x29, sp
1060 stp x1, x2, [sp,#16]
1062 ldp x1, x2, [sp,#16]
1063 ldp x29, x30, [sp],#32
1067 /* push r1 and r2, and allocate stack space for a total of
1068 alloc_n elements (1 element = 16 bytes; alloc_n must be between 1 and 31). */
1069 static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
1070 TCGReg r1, TCGReg r2, int alloc_n)
1072 /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
1073 | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
1074 assert(alloc_n > 0 && alloc_n < 0x20);
1075 alloc_n = (-alloc_n) & 0x3f;
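    /* placing the negated 6-bit value at bit 16 rather than bit 15 doubles it
       within the imm7 field; with the 8-byte scaling of 64-bit STP this gives
       the intended alloc_n * 16 byte stack adjustment */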
1076 tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1079 /* dealloc stack space for a total of alloc_n elements and pop r1, r2. */
1080 static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
1081 TCGReg r1, TCGReg r2, int alloc_n)
1083 /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
1084 | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
1085 assert(alloc_n > 0 && alloc_n < 0x20);
1086 tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1089 static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
1090 TCGReg r1, TCGReg r2, int idx)
1092 /* using register pair offset simm7 STP 0x29000000 | (ext)
1093 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1094 assert(idx > 0 && idx < 0x20);
1095 tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1098 static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
1099 TCGReg r1, TCGReg r2, int idx)
1101 /* using register pair offset simm7 LDP 0x29400000 | (ext)
1102 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1103 assert(idx > 0 && idx < 0x20);
1104 tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1107 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1108 const TCGArg *args, const int *const_args)
1110 /* ext is set by the 64-bit cases in the switch below, which fall through
1111 to the shared 32-bit code; it selects the 64-bit (extended) register forms. */
1112 int ext = 0;
1114 switch (opc) {
1115 case INDEX_op_exit_tb:
1116 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, args[0]);
1117 tcg_out_goto(s, (tcg_target_long)tb_ret_addr);
1118 break;
1120 case INDEX_op_goto_tb:
1121 #ifndef USE_DIRECT_JUMP
1122 #error "USE_DIRECT_JUMP required for aarch64"
1123 #endif
1124 assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1125 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1126 /* actual branch destination will be patched by
1127 aarch64_tb_set_jmp_target later, beware retranslation. */
1128 tcg_out_goto_noaddr(s);
1129 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1130 break;
1132 case INDEX_op_call:
1133 if (const_args[0]) {
1134 tcg_out_call(s, args[0]);
1135 } else {
1136 tcg_out_callr(s, args[0]);
1138 break;
1140 case INDEX_op_br:
1141 tcg_out_goto_label(s, args[0]);
1142 break;
1144 case INDEX_op_ld_i32:
1145 case INDEX_op_ld_i64:
1146 case INDEX_op_st_i32:
1147 case INDEX_op_st_i64:
1148 case INDEX_op_ld8u_i32:
1149 case INDEX_op_ld8s_i32:
1150 case INDEX_op_ld16u_i32:
1151 case INDEX_op_ld16s_i32:
1152 case INDEX_op_ld8u_i64:
1153 case INDEX_op_ld8s_i64:
1154 case INDEX_op_ld16u_i64:
1155 case INDEX_op_ld16s_i64:
1156 case INDEX_op_ld32u_i64:
1157 case INDEX_op_ld32s_i64:
1158 case INDEX_op_st8_i32:
1159 case INDEX_op_st8_i64:
1160 case INDEX_op_st16_i32:
1161 case INDEX_op_st16_i64:
1162 case INDEX_op_st32_i64:
1163 tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
1164 args[0], args[1], args[2]);
1165 break;
1167 case INDEX_op_mov_i64:
1168 ext = 1; /* fall through */
1169 case INDEX_op_mov_i32:
1170 tcg_out_movr(s, ext, args[0], args[1]);
1171 break;
1173 case INDEX_op_movi_i64:
1174 tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1175 break;
1176 case INDEX_op_movi_i32:
1177 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1178 break;
1180 case INDEX_op_add_i64:
1181 ext = 1; /* fall through */
1182 case INDEX_op_add_i32:
1183 tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
1184 break;
1186 case INDEX_op_sub_i64:
1187 ext = 1; /* fall through */
1188 case INDEX_op_sub_i32:
1189 tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
1190 break;
1192 case INDEX_op_and_i64:
1193 ext = 1; /* fall through */
1194 case INDEX_op_and_i32:
1195 tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0);
1196 break;
1198 case INDEX_op_or_i64:
1199 ext = 1; /* fall through */
1200 case INDEX_op_or_i32:
1201 tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], args[2], 0);
1202 break;
1204 case INDEX_op_xor_i64:
1205 ext = 1; /* fall through */
1206 case INDEX_op_xor_i32:
1207 tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0);
1208 break;
1210 case INDEX_op_mul_i64:
1211 ext = 1; /* fall through */
1212 case INDEX_op_mul_i32:
1213 tcg_out_mul(s, ext, args[0], args[1], args[2]);
1214 break;
1216 case INDEX_op_shl_i64:
1217 ext = 1; /* fall through */
1218 case INDEX_op_shl_i32:
1219 if (const_args[2]) { /* LSL / UBFM Wd, Wn, (32 - m) */
1220 tcg_out_shl(s, ext, args[0], args[1], args[2]);
1221 } else { /* LSL / LSLV */
1222 tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]);
1224 break;
1226 case INDEX_op_shr_i64:
1227 ext = 1; /* fall through */
1228 case INDEX_op_shr_i32:
1229 if (const_args[2]) { /* LSR / UBFM Wd, Wn, m, 31 */
1230 tcg_out_shr(s, ext, args[0], args[1], args[2]);
1231 } else { /* LSR / LSRV */
1232 tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]);
1234 break;
1236 case INDEX_op_sar_i64:
1237 ext = 1; /* fall through */
1238 case INDEX_op_sar_i32:
1239 if (const_args[2]) { /* ASR / SBFM Wd, Wn, m, 31 */
1240 tcg_out_sar(s, ext, args[0], args[1], args[2]);
1241 } else { /* ASR / ASRV */
1242 tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]);
1244 break;
1246 case INDEX_op_rotr_i64:
1247 ext = 1; /* fall through */
1248 case INDEX_op_rotr_i32:
1249 if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, m */
1250 tcg_out_rotr(s, ext, args[0], args[1], args[2]);
1251 } else { /* ROR / RORV */
1252 tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]);
1254 break;
1256 case INDEX_op_rotl_i64:
1257 ext = 1; /* fall through */
1258 case INDEX_op_rotl_i32: /* same as rotate right by (32 - m) */
1259 if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
1260 tcg_out_rotl(s, ext, args[0], args[1], args[2]);
1261 } else {
1262 tcg_out_arith(s, ARITH_SUB, 0,
1263 TCG_REG_TMP, TCG_REG_XZR, args[2], 0);
1264 tcg_out_shiftrot_reg(s, SRR_ROR, ext,
1265 args[0], args[1], TCG_REG_TMP);
1267 break;
1269 case INDEX_op_brcond_i64:
1270 ext = 1; /* fall through */
1271 case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */
1272 tcg_out_cmp(s, ext, args[0], args[1], 0);
1273 tcg_out_goto_label_cond(s, args[2], args[3]);
1274 break;
1276 case INDEX_op_setcond_i64:
1277 ext = 1; /* fall through */
1278 case INDEX_op_setcond_i32:
1279 tcg_out_cmp(s, ext, args[1], args[2], 0);
1280 tcg_out_cset(s, 0, args[0], args[3]);
1281 break;
1283 case INDEX_op_qemu_ld8u:
1284 tcg_out_qemu_ld(s, args, 0 | 0);
1285 break;
1286 case INDEX_op_qemu_ld8s:
1287 tcg_out_qemu_ld(s, args, 4 | 0);
1288 break;
1289 case INDEX_op_qemu_ld16u:
1290 tcg_out_qemu_ld(s, args, 0 | 1);
1291 break;
1292 case INDEX_op_qemu_ld16s:
1293 tcg_out_qemu_ld(s, args, 4 | 1);
1294 break;
1295 case INDEX_op_qemu_ld32u:
1296 tcg_out_qemu_ld(s, args, 0 | 2);
1297 break;
1298 case INDEX_op_qemu_ld32s:
1299 tcg_out_qemu_ld(s, args, 4 | 2);
1300 break;
1301 case INDEX_op_qemu_ld32:
1302 tcg_out_qemu_ld(s, args, 0 | 2);
1303 break;
1304 case INDEX_op_qemu_ld64:
1305 tcg_out_qemu_ld(s, args, 0 | 3);
1306 break;
1307 case INDEX_op_qemu_st8:
1308 tcg_out_qemu_st(s, args, 0);
1309 break;
1310 case INDEX_op_qemu_st16:
1311 tcg_out_qemu_st(s, args, 1);
1312 break;
1313 case INDEX_op_qemu_st32:
1314 tcg_out_qemu_st(s, args, 2);
1315 break;
1316 case INDEX_op_qemu_st64:
1317 tcg_out_qemu_st(s, args, 3);
1318 break;
1320 case INDEX_op_bswap64_i64:
1321 ext = 1; /* fall through */
1322 case INDEX_op_bswap32_i64:
1323 case INDEX_op_bswap32_i32:
1324 tcg_out_rev(s, ext, args[0], args[1]);
1325 break;
1326 case INDEX_op_bswap16_i64:
1327 case INDEX_op_bswap16_i32:
1328 tcg_out_rev16(s, 0, args[0], args[1]);
1329 break;
1331 case INDEX_op_ext8s_i64:
1332 ext = 1; /* fall through */
1333 case INDEX_op_ext8s_i32:
1334 tcg_out_sxt(s, ext, 0, args[0], args[1]);
1335 break;
1336 case INDEX_op_ext16s_i64:
1337 ext = 1; /* fall through */
1338 case INDEX_op_ext16s_i32:
1339 tcg_out_sxt(s, ext, 1, args[0], args[1]);
1340 break;
1341 case INDEX_op_ext32s_i64:
1342 tcg_out_sxt(s, 1, 2, args[0], args[1]);
1343 break;
1344 case INDEX_op_ext8u_i64:
1345 case INDEX_op_ext8u_i32:
1346 tcg_out_uxt(s, 0, args[0], args[1]);
1347 break;
1348 case INDEX_op_ext16u_i64:
1349 case INDEX_op_ext16u_i32:
1350 tcg_out_uxt(s, 1, args[0], args[1]);
1351 break;
1352 case INDEX_op_ext32u_i64:
1353 tcg_out_movr(s, 0, args[0], args[1]);
1354 break;
1356 default:
1357 tcg_abort(); /* opcode not implemented */
1361 static const TCGTargetOpDef aarch64_op_defs[] = {
1362 { INDEX_op_exit_tb, { } },
1363 { INDEX_op_goto_tb, { } },
1364 { INDEX_op_call, { "ri" } },
1365 { INDEX_op_br, { } },
1367 { INDEX_op_mov_i32, { "r", "r" } },
1368 { INDEX_op_mov_i64, { "r", "r" } },
1370 { INDEX_op_movi_i32, { "r" } },
1371 { INDEX_op_movi_i64, { "r" } },
1373 { INDEX_op_ld8u_i32, { "r", "r" } },
1374 { INDEX_op_ld8s_i32, { "r", "r" } },
1375 { INDEX_op_ld16u_i32, { "r", "r" } },
1376 { INDEX_op_ld16s_i32, { "r", "r" } },
1377 { INDEX_op_ld_i32, { "r", "r" } },
1378 { INDEX_op_ld8u_i64, { "r", "r" } },
1379 { INDEX_op_ld8s_i64, { "r", "r" } },
1380 { INDEX_op_ld16u_i64, { "r", "r" } },
1381 { INDEX_op_ld16s_i64, { "r", "r" } },
1382 { INDEX_op_ld32u_i64, { "r", "r" } },
1383 { INDEX_op_ld32s_i64, { "r", "r" } },
1384 { INDEX_op_ld_i64, { "r", "r" } },
1386 { INDEX_op_st8_i32, { "r", "r" } },
1387 { INDEX_op_st16_i32, { "r", "r" } },
1388 { INDEX_op_st_i32, { "r", "r" } },
1389 { INDEX_op_st8_i64, { "r", "r" } },
1390 { INDEX_op_st16_i64, { "r", "r" } },
1391 { INDEX_op_st32_i64, { "r", "r" } },
1392 { INDEX_op_st_i64, { "r", "r" } },
1394 { INDEX_op_add_i32, { "r", "r", "r" } },
1395 { INDEX_op_add_i64, { "r", "r", "r" } },
1396 { INDEX_op_sub_i32, { "r", "r", "r" } },
1397 { INDEX_op_sub_i64, { "r", "r", "r" } },
1398 { INDEX_op_mul_i32, { "r", "r", "r" } },
1399 { INDEX_op_mul_i64, { "r", "r", "r" } },
1400 { INDEX_op_and_i32, { "r", "r", "r" } },
1401 { INDEX_op_and_i64, { "r", "r", "r" } },
1402 { INDEX_op_or_i32, { "r", "r", "r" } },
1403 { INDEX_op_or_i64, { "r", "r", "r" } },
1404 { INDEX_op_xor_i32, { "r", "r", "r" } },
1405 { INDEX_op_xor_i64, { "r", "r", "r" } },
1407 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1408 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1409 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1410 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1411 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1412 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1413 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1414 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1415 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1416 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1418 { INDEX_op_brcond_i32, { "r", "r" } },
1419 { INDEX_op_setcond_i32, { "r", "r", "r" } },
1420 { INDEX_op_brcond_i64, { "r", "r" } },
1421 { INDEX_op_setcond_i64, { "r", "r", "r" } },
1423 { INDEX_op_qemu_ld8u, { "r", "l" } },
1424 { INDEX_op_qemu_ld8s, { "r", "l" } },
1425 { INDEX_op_qemu_ld16u, { "r", "l" } },
1426 { INDEX_op_qemu_ld16s, { "r", "l" } },
1427 { INDEX_op_qemu_ld32u, { "r", "l" } },
1428 { INDEX_op_qemu_ld32s, { "r", "l" } },
1430 { INDEX_op_qemu_ld32, { "r", "l" } },
1431 { INDEX_op_qemu_ld64, { "r", "l" } },
1433 { INDEX_op_qemu_st8, { "l", "l" } },
1434 { INDEX_op_qemu_st16, { "l", "l" } },
1435 { INDEX_op_qemu_st32, { "l", "l" } },
1436 { INDEX_op_qemu_st64, { "l", "l" } },
1438 { INDEX_op_bswap16_i32, { "r", "r" } },
1439 { INDEX_op_bswap32_i32, { "r", "r" } },
1440 { INDEX_op_bswap16_i64, { "r", "r" } },
1441 { INDEX_op_bswap32_i64, { "r", "r" } },
1442 { INDEX_op_bswap64_i64, { "r", "r" } },
1444 { INDEX_op_ext8s_i32, { "r", "r" } },
1445 { INDEX_op_ext16s_i32, { "r", "r" } },
1446 { INDEX_op_ext8u_i32, { "r", "r" } },
1447 { INDEX_op_ext16u_i32, { "r", "r" } },
1449 { INDEX_op_ext8s_i64, { "r", "r" } },
1450 { INDEX_op_ext16s_i64, { "r", "r" } },
1451 { INDEX_op_ext32s_i64, { "r", "r" } },
1452 { INDEX_op_ext8u_i64, { "r", "r" } },
1453 { INDEX_op_ext16u_i64, { "r", "r" } },
1454 { INDEX_op_ext32u_i64, { "r", "r" } },
1456 { -1 },
1459 static void tcg_target_init(TCGContext *s)
1461 #if !defined(CONFIG_USER_ONLY)
1462 /* fail safe */
1463 if ((1ULL << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) {
1464 tcg_abort();
1466 #endif
1467 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1468 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1470 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1471 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1472 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1473 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1474 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1475 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1476 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1477 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1478 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1479 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1480 (1 << TCG_REG_X18));
1482 tcg_regset_clear(s->reserved_regs);
1483 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1484 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1485 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1486 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1488 tcg_add_target_add_op_defs(aarch64_op_defs);
1491 static void tcg_target_qemu_prologue(TCGContext *s)
1493 /* NB: frame sizes are in 16 byte stack units! */
1494 int frame_size_callee_saved, frame_size_tcg_locals;
1495 TCGReg r;
1497 /* save pairs (FP, LR) and (X19, X20) .. (X27, X28) */
1498 frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1;
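    /* i.e. 6 pairs (96 bytes): (FP, LR) plus (X19, X20) .. (X27, X28) */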
1500 /* frame size requirement for TCG local variables */
1501 frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE
1502 + CPU_TEMP_BUF_NLONGS * sizeof(long)
1503 + (TCG_TARGET_STACK_ALIGN - 1);
1504 frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
1505 frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN;
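    /* rounded up to the stack alignment and expressed in 16 byte stack
       units, per the convention noted above */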
1507 /* push (FP, LR) and update sp */
1508 tcg_out_push_pair(s, TCG_REG_SP,
1509 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1511 /* FP -> callee_saved */
1512 tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP);
1514 /* store callee-preserved regs x19..x28 using FP -> callee_saved */
1515 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1516 int idx = (r - TCG_REG_X19) / 2 + 1;
1517 tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
1520 /* make stack space for TCG locals */
1521 tcg_out_subi(s, 1, TCG_REG_SP, TCG_REG_SP,
1522 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1523 /* inform TCG about how to find TCG locals with register, offset, size */
1524 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1525 CPU_TEMP_BUF_NLONGS * sizeof(long));
1527 #if defined(CONFIG_USE_GUEST_BASE)
1528 if (GUEST_BASE) {
1529 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
1530 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1532 #endif
1534 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1535 tcg_out_gotor(s, tcg_target_call_iarg_regs[1]);
1537 tb_ret_addr = s->code_ptr;
1539 /* remove TCG locals stack space */
1540 tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP,
1541 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1543 /* restore registers x19..x28.
1544 FP must be preserved, so it still points to callee_saved area */
1545 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1546 int idx = (r - TCG_REG_X19) / 2 + 1;
1547 tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx);
1550 /* pop (FP, LR), restore SP to previous frame, return */
1551 tcg_out_pop_pair(s, TCG_REG_SP,
1552 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1553 tcg_out_ret(s);