tcg/aarch64/tcg-target.inc.c
1 /*
2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "tcg-pool.inc.c"
14 #include "qemu/bitops.h"
16 /* We're going to re-use TCGType when setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33 #endif /* CONFIG_DEBUG_TCG */
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
72 #ifndef CONFIG_SOFTMMU
73 /* Note that XZR cannot be encoded in the address base register slot,
74 as that actually encodes SP. So if we need to zero-extend the guest
75 address, via the address index register slot, we need to load even
76 a zero guest base into a register. */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
79 #endif
81 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
83 ptrdiff_t offset = target - code_ptr;
84 tcg_debug_assert(offset == sextract64(offset, 0, 26));
85 /* read instruction, mask away previous PC_REL26 parameter contents,
86 set the proper offset, then write back the instruction. */
87 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
90 static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
91 tcg_insn_unit *target)
93 ptrdiff_t offset = target - code_ptr;
94 tcg_insn_unit insn;
95 tcg_debug_assert(offset == sextract64(offset, 0, 26));
96 /* read instruction, mask away previous PC_REL26 parameter contents,
97 set the proper offset, then write back the instruction. */
98 insn = atomic_read(code_ptr);
99 atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
102 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
104 ptrdiff_t offset = target - code_ptr;
105 tcg_debug_assert(offset == sextract64(offset, 0, 19));
106 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
109 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
110 intptr_t value, intptr_t addend)
112 tcg_debug_assert(addend == 0);
113 switch (type) {
114 case R_AARCH64_JUMP26:
115 case R_AARCH64_CALL26:
116 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
117 break;
118 case R_AARCH64_CONDBR19:
119 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
120 break;
121 default:
122 tcg_abort();
126 #define TCG_CT_CONST_AIMM 0x100
127 #define TCG_CT_CONST_LIMM 0x200
128 #define TCG_CT_CONST_ZERO 0x400
129 #define TCG_CT_CONST_MONE 0x800
131 /* parse target specific constraints */
132 static const char *target_parse_constraint(TCGArgConstraint *ct,
133 const char *ct_str, TCGType type)
135 switch (*ct_str++) {
136 case 'r': /* general registers */
137 ct->ct |= TCG_CT_REG;
138 ct->u.regs |= 0xffffffffu;
139 break;
140 case 'w': /* advsimd registers */
141 ct->ct |= TCG_CT_REG;
142 ct->u.regs |= 0xffffffff00000000ull;
143 break;
144 case 'l': /* qemu_ld / qemu_st address, data_reg */
145 ct->ct |= TCG_CT_REG;
146 ct->u.regs = 0xffffffffu;
147 #ifdef CONFIG_SOFTMMU
148 /* x0 and x1 will be overwritten when reading the tlb entry,
149 and x2 and x3 are needed for the helper args, so better to avoid using them. */
150 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
151 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
152 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
153 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
154 #endif
155 break;
156 case 'A': /* Valid for arithmetic immediate (positive or negative). */
157 ct->ct |= TCG_CT_CONST_AIMM;
158 break;
159 case 'L': /* Valid for logical immediate. */
160 ct->ct |= TCG_CT_CONST_LIMM;
161 break;
162 case 'M': /* minus one */
163 ct->ct |= TCG_CT_CONST_MONE;
164 break;
165 case 'Z': /* zero */
166 ct->ct |= TCG_CT_CONST_ZERO;
167 break;
168 default:
169 return NULL;
171 return ct_str;
174 /* Match a constant valid for addition (12-bit, optionally shifted). */
175 static inline bool is_aimm(uint64_t val)
177 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
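/* Editor's illustration (not part of the original source): 0x123 and
   0x123000 both match (the unshifted and LSL #12 forms respectively),
   while 0x123456 is rejected because it has bits set in both halves. */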
180 /* Match a constant valid for logical operations. */
181 static inline bool is_limm(uint64_t val)
183 /* Taking a simplified view of the logical immediates for now, ignoring
184 the replication that can happen across the field. Match bit patterns
185 of the forms
186 0....01....1
187 0..01..10..0
188 and their inverses. */
190 /* Make things easier below, by testing the form with msb clear. */
191 if ((int64_t)val < 0) {
192 val = ~val;
194 if (val == 0) {
195 return false;
197 val += val & -val;
198 return (val & (val - 1)) == 0;
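/* Editor's illustration (not part of the original source): for
   val = 0x0ff0 (form 0..01..10..0), val & -val == 0x0010, so the sum
   becomes 0x1000, a single power of two, and the final test passes.
   For val = 0x0f0f the sum is 0x0f10, which is not a power of two, so
   the value is correctly rejected. */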
201 /* Match a constant that is valid for vectors. */
202 static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
204 int i;
206 *op = 0;
207 /* Match replication across 8 bits. */
208 if (v64 == dup_const(MO_8, v64)) {
209 *cmode = 0xe;
210 *imm8 = v64 & 0xff;
211 return true;
213 /* Match replication across 16 bits. */
214 if (v64 == dup_const(MO_16, v64)) {
215 uint16_t v16 = v64;
217 if (v16 == (v16 & 0xff)) {
218 *cmode = 0x8;
219 *imm8 = v16 & 0xff;
220 return true;
221 } else if (v16 == (v16 & 0xff00)) {
222 *cmode = 0xa;
223 *imm8 = v16 >> 8;
224 return true;
227 /* Match replication across 32 bits. */
228 if (v64 == dup_const(MO_32, v64)) {
229 uint32_t v32 = v64;
231 if (v32 == (v32 & 0xff)) {
232 *cmode = 0x0;
233 *imm8 = v32 & 0xff;
234 return true;
235 } else if (v32 == (v32 & 0xff00)) {
236 *cmode = 0x2;
237 *imm8 = (v32 >> 8) & 0xff;
238 return true;
239 } else if (v32 == (v32 & 0xff0000)) {
240 *cmode = 0x4;
241 *imm8 = (v32 >> 16) & 0xff;
242 return true;
243 } else if (v32 == (v32 & 0xff000000)) {
244 *cmode = 0x6;
245 *imm8 = v32 >> 24;
246 return true;
247 } else if ((v32 & 0xffff00ff) == 0xff) {
248 *cmode = 0xc;
249 *imm8 = (v32 >> 8) & 0xff;
250 return true;
251 } else if ((v32 & 0xff00ffff) == 0xffff) {
252 *cmode = 0xd;
253 *imm8 = (v32 >> 16) & 0xff;
254 return true;
256 /* Match forms of a float32. */
257 if (extract32(v32, 0, 19) == 0
258 && (extract32(v32, 25, 6) == 0x20
259 || extract32(v32, 25, 6) == 0x1f)) {
260 *cmode = 0xf;
261 *imm8 = (extract32(v32, 31, 1) << 7)
262 | (extract32(v32, 25, 1) << 6)
263 | extract32(v32, 19, 6);
264 return true;
267 /* Match forms of a float64. */
268 if (extract64(v64, 0, 48) == 0
269 && (extract64(v64, 54, 9) == 0x100
270 || extract64(v64, 54, 9) == 0x0ff)) {
271 *cmode = 0xf;
272 *op = 1;
273 *imm8 = (extract64(v64, 63, 1) << 7)
274 | (extract64(v64, 54, 1) << 6)
275 | extract64(v64, 48, 6);
276 return true;
278 /* Match bytes of 0x00 and 0xff. */
279 for (i = 0; i < 64; i += 8) {
280 uint64_t byte = extract64(v64, i, 8);
281 if (byte != 0 && byte != 0xff) {
282 break;
285 if (i == 64) {
286 *cmode = 0xe;
287 *op = 1;
288 *imm8 = (extract64(v64, 0, 1) << 0)
289 | (extract64(v64, 8, 1) << 1)
290 | (extract64(v64, 16, 1) << 2)
291 | (extract64(v64, 24, 1) << 3)
292 | (extract64(v64, 32, 1) << 4)
293 | (extract64(v64, 40, 1) << 5)
294 | (extract64(v64, 48, 1) << 6)
295 | (extract64(v64, 56, 1) << 7);
296 return true;
298 return false;
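/* Editor's illustration (not part of the original source):
   0x0101010101010101 matches the 8-bit replication case (cmode 0xe,
   op 0, imm8 0x01); 0x3f8000003f800000 (1.0f replicated) matches the
   float32 form (cmode 0xf, op 0, imm8 0x70); 0xffffffff00000000 is
   only caught by the final 0x00/0xff byte-mask case (cmode 0xe, op 1,
   imm8 0xf0). */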
301 static int tcg_target_const_match(tcg_target_long val, TCGType type,
302 const TCGArgConstraint *arg_ct)
304 int ct = arg_ct->ct;
306 if (ct & TCG_CT_CONST) {
307 return 1;
309 if (type == TCG_TYPE_I32) {
310 val = (int32_t)val;
312 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
313 return 1;
315 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
316 return 1;
318 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
319 return 1;
321 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
322 return 1;
325 return 0;
328 enum aarch64_cond_code {
329 COND_EQ = 0x0,
330 COND_NE = 0x1,
331 COND_CS = 0x2, /* Unsigned greater or equal */
332 COND_HS = COND_CS, /* ALIAS greater or equal */
333 COND_CC = 0x3, /* Unsigned less than */
334 COND_LO = COND_CC, /* ALIAS Lower */
335 COND_MI = 0x4, /* Negative */
336 COND_PL = 0x5, /* Zero or greater */
337 COND_VS = 0x6, /* Overflow */
338 COND_VC = 0x7, /* No overflow */
339 COND_HI = 0x8, /* Unsigned greater than */
340 COND_LS = 0x9, /* Unsigned less or equal */
341 COND_GE = 0xa,
342 COND_LT = 0xb,
343 COND_GT = 0xc,
344 COND_LE = 0xd,
345 COND_AL = 0xe,
346 COND_NV = 0xf, /* behaves like COND_AL here */
349 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
350 [TCG_COND_EQ] = COND_EQ,
351 [TCG_COND_NE] = COND_NE,
352 [TCG_COND_LT] = COND_LT,
353 [TCG_COND_GE] = COND_GE,
354 [TCG_COND_LE] = COND_LE,
355 [TCG_COND_GT] = COND_GT,
356 /* unsigned */
357 [TCG_COND_LTU] = COND_LO,
358 [TCG_COND_GTU] = COND_HI,
359 [TCG_COND_GEU] = COND_HS,
360 [TCG_COND_LEU] = COND_LS,
363 typedef enum {
364 LDST_ST = 0, /* store */
365 LDST_LD = 1, /* load */
366 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
367 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
368 } AArch64LdstType;
370 /* We encode the format of the insn into the beginning of the name, so that
371 we can have the preprocessor help "typecheck" the insn vs the output
372 function. Arm didn't provide us with nice names for the formats, so we
373 use the section number of the architecture reference manual in which the
374 instruction group is described. */
375 typedef enum {
376 /* Compare and branch (immediate). */
377 I3201_CBZ = 0x34000000,
378 I3201_CBNZ = 0x35000000,
380 /* Conditional branch (immediate). */
381 I3202_B_C = 0x54000000,
383 /* Unconditional branch (immediate). */
384 I3206_B = 0x14000000,
385 I3206_BL = 0x94000000,
387 /* Unconditional branch (register). */
388 I3207_BR = 0xd61f0000,
389 I3207_BLR = 0xd63f0000,
390 I3207_RET = 0xd65f0000,
392 /* Load literal for loading the address at pc-relative offset */
393 I3305_LDR = 0x58000000,
394 I3305_LDR_v64 = 0x5c000000,
395 I3305_LDR_v128 = 0x9c000000,
397 /* Load/store register. Described here as 3.3.12, but the helper
398 that emits them can transform to 3.3.10 or 3.3.13. */
399 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
400 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
401 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
402 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
404 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
405 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
406 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
407 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
409 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
410 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
412 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
413 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
414 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
416 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
417 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
419 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
420 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
422 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
423 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
425 I3312_TO_I3310 = 0x00200800,
426 I3312_TO_I3313 = 0x01000000,
428 /* Load/store register pair instructions. */
429 I3314_LDP = 0x28400000,
430 I3314_STP = 0x28000000,
432 /* Add/subtract immediate instructions. */
433 I3401_ADDI = 0x11000000,
434 I3401_ADDSI = 0x31000000,
435 I3401_SUBI = 0x51000000,
436 I3401_SUBSI = 0x71000000,
438 /* Bitfield instructions. */
439 I3402_BFM = 0x33000000,
440 I3402_SBFM = 0x13000000,
441 I3402_UBFM = 0x53000000,
443 /* Extract instruction. */
444 I3403_EXTR = 0x13800000,
446 /* Logical immediate instructions. */
447 I3404_ANDI = 0x12000000,
448 I3404_ORRI = 0x32000000,
449 I3404_EORI = 0x52000000,
451 /* Move wide immediate instructions. */
452 I3405_MOVN = 0x12800000,
453 I3405_MOVZ = 0x52800000,
454 I3405_MOVK = 0x72800000,
456 /* PC relative addressing instructions. */
457 I3406_ADR = 0x10000000,
458 I3406_ADRP = 0x90000000,
460 /* Add/subtract shifted register instructions (without a shift). */
461 I3502_ADD = 0x0b000000,
462 I3502_ADDS = 0x2b000000,
463 I3502_SUB = 0x4b000000,
464 I3502_SUBS = 0x6b000000,
466 /* Add/subtract shifted register instructions (with a shift). */
467 I3502S_ADD_LSL = I3502_ADD,
469 /* Add/subtract with carry instructions. */
470 I3503_ADC = 0x1a000000,
471 I3503_SBC = 0x5a000000,
473 /* Conditional select instructions. */
474 I3506_CSEL = 0x1a800000,
475 I3506_CSINC = 0x1a800400,
476 I3506_CSINV = 0x5a800000,
477 I3506_CSNEG = 0x5a800400,
479 /* Data-processing (1 source) instructions. */
480 I3507_CLZ = 0x5ac01000,
481 I3507_RBIT = 0x5ac00000,
482 I3507_REV16 = 0x5ac00400,
483 I3507_REV32 = 0x5ac00800,
484 I3507_REV64 = 0x5ac00c00,
486 /* Data-processing (2 source) instructions. */
487 I3508_LSLV = 0x1ac02000,
488 I3508_LSRV = 0x1ac02400,
489 I3508_ASRV = 0x1ac02800,
490 I3508_RORV = 0x1ac02c00,
491 I3508_SMULH = 0x9b407c00,
492 I3508_UMULH = 0x9bc07c00,
493 I3508_UDIV = 0x1ac00800,
494 I3508_SDIV = 0x1ac00c00,
496 /* Data-processing (3 source) instructions. */
497 I3509_MADD = 0x1b000000,
498 I3509_MSUB = 0x1b008000,
500 /* Logical shifted register instructions (without a shift). */
501 I3510_AND = 0x0a000000,
502 I3510_BIC = 0x0a200000,
503 I3510_ORR = 0x2a000000,
504 I3510_ORN = 0x2a200000,
505 I3510_EOR = 0x4a000000,
506 I3510_EON = 0x4a200000,
507 I3510_ANDS = 0x6a000000,
509 /* AdvSIMD copy */
510 I3605_DUP = 0x0e000400,
511 I3605_INS = 0x4e001c00,
512 I3605_UMOV = 0x0e003c00,
514 /* AdvSIMD modified immediate */
515 I3606_MOVI = 0x0f000400,
517 /* AdvSIMD shift by immediate */
518 I3614_SSHR = 0x0f000400,
519 I3614_SSRA = 0x0f001400,
520 I3614_SHL = 0x0f005400,
521 I3614_USHR = 0x2f000400,
522 I3614_USRA = 0x2f001400,
524 /* AdvSIMD three same. */
525 I3616_ADD = 0x0e208400,
526 I3616_AND = 0x0e201c00,
527 I3616_BIC = 0x0e601c00,
528 I3616_EOR = 0x2e201c00,
529 I3616_MUL = 0x0e209c00,
530 I3616_ORR = 0x0ea01c00,
531 I3616_ORN = 0x0ee01c00,
532 I3616_SUB = 0x2e208400,
533 I3616_CMGT = 0x0e203400,
534 I3616_CMGE = 0x0e203c00,
535 I3616_CMTST = 0x0e208c00,
536 I3616_CMHI = 0x2e203400,
537 I3616_CMHS = 0x2e203c00,
538 I3616_CMEQ = 0x2e208c00,
540 /* AdvSIMD two-reg misc. */
541 I3617_CMGT0 = 0x0e208800,
542 I3617_CMEQ0 = 0x0e209800,
543 I3617_CMLT0 = 0x0e20a800,
544 I3617_CMGE0 = 0x2e208800,
545 I3617_CMLE0 = 0x2e20a800,
546 I3617_NOT = 0x2e205800,
547 I3617_NEG = 0x2e20b800,
549 /* System instructions. */
550 NOP = 0xd503201f,
551 DMB_ISH = 0xd50338bf,
552 DMB_LD = 0x00000100,
553 DMB_ST = 0x00000200,
554 } AArch64Insn;
556 static inline uint32_t tcg_in32(TCGContext *s)
558 uint32_t v = *(uint32_t *)s->code_ptr;
559 return v;
562 /* Emit an opcode with "type-checking" of the format. */
563 #define tcg_out_insn(S, FMT, OP, ...) \
564 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
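/* Editor's illustration (not part of the original source):
   tcg_out_insn(s, 3502, ADD, ext, rd, rn, rm) expands to
   tcg_out_insn_3502(s, I3502_ADD, ext, rd, rn, rm), so pairing an
   opcode with the wrong format suffix yields an undefined identifier
   and the build fails. */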
566 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
568 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
571 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
572 TCGReg rt, int imm19)
574 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
577 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
578 TCGCond c, int imm19)
580 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
583 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
585 tcg_out32(s, insn | (imm26 & 0x03ffffff));
588 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
590 tcg_out32(s, insn | rn << 5);
593 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
594 TCGReg r1, TCGReg r2, TCGReg rn,
595 tcg_target_long ofs, bool pre, bool w)
597 insn |= 1u << 31; /* ext */
598 insn |= pre << 24;
599 insn |= w << 23;
601 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
602 insn |= (ofs & (0x7f << 3)) << (15 - 3);
604 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
607 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
608 TCGReg rd, TCGReg rn, uint64_t aimm)
610 if (aimm > 0xfff) {
611 tcg_debug_assert((aimm & 0xfff) == 0);
612 aimm >>= 12;
613 tcg_debug_assert(aimm <= 0xfff);
614 aimm |= 1 << 12; /* apply LSL 12 */
616 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
619 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
620 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
621 that feed the DecodeBitMasks pseudo function. */
622 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
623 TCGReg rd, TCGReg rn, int n, int immr, int imms)
625 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
626 | rn << 5 | rd);
629 #define tcg_out_insn_3404 tcg_out_insn_3402
631 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
632 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
634 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
635 | rn << 5 | rd);
638 /* This function is used for the Move (wide immediate) instruction group.
639 Note that SHIFT is a full shift count, not the 2 bit HW field. */
640 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
641 TCGReg rd, uint16_t half, unsigned shift)
643 tcg_debug_assert((shift & ~0x30) == 0);
644 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
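/* Editor's illustration (not part of the original source): SHIFT is
   0, 16, 32 or 48, so shift << (21 - 4) equals (shift / 16) << 21,
   placing the 2-bit hw field at bit 21; e.g. shift == 48 yields
   hw == 3. */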
647 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
648 TCGReg rd, int64_t disp)
650 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
653 /* This function is for 3.5.2 (Add/subtract shifted register), for
654 the rare occasion when we actually want to supply a shift amount. */
655 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
656 TCGType ext, TCGReg rd, TCGReg rn,
657 TCGReg rm, int imm6)
659 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
662 /* This function is for 3.5.2 (Add/subtract shifted register),
663 and 3.5.10 (Logical shifted register), for the vast majority of cases
664 when we don't want to apply a shift. Thus it can also be used for
665 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
666 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
667 TCGReg rd, TCGReg rn, TCGReg rm)
669 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
672 #define tcg_out_insn_3503 tcg_out_insn_3502
673 #define tcg_out_insn_3508 tcg_out_insn_3502
674 #define tcg_out_insn_3510 tcg_out_insn_3502
676 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
677 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
679 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
680 | tcg_cond_to_aarch64[c] << 12);
683 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
684 TCGReg rd, TCGReg rn)
686 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
689 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
690 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
692 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
695 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
696 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
698 /* Note that bit 11 set means general register input. Therefore
699 we can handle both register sets with one function. */
700 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
701 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
704 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
705 TCGReg rd, bool op, int cmode, uint8_t imm8)
707 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
708 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
711 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
712 TCGReg rd, TCGReg rn, unsigned immhb)
714 tcg_out32(s, insn | q << 30 | immhb << 16
715 | (rn & 0x1f) << 5 | (rd & 0x1f));
718 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
719 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
721 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
722 | (rn & 0x1f) << 5 | (rd & 0x1f));
725 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
726 unsigned size, TCGReg rd, TCGReg rn)
728 tcg_out32(s, insn | q << 30 | (size << 22)
729 | (rn & 0x1f) << 5 | (rd & 0x1f));
732 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
733 TCGReg rd, TCGReg base, TCGType ext,
734 TCGReg regoff)
736 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
737 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
738 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
741 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
742 TCGReg rd, TCGReg rn, intptr_t offset)
744 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
747 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
748 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
750 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
751 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
752 | rn << 5 | (rd & 0x1f));
755 /* Register to register move using ORR (shifted register with no shift). */
756 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
758 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
761 /* Register to register move using ADDI (move to/from SP). */
762 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
764 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
767 /* This function is used for the Logical (immediate) instruction group.
768 The value of LIMM must satisfy IS_LIMM. See the comment above about
769 only supporting simplified logical immediates. */
770 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
771 TCGReg rd, TCGReg rn, uint64_t limm)
773 unsigned h, l, r, c;
775 tcg_debug_assert(is_limm(limm));
777 h = clz64(limm);
778 l = ctz64(limm);
779 if (l == 0) {
780 r = 0; /* form 0....01....1 */
781 c = ctz64(~limm) - 1;
782 if (h == 0) {
783 r = clz64(~limm); /* form 1..10..01..1 */
784 c += r;
786 } else {
787 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
788 c = r - h - 1;
790 if (ext == TCG_TYPE_I32) {
791 r &= 31;
792 c &= 31;
795 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
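/* Editor's illustration (not part of the original source): for
   limm = 0x0000000000ff0000 with ext == TCG_TYPE_I64, h = 40 and
   l = 16, giving r = 48 and c = 7; the emitted N = 1, immr = 48,
   imms = 7 decodes back to eight consecutive ones rotated into
   bits 16..23. */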
798 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
799 TCGReg rd, uint64_t v64)
801 int op, cmode, imm8;
803 if (is_fimm(v64, &op, &cmode, &imm8)) {
804 tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
805 } else if (type == TCG_TYPE_V128) {
806 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
807 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
808 } else {
809 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
810 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
814 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
815 tcg_target_long value)
817 tcg_target_long svalue = value;
818 tcg_target_long ivalue = ~value;
819 tcg_target_long t0, t1, t2;
820 int s0, s1;
821 AArch64Insn opc;
823 switch (type) {
824 case TCG_TYPE_I32:
825 case TCG_TYPE_I64:
826 tcg_debug_assert(rd < 32);
827 break;
829 case TCG_TYPE_V64:
830 case TCG_TYPE_V128:
831 tcg_debug_assert(rd >= 32);
832 tcg_out_dupi_vec(s, type, rd, value);
833 return;
835 default:
836 g_assert_not_reached();
839 /* For 32-bit values, discard potential garbage in value. For 64-bit
840 values within [2**31, 2**32-1], we can create smaller sequences by
841 interpreting this as a negative 32-bit number, while ensuring that
842 the high 32 bits are cleared by setting SF=0. */
843 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
844 svalue = (int32_t)value;
845 value = (uint32_t)value;
846 ivalue = (uint32_t)ivalue;
847 type = TCG_TYPE_I32;
850 /* Speed things up by handling the common case of small positive
851 and negative values specially. */
852 if ((value & ~0xffffull) == 0) {
853 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
854 return;
855 } else if ((ivalue & ~0xffffull) == 0) {
856 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
857 return;
860 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
861 use the sign-extended value. That lets us match rotated values such
862 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
863 if (is_limm(svalue)) {
864 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
865 return;
868 /* Look for host pointer values within 4G of the PC. This happens
869 often when loading pointers to QEMU's own data structures. */
870 if (type == TCG_TYPE_I64) {
871 tcg_target_long disp = value - (intptr_t)s->code_ptr;
872 if (disp == sextract64(disp, 0, 21)) {
873 tcg_out_insn(s, 3406, ADR, rd, disp);
874 return;
876 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
877 if (disp == sextract64(disp, 0, 21)) {
878 tcg_out_insn(s, 3406, ADRP, rd, disp);
879 if (value & 0xfff) {
880 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
882 return;
886 /* Would it take fewer insns to begin with MOVN? */
887 if (ctpop64(value) >= 32) {
888 t0 = ivalue;
889 opc = I3405_MOVN;
890 } else {
891 t0 = value;
892 opc = I3405_MOVZ;
894 s0 = ctz64(t0) & (63 & -16);
895 t1 = t0 & ~(0xffffUL << s0);
896 s1 = ctz64(t1) & (63 & -16);
897 t2 = t1 & ~(0xffffUL << s1);
898 if (t2 == 0) {
899 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
900 if (t1 != 0) {
901 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
903 return;
906 /* For more than 2 insns, dump it into the constant pool. */
907 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
908 tcg_out_insn(s, 3305, LDR, 0, rd);
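/* Editor's illustration (not part of the original source), assuming
   the value is not within 4G of the code pointer so the ADR/ADRP
   shortcut does not apply: value = 0xffffffff12340000 is not a logical
   immediate and has 37 bits set, so MOVN is chosen; t0 =
   0x00000000edcbffff, s0 = 0, t1 = 0x00000000edcb0000, s1 = 16,
   t2 = 0, and the two-insn sequence is
       MOVN rd, #0xffff            ; rd = 0xffffffffffff0000
       MOVK rd, #0x1234, lsl #16   ; rd = 0xffffffff12340000        */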
911 /* Define something more legible for general use. */
912 #define tcg_out_ldst_r tcg_out_insn_3310
914 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
915 TCGReg rn, intptr_t offset, int lgsize)
917 /* If the offset is naturally aligned and in range, then we can
918 use the scaled uimm12 encoding */
919 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
920 uintptr_t scaled_uimm = offset >> lgsize;
921 if (scaled_uimm <= 0xfff) {
922 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
923 return;
927 /* Small signed offsets can use the unscaled encoding. */
928 if (offset >= -256 && offset < 256) {
929 tcg_out_insn_3312(s, insn, rd, rn, offset);
930 return;
933 /* Worst-case scenario, move offset to temp register, use reg offset. */
934 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
935 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
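/* Editor's illustration (not part of the original source): with
   lgsize == 3, offset 0x1f8 is 8-byte aligned and 0x1f8 >> 3 == 0x3f,
   so the scaled uimm12 form is used; offset -16 falls back to the
   unscaled signed 9-bit form; an offset such as 0x123456 is loaded
   into TCG_REG_TMP and the register-offset form is used. */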
938 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
940 if (ret == arg) {
941 return;
943 switch (type) {
944 case TCG_TYPE_I32:
945 case TCG_TYPE_I64:
946 if (ret < 32 && arg < 32) {
947 tcg_out_movr(s, type, ret, arg);
948 break;
949 } else if (ret < 32) {
950 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
951 break;
952 } else if (arg < 32) {
953 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
954 break;
956 /* FALLTHRU */
958 case TCG_TYPE_V64:
959 tcg_debug_assert(ret >= 32 && arg >= 32);
960 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
961 break;
962 case TCG_TYPE_V128:
963 tcg_debug_assert(ret >= 32 && arg >= 32);
964 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
965 break;
967 default:
968 g_assert_not_reached();
972 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
973 TCGReg base, intptr_t ofs)
975 AArch64Insn insn;
976 int lgsz;
978 switch (type) {
979 case TCG_TYPE_I32:
980 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
981 lgsz = 2;
982 break;
983 case TCG_TYPE_I64:
984 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
985 lgsz = 3;
986 break;
987 case TCG_TYPE_V64:
988 insn = I3312_LDRVD;
989 lgsz = 3;
990 break;
991 case TCG_TYPE_V128:
992 insn = I3312_LDRVQ;
993 lgsz = 4;
994 break;
995 default:
996 g_assert_not_reached();
998 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1001 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1002 TCGReg base, intptr_t ofs)
1004 AArch64Insn insn;
1005 int lgsz;
1007 switch (type) {
1008 case TCG_TYPE_I32:
1009 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1010 lgsz = 2;
1011 break;
1012 case TCG_TYPE_I64:
1013 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1014 lgsz = 3;
1015 break;
1016 case TCG_TYPE_V64:
1017 insn = I3312_STRVD;
1018 lgsz = 3;
1019 break;
1020 case TCG_TYPE_V128:
1021 insn = I3312_STRVQ;
1022 lgsz = 4;
1023 break;
1024 default:
1025 g_assert_not_reached();
1027 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1030 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1031 TCGReg base, intptr_t ofs)
1033 if (type <= TCG_TYPE_I64 && val == 0) {
1034 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1035 return true;
1037 return false;
1040 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1041 TCGReg rn, unsigned int a, unsigned int b)
1043 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1046 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1047 TCGReg rn, unsigned int a, unsigned int b)
1049 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1052 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1053 TCGReg rn, unsigned int a, unsigned int b)
1055 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1058 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1059 TCGReg rn, TCGReg rm, unsigned int a)
1061 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1064 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1065 TCGReg rd, TCGReg rn, unsigned int m)
1067 int bits = ext ? 64 : 32;
1068 int max = bits - 1;
1069 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1072 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1073 TCGReg rd, TCGReg rn, unsigned int m)
1075 int max = ext ? 63 : 31;
1076 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1079 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1080 TCGReg rd, TCGReg rn, unsigned int m)
1082 int max = ext ? 63 : 31;
1083 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1086 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1087 TCGReg rd, TCGReg rn, unsigned int m)
1089 int max = ext ? 63 : 31;
1090 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1093 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1094 TCGReg rd, TCGReg rn, unsigned int m)
1096 int bits = ext ? 64 : 32;
1097 int max = bits - 1;
1098 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1101 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1102 TCGReg rn, unsigned lsb, unsigned width)
1104 unsigned size = ext ? 64 : 32;
1105 unsigned a = (size - lsb) & (size - 1);
1106 unsigned b = width - 1;
1107 tcg_out_bfm(s, ext, rd, rn, a, b);
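/* Editor's illustration (not part of the original source): a 32-bit
   deposit at lsb = 8, width = 16 gives a = (32 - 8) & 31 = 24 and
   b = 15, i.e. BFM Wd, Wn, #24, #15, which is the alias
   BFI Wd, Wn, #8, #16. */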
1110 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1111 tcg_target_long b, bool const_b)
1113 if (const_b) {
1114 /* Using CMP or CMN aliases. */
1115 if (b >= 0) {
1116 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1117 } else {
1118 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1120 } else {
1121 /* Using CMP alias SUBS wzr, Wn, Wm */
1122 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1126 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1128 ptrdiff_t offset = target - s->code_ptr;
1129 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1130 tcg_out_insn(s, 3206, B, offset);
1133 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1135 ptrdiff_t offset = target - s->code_ptr;
1136 if (offset == sextract64(offset, 0, 26)) {
1137 tcg_out_insn(s, 3206, BL, offset);
1138 } else {
1139 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1140 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1144 static inline void tcg_out_goto_noaddr(TCGContext *s)
1146 /* We pay attention here to not modify the branch target by reading from
1147 the buffer. This ensures that caches and memory are kept coherent during
1148 retranslation. Mask away possible garbage in the high bits for the
1149 first translation, while keeping the offset bits for retranslation. */
1150 uint32_t old = tcg_in32(s);
1151 tcg_out_insn(s, 3206, B, old);
1154 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
1156 /* See comments in tcg_out_goto_noaddr. */
1157 uint32_t old = tcg_in32(s) >> 5;
1158 tcg_out_insn(s, 3202, B_C, c, old);
1161 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1163 tcg_out_insn(s, 3207, BLR, reg);
1166 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1168 ptrdiff_t offset = target - s->code_ptr;
1169 if (offset == sextract64(offset, 0, 26)) {
1170 tcg_out_insn(s, 3206, BL, offset);
1171 } else {
1172 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1173 tcg_out_callr(s, TCG_REG_TMP);
1177 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1178 uintptr_t addr)
1180 tcg_insn_unit i1, i2;
1181 TCGType rt = TCG_TYPE_I64;
1182 TCGReg rd = TCG_REG_TMP;
1183 uint64_t pair;
1185 ptrdiff_t offset = addr - jmp_addr;
1187 if (offset == sextract64(offset, 0, 26)) {
1188 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1189 i2 = NOP;
1190 } else {
1191 offset = (addr >> 12) - (jmp_addr >> 12);
1193 /* patch ADRP */
1194 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1195 /* patch ADDI */
1196 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1198 pair = (uint64_t)i2 << 32 | i1;
1199 atomic_set((uint64_t *)jmp_addr, pair);
1200 flush_icache_range(jmp_addr, jmp_addr + 8);
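/* Editor's note (not part of the original source): the branch is
   retargeted with a single aligned 64-bit store, which is why
   INDEX_op_goto_tb below pads with a NOP to keep the ADRP/ADDI pair
   8-byte aligned.  The pair = i2 << 32 | i1 layout relies on a
   little-endian host, so that i1 lands at jmp_addr and i2 at
   jmp_addr + 4. */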
1203 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1205 if (!l->has_value) {
1206 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1207 tcg_out_goto_noaddr(s);
1208 } else {
1209 tcg_out_goto(s, l->u.value_ptr);
1213 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1214 TCGArg b, bool b_const, TCGLabel *l)
1216 intptr_t offset;
1217 bool need_cmp;
1219 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1220 need_cmp = false;
1221 } else {
1222 need_cmp = true;
1223 tcg_out_cmp(s, ext, a, b, b_const);
1226 if (!l->has_value) {
1227 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1228 offset = tcg_in32(s) >> 5;
1229 } else {
1230 offset = l->u.value_ptr - s->code_ptr;
1231 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1234 if (need_cmp) {
1235 tcg_out_insn(s, 3202, B_C, c, offset);
1236 } else if (c == TCG_COND_EQ) {
1237 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1238 } else {
1239 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1243 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1245 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1248 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1250 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1253 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1255 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1258 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
1259 TCGReg rd, TCGReg rn)
1261 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1262 int bits = (8 << s_bits) - 1;
1263 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1266 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
1267 TCGReg rd, TCGReg rn)
1269 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1270 int bits = (8 << s_bits) - 1;
1271 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1274 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1275 TCGReg rn, int64_t aimm)
1277 if (aimm >= 0) {
1278 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1279 } else {
1280 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1284 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1285 TCGReg rh, TCGReg al, TCGReg ah,
1286 tcg_target_long bl, tcg_target_long bh,
1287 bool const_bl, bool const_bh, bool sub)
1289 TCGReg orig_rl = rl;
1290 AArch64Insn insn;
1292 if (rl == ah || (!const_bh && rl == bh)) {
1293 rl = TCG_REG_TMP;
1296 if (const_bl) {
1297 insn = I3401_ADDSI;
1298 if ((bl < 0) ^ sub) {
1299 insn = I3401_SUBSI;
1300 bl = -bl;
1302 if (unlikely(al == TCG_REG_XZR)) {
1303 /* ??? We want to allow al to be zero for the benefit of
1304 negation via subtraction. However, that leaves open the
1305 possibility of adding 0+const in the low part, and the
1306 immediate add instructions encode XSP not XZR. Don't try
1307 anything more elaborate here than loading another zero. */
1308 al = TCG_REG_TMP;
1309 tcg_out_movi(s, ext, al, 0);
1311 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1312 } else {
1313 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1316 insn = I3503_ADC;
1317 if (const_bh) {
1318 /* Note that the only two constants we support are 0 and -1, and
1319 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1320 if ((bh != 0) ^ sub) {
1321 insn = I3503_SBC;
1323 bh = TCG_REG_XZR;
1324 } else if (sub) {
1325 insn = I3503_SBC;
1327 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1329 tcg_out_mov(s, ext, orig_rl, rl);
1332 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1334 static const uint32_t sync[] = {
1335 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1336 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1337 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1338 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1339 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1341 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
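/* Editor's note (not part of the original source): the table above
   resolves to DMB ISH (0xd5033bbf) for the full barrier, DMB ISHLD
   (0xd50339bf) when the prior access being ordered is a load, and
   DMB ISHST (0xd5033abf) for store-store ordering. */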
1344 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1345 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1347 TCGReg a1 = a0;
1348 if (is_ctz) {
1349 a1 = TCG_REG_TMP;
1350 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1352 if (const_b && b == (ext ? 64 : 32)) {
1353 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1354 } else {
1355 AArch64Insn sel = I3506_CSEL;
1357 tcg_out_cmp(s, ext, a0, 0, 1);
1358 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1360 if (const_b) {
1361 if (b == -1) {
1362 b = TCG_REG_XZR;
1363 sel = I3506_CSINV;
1364 } else if (b == 0) {
1365 b = TCG_REG_XZR;
1366 } else {
1367 tcg_out_movi(s, ext, d, b);
1368 b = d;
1371 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1375 #ifdef CONFIG_SOFTMMU
1376 #include "tcg-ldst.inc.c"
1378 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1379 * TCGMemOpIdx oi, uintptr_t ra)
1381 static void * const qemu_ld_helpers[16] = {
1382 [MO_UB] = helper_ret_ldub_mmu,
1383 [MO_LEUW] = helper_le_lduw_mmu,
1384 [MO_LEUL] = helper_le_ldul_mmu,
1385 [MO_LEQ] = helper_le_ldq_mmu,
1386 [MO_BEUW] = helper_be_lduw_mmu,
1387 [MO_BEUL] = helper_be_ldul_mmu,
1388 [MO_BEQ] = helper_be_ldq_mmu,
1391 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1392 * uintxx_t val, TCGMemOpIdx oi,
1393 * uintptr_t ra)
1395 static void * const qemu_st_helpers[16] = {
1396 [MO_UB] = helper_ret_stb_mmu,
1397 [MO_LEUW] = helper_le_stw_mmu,
1398 [MO_LEUL] = helper_le_stl_mmu,
1399 [MO_LEQ] = helper_le_stq_mmu,
1400 [MO_BEUW] = helper_be_stw_mmu,
1401 [MO_BEUL] = helper_be_stl_mmu,
1402 [MO_BEQ] = helper_be_stq_mmu,
1405 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1407 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1408 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1409 tcg_out_insn(s, 3406, ADR, rd, offset);
1412 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1414 TCGMemOpIdx oi = lb->oi;
1415 TCGMemOp opc = get_memop(oi);
1416 TCGMemOp size = opc & MO_SIZE;
1418 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1420 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1421 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1422 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1423 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1424 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1425 if (opc & MO_SIGN) {
1426 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1427 } else {
1428 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1431 tcg_out_goto(s, lb->raddr);
1434 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1436 TCGMemOpIdx oi = lb->oi;
1437 TCGMemOp opc = get_memop(oi);
1438 TCGMemOp size = opc & MO_SIZE;
1440 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1442 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1443 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1444 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1445 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1446 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1447 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1448 tcg_out_goto(s, lb->raddr);
1451 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1452 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1453 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1455 TCGLabelQemuLdst *label = new_ldst_label(s);
1457 label->is_ld = is_ld;
1458 label->oi = oi;
1459 label->type = ext;
1460 label->datalo_reg = data_reg;
1461 label->addrlo_reg = addr_reg;
1462 label->raddr = raddr;
1463 label->label_ptr[0] = label_ptr;
1466 /* Load and compare a TLB entry, emitting the conditional jump to the
1467 slow path for the failure case, which will be patched later when finalizing
1468 the slow path. Generated code returns the host addend in X1,
1469 clobbers X0, X2, X3, TMP. */
1470 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1471 tcg_insn_unit **label_ptr, int mem_index,
1472 bool is_read)
1474 int tlb_offset = is_read ?
1475 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1476 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1477 unsigned a_bits = get_alignment_bits(opc);
1478 unsigned s_bits = opc & MO_SIZE;
1479 unsigned a_mask = (1u << a_bits) - 1;
1480 unsigned s_mask = (1u << s_bits) - 1;
1481 TCGReg base = TCG_AREG0, x3;
1482 uint64_t tlb_mask;
1484 /* For aligned accesses, we check the first byte and include the alignment
1485 bits within the address. For unaligned access, we check that we don't
1486 cross pages using the address of the last byte of the access. */
1487 if (a_bits >= s_bits) {
1488 x3 = addr_reg;
1489 } else {
1490 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1491 TCG_REG_X3, addr_reg, s_mask - a_mask);
1492 x3 = TCG_REG_X3;
1494 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1496 /* Extract the TLB index from the address into X0.
1497 X0<CPU_TLB_BITS:0> =
1498 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1499 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1500 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1502 /* Store the page mask part of the address into X3. */
1503 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1504 TCG_REG_X3, x3, tlb_mask);
1506 /* Add any "high bits" from the tlb offset to the env address into X2,
1507 to take advantage of the LSL12 form of the ADDI instruction.
1508 X2 = env + (tlb_offset & 0xfff000) */
1509 if (tlb_offset & 0xfff000) {
1510 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1511 tlb_offset & 0xfff000);
1512 base = TCG_REG_X2;
1515 /* Merge the tlb index contribution into X2.
1516 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1517 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1518 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1520 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1521 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1522 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1523 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff,
1524 TARGET_LONG_BITS == 32 ? 2 : 3);
1526 /* Load the tlb addend. Do that early to avoid stalling.
1527 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1528 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1529 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1530 (is_read ? offsetof(CPUTLBEntry, addr_read)
1531 : offsetof(CPUTLBEntry, addr_write)), 3);
1533 /* Perform the address comparison. */
1534 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1536 /* If not equal, we jump to the slow path. */
1537 *label_ptr = s->code_ptr;
1538 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
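/* Editor's summary (not part of the original source) of the sequence
   emitted above, in the general case:
       add  x3, addr, #(s_mask - a_mask)      ; only if a_bits < s_bits
       ubfx x0, addr, #TARGET_PAGE_BITS, #(CPU_TLB_BITS + 1)
       and  x3, x3, #(TARGET_PAGE_MASK | a_mask)
       add  x2, env, #(tlb_offset & 0xfff000) ; only if non-zero
       add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
       ldr  x0, [x2, #(tlb_offset & 0xfff)]   ; tlb comparator (w0 for 32-bit guests)
       ldr  x1, [x2, #addend_offset]          ; host addend
       cmp  x0, x3
       b.ne slow_path                                                   */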
1541 #endif /* CONFIG_SOFTMMU */
1543 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1544 TCGReg data_r, TCGReg addr_r,
1545 TCGType otype, TCGReg off_r)
1547 const TCGMemOp bswap = memop & MO_BSWAP;
1549 switch (memop & MO_SSIZE) {
1550 case MO_UB:
1551 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1552 break;
1553 case MO_SB:
1554 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1555 data_r, addr_r, otype, off_r);
1556 break;
1557 case MO_UW:
1558 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1559 if (bswap) {
1560 tcg_out_rev16(s, data_r, data_r);
1562 break;
1563 case MO_SW:
1564 if (bswap) {
1565 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1566 tcg_out_rev16(s, data_r, data_r);
1567 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1568 } else {
1569 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1570 data_r, addr_r, otype, off_r);
1572 break;
1573 case MO_UL:
1574 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1575 if (bswap) {
1576 tcg_out_rev32(s, data_r, data_r);
1578 break;
1579 case MO_SL:
1580 if (bswap) {
1581 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1582 tcg_out_rev32(s, data_r, data_r);
1583 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1584 } else {
1585 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1587 break;
1588 case MO_Q:
1589 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1590 if (bswap) {
1591 tcg_out_rev64(s, data_r, data_r);
1593 break;
1594 default:
1595 tcg_abort();
1599 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1600 TCGReg data_r, TCGReg addr_r,
1601 TCGType otype, TCGReg off_r)
1603 const TCGMemOp bswap = memop & MO_BSWAP;
1605 switch (memop & MO_SIZE) {
1606 case MO_8:
1607 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1608 break;
1609 case MO_16:
1610 if (bswap && data_r != TCG_REG_XZR) {
1611 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1612 data_r = TCG_REG_TMP;
1614 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1615 break;
1616 case MO_32:
1617 if (bswap && data_r != TCG_REG_XZR) {
1618 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1619 data_r = TCG_REG_TMP;
1621 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1622 break;
1623 case MO_64:
1624 if (bswap && data_r != TCG_REG_XZR) {
1625 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1626 data_r = TCG_REG_TMP;
1628 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1629 break;
1630 default:
1631 tcg_abort();
1635 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1636 TCGMemOpIdx oi, TCGType ext)
1638 TCGMemOp memop = get_memop(oi);
1639 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1640 #ifdef CONFIG_SOFTMMU
1641 unsigned mem_index = get_mmuidx(oi);
1642 tcg_insn_unit *label_ptr;
1644 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1645 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1646 TCG_REG_X1, otype, addr_reg);
1647 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1648 s->code_ptr, label_ptr);
1649 #else /* !CONFIG_SOFTMMU */
1650 if (USE_GUEST_BASE) {
1651 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1652 TCG_REG_GUEST_BASE, otype, addr_reg);
1653 } else {
1654 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1655 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1657 #endif /* CONFIG_SOFTMMU */
1660 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1661 TCGMemOpIdx oi)
1663 TCGMemOp memop = get_memop(oi);
1664 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1665 #ifdef CONFIG_SOFTMMU
1666 unsigned mem_index = get_mmuidx(oi);
1667 tcg_insn_unit *label_ptr;
1669 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1670 tcg_out_qemu_st_direct(s, memop, data_reg,
1671 TCG_REG_X1, otype, addr_reg);
1672 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1673 data_reg, addr_reg, s->code_ptr, label_ptr);
1674 #else /* !CONFIG_SOFTMMU */
1675 if (USE_GUEST_BASE) {
1676 tcg_out_qemu_st_direct(s, memop, data_reg,
1677 TCG_REG_GUEST_BASE, otype, addr_reg);
1678 } else {
1679 tcg_out_qemu_st_direct(s, memop, data_reg,
1680 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1682 #endif /* CONFIG_SOFTMMU */
1685 static tcg_insn_unit *tb_ret_addr;
1687 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1688 const TCGArg args[TCG_MAX_OP_ARGS],
1689 const int const_args[TCG_MAX_OP_ARGS])
1691 /* 99% of the time, we can signal the use of extension registers
1692 by looking to see if the opcode handles 64-bit data. */
1693 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1695 /* Hoist the loads of the most common arguments. */
1696 TCGArg a0 = args[0];
1697 TCGArg a1 = args[1];
1698 TCGArg a2 = args[2];
1699 int c2 = const_args[2];
1701 /* Some operands are defined with "rZ" constraint, a register or
1702 the zero register. These need not actually test args[I] == 0. */
1703 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1705 switch (opc) {
1706 case INDEX_op_exit_tb:
1707 /* Reuse the zeroing that exists for goto_ptr. */
1708 if (a0 == 0) {
1709 tcg_out_goto_long(s, s->code_gen_epilogue);
1710 } else {
1711 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1712 tcg_out_goto_long(s, tb_ret_addr);
1714 break;
1716 case INDEX_op_goto_tb:
1717 if (s->tb_jmp_insn_offset != NULL) {
1718 /* TCG_TARGET_HAS_direct_jump */
1719 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1720 write can be used to patch the target address. */
1721 if ((uintptr_t)s->code_ptr & 7) {
1722 tcg_out32(s, NOP);
1724 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1725 /* actual branch destination will be patched by
1726 tb_target_set_jmp_target later. */
1727 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1728 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1729 } else {
1730 /* !TCG_TARGET_HAS_direct_jump */
1731 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1732 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1733 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1735 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1736 set_jmp_reset_offset(s, a0);
1737 break;
1739 case INDEX_op_goto_ptr:
1740 tcg_out_insn(s, 3207, BR, a0);
1741 break;
1743 case INDEX_op_br:
1744 tcg_out_goto_label(s, arg_label(a0));
1745 break;
1747 case INDEX_op_ld8u_i32:
1748 case INDEX_op_ld8u_i64:
1749 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1750 break;
1751 case INDEX_op_ld8s_i32:
1752 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1753 break;
1754 case INDEX_op_ld8s_i64:
1755 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1756 break;
1757 case INDEX_op_ld16u_i32:
1758 case INDEX_op_ld16u_i64:
1759 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1760 break;
1761 case INDEX_op_ld16s_i32:
1762 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1763 break;
1764 case INDEX_op_ld16s_i64:
1765 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1766 break;
1767 case INDEX_op_ld_i32:
1768 case INDEX_op_ld32u_i64:
1769 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1770 break;
1771 case INDEX_op_ld32s_i64:
1772 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1773 break;
1774 case INDEX_op_ld_i64:
1775 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1776 break;
1778 case INDEX_op_st8_i32:
1779 case INDEX_op_st8_i64:
1780 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1781 break;
1782 case INDEX_op_st16_i32:
1783 case INDEX_op_st16_i64:
1784 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1785 break;
1786 case INDEX_op_st_i32:
1787 case INDEX_op_st32_i64:
1788 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1789 break;
1790 case INDEX_op_st_i64:
1791 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1792 break;
1794 case INDEX_op_add_i32:
1795 a2 = (int32_t)a2;
1796 /* FALLTHRU */
1797 case INDEX_op_add_i64:
1798 if (c2) {
1799 tcg_out_addsubi(s, ext, a0, a1, a2);
1800 } else {
1801 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1803 break;
1805 case INDEX_op_sub_i32:
1806 a2 = (int32_t)a2;
1807 /* FALLTHRU */
1808 case INDEX_op_sub_i64:
1809 if (c2) {
1810 tcg_out_addsubi(s, ext, a0, a1, -a2);
1811 } else {
1812 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1814 break;
1816 case INDEX_op_neg_i64:
1817 case INDEX_op_neg_i32:
1818 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1819 break;
1821 case INDEX_op_and_i32:
1822 a2 = (int32_t)a2;
1823 /* FALLTHRU */
1824 case INDEX_op_and_i64:
1825 if (c2) {
1826 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1827 } else {
1828 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1830 break;
1832 case INDEX_op_andc_i32:
1833 a2 = (int32_t)a2;
1834 /* FALLTHRU */
1835 case INDEX_op_andc_i64:
1836 if (c2) {
1837 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1838 } else {
1839 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1841 break;
1843 case INDEX_op_or_i32:
1844 a2 = (int32_t)a2;
1845 /* FALLTHRU */
1846 case INDEX_op_or_i64:
1847 if (c2) {
1848 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1849 } else {
1850 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1852 break;
1854 case INDEX_op_orc_i32:
1855 a2 = (int32_t)a2;
1856 /* FALLTHRU */
1857 case INDEX_op_orc_i64:
1858 if (c2) {
1859 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1860 } else {
1861 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1863 break;
1865 case INDEX_op_xor_i32:
1866 a2 = (int32_t)a2;
1867 /* FALLTHRU */
1868 case INDEX_op_xor_i64:
1869 if (c2) {
1870 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1871 } else {
1872 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1874 break;
1876 case INDEX_op_eqv_i32:
1877 a2 = (int32_t)a2;
1878 /* FALLTHRU */
1879 case INDEX_op_eqv_i64:
1880 if (c2) {
1881 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1882 } else {
1883 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1885 break;
1887 case INDEX_op_not_i64:
1888 case INDEX_op_not_i32:
1889 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1890 break;
1892 case INDEX_op_mul_i64:
1893 case INDEX_op_mul_i32:
1894 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1895 break;
1897 case INDEX_op_div_i64:
1898 case INDEX_op_div_i32:
1899 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1900 break;
1901 case INDEX_op_divu_i64:
1902 case INDEX_op_divu_i32:
1903 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1904 break;
1906 case INDEX_op_rem_i64:
1907 case INDEX_op_rem_i32:
1908 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1909 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1910 break;
1911 case INDEX_op_remu_i64:
1912 case INDEX_op_remu_i32:
1913 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1914 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1915 break;
1917 case INDEX_op_shl_i64:
1918 case INDEX_op_shl_i32:
1919 if (c2) {
1920 tcg_out_shl(s, ext, a0, a1, a2);
1921 } else {
1922 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1923 }
1924 break;
1926 case INDEX_op_shr_i64:
1927 case INDEX_op_shr_i32:
1928 if (c2) {
1929 tcg_out_shr(s, ext, a0, a1, a2);
1930 } else {
1931 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1932 }
1933 break;
1935 case INDEX_op_sar_i64:
1936 case INDEX_op_sar_i32:
1937 if (c2) {
1938 tcg_out_sar(s, ext, a0, a1, a2);
1939 } else {
1940 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1941 }
1942 break;
1944 case INDEX_op_rotr_i64:
1945 case INDEX_op_rotr_i32:
1946 if (c2) {
1947 tcg_out_rotr(s, ext, a0, a1, a2);
1948 } else {
1949 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1950 }
1951 break;
1953 case INDEX_op_rotl_i64:
1954 case INDEX_op_rotl_i32:
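 /* No rotate-left instruction: rotate right by the negated count.
    RORV only consumes the low bits of the count, so the 32-bit negation
    below is sufficient for both widths. */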
1955 if (c2) {
1956 tcg_out_rotl(s, ext, a0, a1, a2);
1957 } else {
1958 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1959 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1960 }
1961 break;
1963 case INDEX_op_clz_i64:
1964 case INDEX_op_clz_i32:
1965 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1966 break;
1967 case INDEX_op_ctz_i64:
1968 case INDEX_op_ctz_i32:
1969 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1970 break;
1972 case INDEX_op_brcond_i32:
1973 a1 = (int32_t)a1;
1974 /* FALLTHRU */
1975 case INDEX_op_brcond_i64:
1976 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1977 break;
1979 case INDEX_op_setcond_i32:
1980 a2 = (int32_t)a2;
1981 /* FALLTHRU */
1982 case INDEX_op_setcond_i64:
1983 tcg_out_cmp(s, ext, a1, a2, c2);
1984 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1985 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1986 TCG_REG_XZR, tcg_invert_cond(args[3]));
1987 break;
1989 case INDEX_op_movcond_i32:
1990 a2 = (int32_t)a2;
1991 /* FALLTHRU */
1992 case INDEX_op_movcond_i64:
1993 tcg_out_cmp(s, ext, a1, a2, c2);
1994 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1995 break;
1997 case INDEX_op_qemu_ld_i32:
1998 case INDEX_op_qemu_ld_i64:
1999 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2000 break;
2001 case INDEX_op_qemu_st_i32:
2002 case INDEX_op_qemu_st_i64:
2003 tcg_out_qemu_st(s, REG0(0), a1, a2);
2004 break;
2006 case INDEX_op_bswap64_i64:
2007 tcg_out_rev64(s, a0, a1);
2008 break;
2009 case INDEX_op_bswap32_i64:
2010 case INDEX_op_bswap32_i32:
2011 tcg_out_rev32(s, a0, a1);
2012 break;
2013 case INDEX_op_bswap16_i64:
2014 case INDEX_op_bswap16_i32:
2015 tcg_out_rev16(s, a0, a1);
2016 break;
2018 case INDEX_op_ext8s_i64:
2019 case INDEX_op_ext8s_i32:
2020 tcg_out_sxt(s, ext, MO_8, a0, a1);
2021 break;
2022 case INDEX_op_ext16s_i64:
2023 case INDEX_op_ext16s_i32:
2024 tcg_out_sxt(s, ext, MO_16, a0, a1);
2025 break;
2026 case INDEX_op_ext_i32_i64:
2027 case INDEX_op_ext32s_i64:
2028 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2029 break;
2030 case INDEX_op_ext8u_i64:
2031 case INDEX_op_ext8u_i32:
2032 tcg_out_uxt(s, MO_8, a0, a1);
2033 break;
2034 case INDEX_op_ext16u_i64:
2035 case INDEX_op_ext16u_i32:
2036 tcg_out_uxt(s, MO_16, a0, a1);
2037 break;
2038 case INDEX_op_extu_i32_i64:
2039 case INDEX_op_ext32u_i64:
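 /* A 32-bit register move clears bits 63:32, so it performs the
    zero-extension for free. */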
2040 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2041 break;
2043 case INDEX_op_deposit_i64:
2044 case INDEX_op_deposit_i32:
2045 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2046 break;
2048 case INDEX_op_extract_i64:
2049 case INDEX_op_extract_i32:
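 /* Bitfield extract: UBFM (SBFM below) with immr = lsb and
    imms = lsb + len - 1, i.e. the UBFX/SBFX alias. */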
2050 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2051 break;
2053 case INDEX_op_sextract_i64:
2054 case INDEX_op_sextract_i32:
2055 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2056 break;
2058 case INDEX_op_add2_i32:
2059 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2060 (int32_t)args[4], args[5], const_args[4],
2061 const_args[5], false);
2062 break;
2063 case INDEX_op_add2_i64:
2064 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2065 args[5], const_args[4], const_args[5], false);
2066 break;
2067 case INDEX_op_sub2_i32:
2068 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2069 (int32_t)args[4], args[5], const_args[4],
2070 const_args[5], true);
2071 break;
2072 case INDEX_op_sub2_i64:
2073 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2074 args[5], const_args[4], const_args[5], true);
2075 break;
2077 case INDEX_op_muluh_i64:
2078 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2079 break;
2080 case INDEX_op_mulsh_i64:
2081 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2082 break;
2084 case INDEX_op_mb:
2085 tcg_out_mb(s, a0);
2086 break;
2088 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2089 case INDEX_op_mov_i64:
2090 case INDEX_op_mov_vec:
2091 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2092 case INDEX_op_movi_i64:
2093 case INDEX_op_dupi_vec:
2094 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2095 default:
2096 g_assert_not_reached();
2097 }
2099 #undef REG0
2100 }
2102 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2103 unsigned vecl, unsigned vece,
2104 const TCGArg *args, const int *const_args)
2105 {
2106 static const AArch64Insn cmp_insn[16] = {
2107 [TCG_COND_EQ] = I3616_CMEQ,
2108 [TCG_COND_GT] = I3616_CMGT,
2109 [TCG_COND_GE] = I3616_CMGE,
2110 [TCG_COND_GTU] = I3616_CMHI,
2111 [TCG_COND_GEU] = I3616_CMHS,
2112 };
2113 static const AArch64Insn cmp0_insn[16] = {
2114 [TCG_COND_EQ] = I3617_CMEQ0,
2115 [TCG_COND_GT] = I3617_CMGT0,
2116 [TCG_COND_GE] = I3617_CMGE0,
2117 [TCG_COND_LT] = I3617_CMLT0,
2118 [TCG_COND_LE] = I3617_CMLE0,
2119 };
2121 TCGType type = vecl + TCG_TYPE_V64;
2122 unsigned is_q = vecl;
2123 TCGArg a0, a1, a2;
2125 a0 = args[0];
2126 a1 = args[1];
2127 a2 = args[2];
2129 switch (opc) {
2130 case INDEX_op_ld_vec:
2131 tcg_out_ld(s, type, a0, a1, a2);
2132 break;
2133 case INDEX_op_st_vec:
2134 tcg_out_st(s, type, a0, a1, a2);
2135 break;
2136 case INDEX_op_add_vec:
2137 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2138 break;
2139 case INDEX_op_sub_vec:
2140 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2141 break;
2142 case INDEX_op_mul_vec:
2143 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2144 break;
2145 case INDEX_op_neg_vec:
2146 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2147 break;
2148 case INDEX_op_and_vec:
2149 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2150 break;
2151 case INDEX_op_or_vec:
2152 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2153 break;
2154 case INDEX_op_xor_vec:
2155 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2156 break;
2157 case INDEX_op_andc_vec:
2158 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2159 break;
2160 case INDEX_op_orc_vec:
2161 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2162 break;
2163 case INDEX_op_not_vec:
2164 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2165 break;
2166 case INDEX_op_dup_vec:
2167 tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0);
2168 break;
2169 case INDEX_op_shli_vec:
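 /* The vector shift-immediate field (immh:immb) folds in the element
    size: SHL encodes esize + shift, USHR/SSHR encode 2 * esize - shift,
    where esize = 8 << vece. */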
2170 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2171 break;
2172 case INDEX_op_shri_vec:
2173 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2174 break;
2175 case INDEX_op_sari_vec:
2176 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2177 break;
2178 case INDEX_op_cmp_vec:
2179 {
2180 TCGCond cond = args[3];
2181 AArch64Insn insn;
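 /* There is no vector compare-not-equal. Against the constant 0 (the
    only constant admitted by the 'wZ' constraint), CMTST a1, a1 marks
    exactly the non-zero elements; otherwise compare for equality and
    invert the result. */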
2183 if (cond == TCG_COND_NE) {
2184 if (const_args[2]) {
2185 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2186 } else {
2187 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2188 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2189 }
2190 } else {
2191 if (const_args[2]) {
2192 insn = cmp0_insn[cond];
2193 if (insn) {
2194 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2195 break;
2196 }
2197 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2198 a2 = TCG_VEC_TMP;
2199 }
2200 insn = cmp_insn[cond];
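 /* Only EQ/GT/GE/GTU/GEU have encodings in cmp_insn; synthesize
    LT/LE/LTU/LEU by swapping the operands and the condition. */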
2201 if (insn == 0) {
2202 TCGArg t;
2203 t = a1, a1 = a2, a2 = t;
2204 cond = tcg_swap_cond(cond);
2205 insn = cmp_insn[cond];
2206 tcg_debug_assert(insn != 0);
2207 }
2208 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2209 }
2210 }
2211 break;
2212 default:
2213 g_assert_not_reached();
2214 }
2215 }
2217 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2218 {
2219 switch (opc) {
2220 case INDEX_op_add_vec:
2221 case INDEX_op_sub_vec:
2222 case INDEX_op_and_vec:
2223 case INDEX_op_or_vec:
2224 case INDEX_op_xor_vec:
2225 case INDEX_op_andc_vec:
2226 case INDEX_op_orc_vec:
2227 case INDEX_op_neg_vec:
2228 case INDEX_op_not_vec:
2229 case INDEX_op_cmp_vec:
2230 case INDEX_op_shli_vec:
2231 case INDEX_op_shri_vec:
2232 case INDEX_op_sari_vec:
2233 return 1;
2234 case INDEX_op_mul_vec:
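 /* AdvSIMD integer multiply has no 64-bit element form. */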
2235 return vece < MO_64;
2237 default:
2238 return 0;
2239 }
2240 }
2242 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2243 TCGArg a0, ...)
2244 {
2245 }
2247 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2248 {
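 /* Constraint letters, as parsed by target_parse_constraint: 'r' any
    general register, 'w' any vector register, 'l' a register valid for
    a guest memory address, 'Z' the constant zero, 'A' an add/sub
    immediate, 'L' a logical immediate, 'M' the constant -1 (for the
    carry operand of add2/sub2), 'i' any constant, '0' an alias of
    output operand 0. */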
2249 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2250 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2251 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2252 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2253 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2254 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2255 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2256 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2257 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2258 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2259 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2260 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2261 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2262 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2263 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2264 static const TCGTargetOpDef r_r_rAL
2265 = { .args_ct_str = { "r", "r", "rAL" } };
2266 static const TCGTargetOpDef dep
2267 = { .args_ct_str = { "r", "0", "rZ" } };
2268 static const TCGTargetOpDef movc
2269 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2270 static const TCGTargetOpDef add2
2271 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2273 switch (op) {
2274 case INDEX_op_goto_ptr:
2275 return &r;
2277 case INDEX_op_ld8u_i32:
2278 case INDEX_op_ld8s_i32:
2279 case INDEX_op_ld16u_i32:
2280 case INDEX_op_ld16s_i32:
2281 case INDEX_op_ld_i32:
2282 case INDEX_op_ld8u_i64:
2283 case INDEX_op_ld8s_i64:
2284 case INDEX_op_ld16u_i64:
2285 case INDEX_op_ld16s_i64:
2286 case INDEX_op_ld32u_i64:
2287 case INDEX_op_ld32s_i64:
2288 case INDEX_op_ld_i64:
2289 case INDEX_op_neg_i32:
2290 case INDEX_op_neg_i64:
2291 case INDEX_op_not_i32:
2292 case INDEX_op_not_i64:
2293 case INDEX_op_bswap16_i32:
2294 case INDEX_op_bswap32_i32:
2295 case INDEX_op_bswap16_i64:
2296 case INDEX_op_bswap32_i64:
2297 case INDEX_op_bswap64_i64:
2298 case INDEX_op_ext8s_i32:
2299 case INDEX_op_ext16s_i32:
2300 case INDEX_op_ext8u_i32:
2301 case INDEX_op_ext16u_i32:
2302 case INDEX_op_ext8s_i64:
2303 case INDEX_op_ext16s_i64:
2304 case INDEX_op_ext32s_i64:
2305 case INDEX_op_ext8u_i64:
2306 case INDEX_op_ext16u_i64:
2307 case INDEX_op_ext32u_i64:
2308 case INDEX_op_ext_i32_i64:
2309 case INDEX_op_extu_i32_i64:
2310 case INDEX_op_extract_i32:
2311 case INDEX_op_extract_i64:
2312 case INDEX_op_sextract_i32:
2313 case INDEX_op_sextract_i64:
2314 return &r_r;
2316 case INDEX_op_st8_i32:
2317 case INDEX_op_st16_i32:
2318 case INDEX_op_st_i32:
2319 case INDEX_op_st8_i64:
2320 case INDEX_op_st16_i64:
2321 case INDEX_op_st32_i64:
2322 case INDEX_op_st_i64:
2323 return &rZ_r;
2325 case INDEX_op_add_i32:
2326 case INDEX_op_add_i64:
2327 case INDEX_op_sub_i32:
2328 case INDEX_op_sub_i64:
2329 case INDEX_op_setcond_i32:
2330 case INDEX_op_setcond_i64:
2331 return &r_r_rA;
2333 case INDEX_op_mul_i32:
2334 case INDEX_op_mul_i64:
2335 case INDEX_op_div_i32:
2336 case INDEX_op_div_i64:
2337 case INDEX_op_divu_i32:
2338 case INDEX_op_divu_i64:
2339 case INDEX_op_rem_i32:
2340 case INDEX_op_rem_i64:
2341 case INDEX_op_remu_i32:
2342 case INDEX_op_remu_i64:
2343 case INDEX_op_muluh_i64:
2344 case INDEX_op_mulsh_i64:
2345 return &r_r_r;
2347 case INDEX_op_and_i32:
2348 case INDEX_op_and_i64:
2349 case INDEX_op_or_i32:
2350 case INDEX_op_or_i64:
2351 case INDEX_op_xor_i32:
2352 case INDEX_op_xor_i64:
2353 case INDEX_op_andc_i32:
2354 case INDEX_op_andc_i64:
2355 case INDEX_op_orc_i32:
2356 case INDEX_op_orc_i64:
2357 case INDEX_op_eqv_i32:
2358 case INDEX_op_eqv_i64:
2359 return &r_r_rL;
2361 case INDEX_op_shl_i32:
2362 case INDEX_op_shr_i32:
2363 case INDEX_op_sar_i32:
2364 case INDEX_op_rotl_i32:
2365 case INDEX_op_rotr_i32:
2366 case INDEX_op_shl_i64:
2367 case INDEX_op_shr_i64:
2368 case INDEX_op_sar_i64:
2369 case INDEX_op_rotl_i64:
2370 case INDEX_op_rotr_i64:
2371 return &r_r_ri;
2373 case INDEX_op_clz_i32:
2374 case INDEX_op_ctz_i32:
2375 case INDEX_op_clz_i64:
2376 case INDEX_op_ctz_i64:
2377 return &r_r_rAL;
2379 case INDEX_op_brcond_i32:
2380 case INDEX_op_brcond_i64:
2381 return &r_rA;
2383 case INDEX_op_movcond_i32:
2384 case INDEX_op_movcond_i64:
2385 return &movc;
2387 case INDEX_op_qemu_ld_i32:
2388 case INDEX_op_qemu_ld_i64:
2389 return &r_l;
2390 case INDEX_op_qemu_st_i32:
2391 case INDEX_op_qemu_st_i64:
2392 return &lZ_l;
2394 case INDEX_op_deposit_i32:
2395 case INDEX_op_deposit_i64:
2396 return &dep;
2398 case INDEX_op_add2_i32:
2399 case INDEX_op_add2_i64:
2400 case INDEX_op_sub2_i32:
2401 case INDEX_op_sub2_i64:
2402 return &add2;
2404 case INDEX_op_add_vec:
2405 case INDEX_op_sub_vec:
2406 case INDEX_op_mul_vec:
2407 case INDEX_op_and_vec:
2408 case INDEX_op_or_vec:
2409 case INDEX_op_xor_vec:
2410 case INDEX_op_andc_vec:
2411 case INDEX_op_orc_vec:
2412 return &w_w_w;
2413 case INDEX_op_not_vec:
2414 case INDEX_op_neg_vec:
2415 case INDEX_op_shli_vec:
2416 case INDEX_op_shri_vec:
2417 case INDEX_op_sari_vec:
2418 return &w_w;
2419 case INDEX_op_ld_vec:
2420 case INDEX_op_st_vec:
2421 return &w_r;
2422 case INDEX_op_dup_vec:
2423 return &w_wr;
2424 case INDEX_op_cmp_vec:
2425 return &w_w_wZ;
2427 default:
2428 return NULL;
2429 }
2430 }
2432 static void tcg_target_init(TCGContext *s)
2433 {
2434 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2435 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
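 /* The vector registers V0..V31 occupy bits 32..63 of the register set. */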
2436 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2437 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2439 tcg_target_call_clobber_regs = -1ull;
2440 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2441 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2442 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2443 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2444 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2445 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2446 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2447 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2448 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2449 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2450 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2451 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2452 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2453 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2454 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2455 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2456 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2457 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2458 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2460 s->reserved_regs = 0;
2461 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2462 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2463 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2464 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2465 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2466 }
2468 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2469 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2471 #define FRAME_SIZE \
2472 ((PUSH_SIZE \
2473 + TCG_STATIC_CALL_ARGS_SIZE \
2474 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2475 + TCG_TARGET_STACK_ALIGN - 1) \
2476 & ~(TCG_TARGET_STACK_ALIGN - 1))
2478 /* We're expecting a 2 byte uleb128 encoded value. */
2479 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2481 /* We're expecting to use a single ADDI insn. */
2482 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2484 static void tcg_target_qemu_prologue(TCGContext *s)
2485 {
2486 TCGReg r;
2488 /* Push (FP, LR) and allocate space for all saved registers. */
2489 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2490 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2492 /* Set up frame pointer for canonical unwinding. */
2493 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2495 /* Store callee-preserved regs x19..x28. */
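 /* FP/LR already occupy the pair slot at offset 0, so the callee-saved
    pairs start at offset 16. */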
2496 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2497 int ofs = (r - TCG_REG_X19 + 2) * 8;
2498 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2499 }
2501 /* Make stack space for TCG locals. */
2502 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2503 FRAME_SIZE - PUSH_SIZE);
2505 /* Inform TCG about how to find TCG locals with register, offset, size. */
2506 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2507 CPU_TEMP_BUF_NLONGS * sizeof(long));
2509 #if !defined(CONFIG_SOFTMMU)
2510 if (USE_GUEST_BASE) {
2511 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2512 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2513 }
2514 #endif
2516 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2517 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2519 /*
2520 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2521 * and fall through to the rest of the epilogue.
2522 */
2523 s->code_gen_epilogue = s->code_ptr;
2524 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2526 /* TB epilogue */
2527 tb_ret_addr = s->code_ptr;
2529 /* Remove TCG locals stack space. */
2530 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2531 FRAME_SIZE - PUSH_SIZE);
2533 /* Restore registers x19..x28. */
2534 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2535 int ofs = (r - TCG_REG_X19 + 2) * 8;
2536 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2537 }
2539 /* Pop (FP, LR), restore SP to previous frame. */
2540 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2541 TCG_REG_SP, PUSH_SIZE, 0, 1);
2542 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2543 }
2545 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2546 {
2547 int i;
2548 for (i = 0; i < count; ++i) {
2549 p[i] = NOP;
2550 }
2551 }
2553 typedef struct {
2554 DebugFrameHeader h;
2555 uint8_t fde_def_cfa[4];
2556 uint8_t fde_reg_ofs[24];
2557 } DebugFrame;
2559 #define ELF_HOST_MACHINE EM_AARCH64
2561 static const DebugFrame debug_frame = {
2562 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2563 .h.cie.id = -1,
2564 .h.cie.version = 1,
2565 .h.cie.code_align = 1,
2566 .h.cie.data_align = 0x78, /* sleb128 -8 */
2567 .h.cie.return_column = TCG_REG_LR,
2569 /* Total FDE size does not include the "len" member. */
2570 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2572 .fde_def_cfa = {
2573 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2574 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2575 (FRAME_SIZE >> 7)
2576 },
2577 .fde_reg_ofs = {
2578 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2579 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2580 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2581 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2582 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2583 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2584 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2585 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2586 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2587 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2588 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2589 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2590 }
2591 };
2593 void tcg_register_jit(void *buf, size_t buf_size)
2594 {
2595 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2596 }