tcg/aarch64/tcg-target.inc.c
1 /*
2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "tcg-pool.inc.c"
14 #include "qemu/bitops.h"
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33 #endif /* CONFIG_DEBUG_TCG */
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
72 #ifndef CONFIG_SOFTMMU
73 /* Note that XZR cannot be encoded in the address base register slot,
74 as that actually encodes SP. So if we need to zero-extend the guest
75 address, via the address index register slot, we need to load even
76 a zero guest base into a register. */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
79 #endif
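/* Note on the two relocation helpers below: tcg_insn_unit is a 32-bit
   quantity on this backend, so the pointer difference (target - code_ptr)
   is already measured in instructions, which is exactly the unit used by
   the imm26 (B/BL) and imm19 (B.cond/CBZ/LDR literal) fields; no further
   scaling by 4 is needed before depositing the offset. */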
81 static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
83 ptrdiff_t offset = target - code_ptr;
84 if (offset == sextract64(offset, 0, 26)) {
85 /* read instruction, mask away previous PC_REL26 parameter contents,
86 set the proper offset, then write back the instruction. */
87 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
88 return true;
90 return false;
93 static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
95 ptrdiff_t offset = target - code_ptr;
96 if (offset == sextract64(offset, 0, 19)) {
97 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
98 return true;
100 return false;
103 static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
104 intptr_t value, intptr_t addend)
106 tcg_debug_assert(addend == 0);
107 switch (type) {
108 case R_AARCH64_JUMP26:
109 case R_AARCH64_CALL26:
110 return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
111 case R_AARCH64_CONDBR19:
112 return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
113 default:
114 g_assert_not_reached();
118 #define TCG_CT_CONST_AIMM 0x100
119 #define TCG_CT_CONST_LIMM 0x200
120 #define TCG_CT_CONST_ZERO 0x400
121 #define TCG_CT_CONST_MONE 0x800
123 /* parse target specific constraints */
124 static const char *target_parse_constraint(TCGArgConstraint *ct,
125 const char *ct_str, TCGType type)
127 switch (*ct_str++) {
128 case 'r': /* general registers */
129 ct->ct |= TCG_CT_REG;
130 ct->u.regs |= 0xffffffffu;
131 break;
132 case 'w': /* advsimd registers */
133 ct->ct |= TCG_CT_REG;
134 ct->u.regs |= 0xffffffff00000000ull;
135 break;
136 case 'l': /* qemu_ld / qemu_st address, data_reg */
137 ct->ct |= TCG_CT_REG;
138 ct->u.regs = 0xffffffffu;
139 #ifdef CONFIG_SOFTMMU
140 /* x0 and x1 will be overwritten when reading the tlb entry,
141 and x2 and x3 are used for helper args; better to avoid using them. */
142 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
143 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
144 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
145 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
146 #endif
147 break;
148 case 'A': /* Valid for arithmetic immediate (positive or negative). */
149 ct->ct |= TCG_CT_CONST_AIMM;
150 break;
151 case 'L': /* Valid for logical immediate. */
152 ct->ct |= TCG_CT_CONST_LIMM;
153 break;
154 case 'M': /* minus one */
155 ct->ct |= TCG_CT_CONST_MONE;
156 break;
157 case 'Z': /* zero */
158 ct->ct |= TCG_CT_CONST_ZERO;
159 break;
160 default:
161 return NULL;
163 return ct_str;
166 /* Match a constant valid for addition (12-bit, optionally shifted). */
167 static inline bool is_aimm(uint64_t val)
169 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
172 /* Match a constant valid for logical operations. */
173 static inline bool is_limm(uint64_t val)
175 /* Taking a simplified view of the logical immediates for now, ignoring
176 the replication that can happen across the field. Match bit patterns
177 of the forms
178 0....01....1
179 0..01..10..0
180 and their inverses. */
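/* Worked example of the test below: 0x0ff0 is accepted, because adding its
   lowest set bit (0x010) carries across the contiguous run of ones and
   leaves the single bit 0x1000, so the power-of-two check succeeds.  A
   replicated pattern such as 0x00ff00ff00ff00ff is architecturally
   encodable but rejected here, per the simplification above. */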
182 /* Make things easier below, by testing the form with msb clear. */
183 if ((int64_t)val < 0) {
184 val = ~val;
186 if (val == 0) {
187 return false;
189 val += val & -val;
190 return (val & (val - 1)) == 0;
193 /* Match a constant that is valid for vectors. */
194 static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
196 int i;
198 *op = 0;
199 /* Match replication across 8 bits. */
200 if (v64 == dup_const(MO_8, v64)) {
201 *cmode = 0xe;
202 *imm8 = v64 & 0xff;
203 return true;
205 /* Match replication across 16 bits. */
206 if (v64 == dup_const(MO_16, v64)) {
207 uint16_t v16 = v64;
209 if (v16 == (v16 & 0xff)) {
210 *cmode = 0x8;
211 *imm8 = v16 & 0xff;
212 return true;
213 } else if (v16 == (v16 & 0xff00)) {
214 *cmode = 0xa;
215 *imm8 = v16 >> 8;
216 return true;
219 /* Match replication across 32 bits. */
220 if (v64 == dup_const(MO_32, v64)) {
221 uint32_t v32 = v64;
223 if (v32 == (v32 & 0xff)) {
224 *cmode = 0x0;
225 *imm8 = v32 & 0xff;
226 return true;
227 } else if (v32 == (v32 & 0xff00)) {
228 *cmode = 0x2;
229 *imm8 = (v32 >> 8) & 0xff;
230 return true;
231 } else if (v32 == (v32 & 0xff0000)) {
232 *cmode = 0x4;
233 *imm8 = (v32 >> 16) & 0xff;
234 return true;
235 } else if (v32 == (v32 & 0xff000000)) {
236 *cmode = 0x6;
237 *imm8 = v32 >> 24;
238 return true;
239 } else if ((v32 & 0xffff00ff) == 0xff) {
240 *cmode = 0xc;
241 *imm8 = (v32 >> 8) & 0xff;
242 return true;
243 } else if ((v32 & 0xff00ffff) == 0xffff) {
244 *cmode = 0xd;
245 *imm8 = (v32 >> 16) & 0xff;
246 return true;
248 /* Match forms of a float32. */
249 if (extract32(v32, 0, 19) == 0
250 && (extract32(v32, 25, 6) == 0x20
251 || extract32(v32, 25, 6) == 0x1f)) {
252 *cmode = 0xf;
253 *imm8 = (extract32(v32, 31, 1) << 7)
254 | (extract32(v32, 25, 1) << 6)
255 | extract32(v32, 19, 6);
256 return true;
259 /* Match forms of a float64. */
260 if (extract64(v64, 0, 48) == 0
261 && (extract64(v64, 54, 9) == 0x100
262 || extract64(v64, 54, 9) == 0x0ff)) {
263 *cmode = 0xf;
264 *op = 1;
265 *imm8 = (extract64(v64, 63, 1) << 7)
266 | (extract64(v64, 54, 1) << 6)
267 | extract64(v64, 48, 6);
268 return true;
270 /* Match bytes of 0x00 and 0xff. */
271 for (i = 0; i < 64; i += 8) {
272 uint64_t byte = extract64(v64, i, 8);
273 if (byte != 0 && byte != 0xff) {
274 break;
277 if (i == 64) {
278 *cmode = 0xe;
279 *op = 1;
280 *imm8 = (extract64(v64, 0, 1) << 0)
281 | (extract64(v64, 8, 1) << 1)
282 | (extract64(v64, 16, 1) << 2)
283 | (extract64(v64, 24, 1) << 3)
284 | (extract64(v64, 32, 1) << 4)
285 | (extract64(v64, 40, 1) << 5)
286 | (extract64(v64, 48, 1) << 6)
287 | (extract64(v64, 56, 1) << 7);
288 return true;
290 return false;
293 static int tcg_target_const_match(tcg_target_long val, TCGType type,
294 const TCGArgConstraint *arg_ct)
296 int ct = arg_ct->ct;
298 if (ct & TCG_CT_CONST) {
299 return 1;
301 if (type == TCG_TYPE_I32) {
302 val = (int32_t)val;
304 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
305 return 1;
307 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
308 return 1;
310 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
311 return 1;
313 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
314 return 1;
317 return 0;
320 enum aarch64_cond_code {
321 COND_EQ = 0x0,
322 COND_NE = 0x1,
323 COND_CS = 0x2, /* Unsigned greater or equal */
324 COND_HS = COND_CS, /* ALIAS greater or equal */
325 COND_CC = 0x3, /* Unsigned less than */
326 COND_LO = COND_CC, /* ALIAS Lower */
327 COND_MI = 0x4, /* Negative */
328 COND_PL = 0x5, /* Zero or greater */
329 COND_VS = 0x6, /* Overflow */
330 COND_VC = 0x7, /* No overflow */
331 COND_HI = 0x8, /* Unsigned greater than */
332 COND_LS = 0x9, /* Unsigned less or equal */
333 COND_GE = 0xa,
334 COND_LT = 0xb,
335 COND_GT = 0xc,
336 COND_LE = 0xd,
337 COND_AL = 0xe,
338 COND_NV = 0xf, /* behaves like COND_AL here */
341 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
342 [TCG_COND_EQ] = COND_EQ,
343 [TCG_COND_NE] = COND_NE,
344 [TCG_COND_LT] = COND_LT,
345 [TCG_COND_GE] = COND_GE,
346 [TCG_COND_LE] = COND_LE,
347 [TCG_COND_GT] = COND_GT,
348 /* unsigned */
349 [TCG_COND_LTU] = COND_LO,
350 [TCG_COND_GTU] = COND_HI,
351 [TCG_COND_GEU] = COND_HS,
352 [TCG_COND_LEU] = COND_LS,
355 typedef enum {
356 LDST_ST = 0, /* store */
357 LDST_LD = 1, /* load */
358 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
359 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
360 } AArch64LdstType;
362 /* We encode the format of the insn into the beginning of the name, so that
363 we can have the preprocessor help "typecheck" the insn vs the output
364 function. Arm didn't provide us with nice names for the formats, so we
365 use the section number of the architecture reference manual in which the
366 instruction group is described. */
367 typedef enum {
368 /* Compare and branch (immediate). */
369 I3201_CBZ = 0x34000000,
370 I3201_CBNZ = 0x35000000,
372 /* Conditional branch (immediate). */
373 I3202_B_C = 0x54000000,
375 /* Unconditional branch (immediate). */
376 I3206_B = 0x14000000,
377 I3206_BL = 0x94000000,
379 /* Unconditional branch (register). */
380 I3207_BR = 0xd61f0000,
381 I3207_BLR = 0xd63f0000,
382 I3207_RET = 0xd65f0000,
384 /* Load literal for loading the address at pc-relative offset */
385 I3305_LDR = 0x58000000,
386 I3305_LDR_v64 = 0x5c000000,
387 I3305_LDR_v128 = 0x9c000000,
389 /* Load/store register. Described here as 3.3.12, but the helper
390 that emits them can transform to 3.3.10 or 3.3.13. */
391 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
392 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
393 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
394 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
396 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
397 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
398 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
399 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
401 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
402 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
404 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
405 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
406 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
408 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
409 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
411 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
412 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
414 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
415 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
417 I3312_TO_I3310 = 0x00200800,
418 I3312_TO_I3313 = 0x01000000,
420 /* Load/store register pair instructions. */
421 I3314_LDP = 0x28400000,
422 I3314_STP = 0x28000000,
424 /* Add/subtract immediate instructions. */
425 I3401_ADDI = 0x11000000,
426 I3401_ADDSI = 0x31000000,
427 I3401_SUBI = 0x51000000,
428 I3401_SUBSI = 0x71000000,
430 /* Bitfield instructions. */
431 I3402_BFM = 0x33000000,
432 I3402_SBFM = 0x13000000,
433 I3402_UBFM = 0x53000000,
435 /* Extract instruction. */
436 I3403_EXTR = 0x13800000,
438 /* Logical immediate instructions. */
439 I3404_ANDI = 0x12000000,
440 I3404_ORRI = 0x32000000,
441 I3404_EORI = 0x52000000,
443 /* Move wide immediate instructions. */
444 I3405_MOVN = 0x12800000,
445 I3405_MOVZ = 0x52800000,
446 I3405_MOVK = 0x72800000,
448 /* PC relative addressing instructions. */
449 I3406_ADR = 0x10000000,
450 I3406_ADRP = 0x90000000,
452 /* Add/subtract shifted register instructions (without a shift). */
453 I3502_ADD = 0x0b000000,
454 I3502_ADDS = 0x2b000000,
455 I3502_SUB = 0x4b000000,
456 I3502_SUBS = 0x6b000000,
458 /* Add/subtract shifted register instructions (with a shift). */
459 I3502S_ADD_LSL = I3502_ADD,
461 /* Add/subtract with carry instructions. */
462 I3503_ADC = 0x1a000000,
463 I3503_SBC = 0x5a000000,
465 /* Conditional select instructions. */
466 I3506_CSEL = 0x1a800000,
467 I3506_CSINC = 0x1a800400,
468 I3506_CSINV = 0x5a800000,
469 I3506_CSNEG = 0x5a800400,
471 /* Data-processing (1 source) instructions. */
472 I3507_CLZ = 0x5ac01000,
473 I3507_RBIT = 0x5ac00000,
474 I3507_REV16 = 0x5ac00400,
475 I3507_REV32 = 0x5ac00800,
476 I3507_REV64 = 0x5ac00c00,
478 /* Data-processing (2 source) instructions. */
479 I3508_LSLV = 0x1ac02000,
480 I3508_LSRV = 0x1ac02400,
481 I3508_ASRV = 0x1ac02800,
482 I3508_RORV = 0x1ac02c00,
483 I3508_SMULH = 0x9b407c00,
484 I3508_UMULH = 0x9bc07c00,
485 I3508_UDIV = 0x1ac00800,
486 I3508_SDIV = 0x1ac00c00,
488 /* Data-processing (3 source) instructions. */
489 I3509_MADD = 0x1b000000,
490 I3509_MSUB = 0x1b008000,
492 /* Logical shifted register instructions (without a shift). */
493 I3510_AND = 0x0a000000,
494 I3510_BIC = 0x0a200000,
495 I3510_ORR = 0x2a000000,
496 I3510_ORN = 0x2a200000,
497 I3510_EOR = 0x4a000000,
498 I3510_EON = 0x4a200000,
499 I3510_ANDS = 0x6a000000,
501 /* Logical shifted register instructions (with a shift). */
502 I3502S_AND_LSR = I3510_AND | (1 << 22),
504 /* AdvSIMD copy */
505 I3605_DUP = 0x0e000400,
506 I3605_INS = 0x4e001c00,
507 I3605_UMOV = 0x0e003c00,
509 /* AdvSIMD modified immediate */
510 I3606_MOVI = 0x0f000400,
512 /* AdvSIMD shift by immediate */
513 I3614_SSHR = 0x0f000400,
514 I3614_SSRA = 0x0f001400,
515 I3614_SHL = 0x0f005400,
516 I3614_USHR = 0x2f000400,
517 I3614_USRA = 0x2f001400,
519 /* AdvSIMD three same. */
520 I3616_ADD = 0x0e208400,
521 I3616_AND = 0x0e201c00,
522 I3616_BIC = 0x0e601c00,
523 I3616_EOR = 0x2e201c00,
524 I3616_MUL = 0x0e209c00,
525 I3616_ORR = 0x0ea01c00,
526 I3616_ORN = 0x0ee01c00,
527 I3616_SUB = 0x2e208400,
528 I3616_CMGT = 0x0e203400,
529 I3616_CMGE = 0x0e203c00,
530 I3616_CMTST = 0x0e208c00,
531 I3616_CMHI = 0x2e203400,
532 I3616_CMHS = 0x2e203c00,
533 I3616_CMEQ = 0x2e208c00,
534 I3616_SMAX = 0x0e206400,
535 I3616_SMIN = 0x0e206c00,
536 I3616_SQADD = 0x0e200c00,
537 I3616_SQSUB = 0x0e202c00,
538 I3616_UMAX = 0x2e206400,
539 I3616_UMIN = 0x2e206c00,
540 I3616_UQADD = 0x2e200c00,
541 I3616_UQSUB = 0x2e202c00,
543 /* AdvSIMD two-reg misc. */
544 I3617_CMGT0 = 0x0e208800,
545 I3617_CMEQ0 = 0x0e209800,
546 I3617_CMLT0 = 0x0e20a800,
547 I3617_CMGE0 = 0x2e208800,
548 I3617_CMLE0 = 0x2e20a800,
549 I3617_NOT = 0x2e205800,
550 I3617_NEG = 0x2e20b800,
552 /* System instructions. */
553 NOP = 0xd503201f,
554 DMB_ISH = 0xd50338bf,
555 DMB_LD = 0x00000100,
556 DMB_ST = 0x00000200,
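    /* DMB_ISH sets the CRm option field to 0b1000; OR-ing in DMB_LD (bit 8)
       and/or DMB_ST (bit 9) selects DMB ISHLD (0b1001), DMB ISHST (0b1010),
       or the full DMB ISH (0b1011).  DMB_ISH is never emitted on its own. */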
557 } AArch64Insn;
559 static inline uint32_t tcg_in32(TCGContext *s)
561 uint32_t v = *(uint32_t *)s->code_ptr;
562 return v;
565 /* Emit an opcode with "type-checking" of the format. */
566 #define tcg_out_insn(S, FMT, OP, ...) \
567 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
569 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
571 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
574 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
575 TCGReg rt, int imm19)
577 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
580 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
581 TCGCond c, int imm19)
583 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
586 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
588 tcg_out32(s, insn | (imm26 & 0x03ffffff));
591 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
593 tcg_out32(s, insn | rn << 5);
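/* For the load/store pair form below, the signed immediate is scaled by the
   access size: the assert requires an 8-byte-aligned offset in
   [-0x200, 0x200), and (ofs & (0x7f << 3)) << (15 - 3) places ofs/8 into
   the 7-bit imm7 field at bits [21:15]. */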
596 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
597 TCGReg r1, TCGReg r2, TCGReg rn,
598 tcg_target_long ofs, bool pre, bool w)
600 insn |= 1u << 31; /* ext */
601 insn |= pre << 24;
602 insn |= w << 23;
604 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
605 insn |= (ofs & (0x7f << 3)) << (15 - 3);
607 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
610 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
611 TCGReg rd, TCGReg rn, uint64_t aimm)
613 if (aimm > 0xfff) {
614 tcg_debug_assert((aimm & 0xfff) == 0);
615 aimm >>= 12;
616 tcg_debug_assert(aimm <= 0xfff);
617 aimm |= 1 << 12; /* apply LSL 12 */
619 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
622 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
623 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
624 that feed the DecodeBitMasks pseudo function. */
625 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
626 TCGReg rd, TCGReg rn, int n, int immr, int imms)
628 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
629 | rn << 5 | rd);
632 #define tcg_out_insn_3404 tcg_out_insn_3402
634 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
635 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
637 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
638 | rn << 5 | rd);
641 /* This function is used for the Move (wide immediate) instruction group.
642 Note that SHIFT is a full shift count, not the 2 bit HW field. */
643 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
644 TCGReg rd, uint16_t half, unsigned shift)
646 tcg_debug_assert((shift & ~0x30) == 0);
647 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
650 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
651 TCGReg rd, int64_t disp)
653 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
656 /* This function is for 3.5.2 (Add/subtract shifted register), for
657 the rare occasion when we actually want to supply a shift amount. */
658 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
659 TCGType ext, TCGReg rd, TCGReg rn,
660 TCGReg rm, int imm6)
662 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
665 /* This function is for 3.5.2 (Add/subtract shifted register),
666 and 3.5.10 (Logical shifted register), for the vast majority of cases
667 when we don't want to apply a shift. Thus it can also be used for
668 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
669 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
670 TCGReg rd, TCGReg rn, TCGReg rm)
672 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
675 #define tcg_out_insn_3503 tcg_out_insn_3502
676 #define tcg_out_insn_3508 tcg_out_insn_3502
677 #define tcg_out_insn_3510 tcg_out_insn_3502
679 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
680 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
682 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
683 | tcg_cond_to_aarch64[c] << 12);
686 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
687 TCGReg rd, TCGReg rn)
689 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
692 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
693 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
695 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
698 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
699 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
701 /* Note that bit 11 set means general register input. Therefore
702 we can handle both register sets with one function. */
703 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
704 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
707 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
708 TCGReg rd, bool op, int cmode, uint8_t imm8)
710 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
711 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
714 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
715 TCGReg rd, TCGReg rn, unsigned immhb)
717 tcg_out32(s, insn | q << 30 | immhb << 16
718 | (rn & 0x1f) << 5 | (rd & 0x1f));
721 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
722 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
724 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
725 | (rn & 0x1f) << 5 | (rd & 0x1f));
728 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
729 unsigned size, TCGReg rd, TCGReg rn)
731 tcg_out32(s, insn | q << 30 | (size << 22)
732 | (rn & 0x1f) << 5 | (rd & 0x1f));
735 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
736 TCGReg rd, TCGReg base, TCGType ext,
737 TCGReg regoff)
739 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
740 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
741 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
744 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
745 TCGReg rd, TCGReg rn, intptr_t offset)
747 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
750 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
751 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
753 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
754 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
755 | rn << 5 | (rd & 0x1f));
758 /* Register to register move using ORR (shifted register with no shift). */
759 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
761 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
764 /* Register to register move using ADDI (move to/from SP). */
765 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
767 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
770 /* This function is used for the Logical (immediate) instruction group.
771 The value of LIMM must satisfy IS_LIMM. See the comment above about
772 only supporting simplified logical immediates. */
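/* Two worked encodings for the function below (64-bit, so N = 1):
   limm = 0x00ff: l = 0, so r (IMMR) = 0 and c (IMMS) = 7, i.e. a run of
   eight ones with no rotation;
   limm = 0x0ff0: l = 4, so r = 64 - 4 = 60 and c = r - h - 1 = 7, i.e. the
   same eight-one run rotated right by 60 (equivalently left by 4). */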
773 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
774 TCGReg rd, TCGReg rn, uint64_t limm)
776 unsigned h, l, r, c;
778 tcg_debug_assert(is_limm(limm));
780 h = clz64(limm);
781 l = ctz64(limm);
782 if (l == 0) {
783 r = 0; /* form 0....01....1 */
784 c = ctz64(~limm) - 1;
785 if (h == 0) {
786 r = clz64(~limm); /* form 1..10..01..1 */
787 c += r;
789 } else {
790 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
791 c = r - h - 1;
793 if (ext == TCG_TYPE_I32) {
794 r &= 31;
795 c &= 31;
798 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
801 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
802 TCGReg rd, uint64_t v64)
804 int op, cmode, imm8;
806 if (is_fimm(v64, &op, &cmode, &imm8)) {
807 tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
808 } else if (type == TCG_TYPE_V128) {
809 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
810 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
811 } else {
812 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
813 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
817 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
818 tcg_target_long value)
820 tcg_target_long svalue = value;
821 tcg_target_long ivalue = ~value;
822 tcg_target_long t0, t1, t2;
823 int s0, s1;
824 AArch64Insn opc;
826 switch (type) {
827 case TCG_TYPE_I32:
828 case TCG_TYPE_I64:
829 tcg_debug_assert(rd < 32);
830 break;
832 case TCG_TYPE_V64:
833 case TCG_TYPE_V128:
834 tcg_debug_assert(rd >= 32);
835 tcg_out_dupi_vec(s, type, rd, value);
836 return;
838 default:
839 g_assert_not_reached();
842 /* For 32-bit values, discard potential garbage in value. For 64-bit
843 values within [2**31, 2**32-1], we can create smaller sequences by
844 interpreting this as a negative 32-bit number, while ensuring that
845 the high 32 bits are cleared by setting SF=0. */
846 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
847 svalue = (int32_t)value;
848 value = (uint32_t)value;
849 ivalue = (uint32_t)ivalue;
850 type = TCG_TYPE_I32;
853 /* Speed things up by handling the common case of small positive
854 and negative values specially. */
855 if ((value & ~0xffffull) == 0) {
856 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
857 return;
858 } else if ((ivalue & ~0xffffull) == 0) {
859 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
860 return;
863 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
864 use the sign-extended value. That lets us match rotated values such
865 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
866 if (is_limm(svalue)) {
867 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
868 return;
871 /* Look for host pointer values within 4G of the PC. This happens
872 often when loading pointers to QEMU's own data structures. */
873 if (type == TCG_TYPE_I64) {
874 tcg_target_long disp = value - (intptr_t)s->code_ptr;
875 if (disp == sextract64(disp, 0, 21)) {
876 tcg_out_insn(s, 3406, ADR, rd, disp);
877 return;
879 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
880 if (disp == sextract64(disp, 0, 21)) {
881 tcg_out_insn(s, 3406, ADRP, rd, disp);
882 if (value & 0xfff) {
883 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
885 return;
889 /* Would it take fewer insns to begin with MOVN? */
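    /* E.g. for 0xffffffff12345678 (45 bits set) we start from MOVN of the
       inverse: MOVN rd, #0xa987 sets rd to 0xffffffffffff5678, and one
       MOVK #0x1234, lsl #16 finishes the job -- two insns instead of the
       four MOVZ/MOVK that building it from zero would need. */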
890 if (ctpop64(value) >= 32) {
891 t0 = ivalue;
892 opc = I3405_MOVN;
893 } else {
894 t0 = value;
895 opc = I3405_MOVZ;
897 s0 = ctz64(t0) & (63 & -16);
898 t1 = t0 & ~(0xffffUL << s0);
899 s1 = ctz64(t1) & (63 & -16);
900 t2 = t1 & ~(0xffffUL << s1);
901 if (t2 == 0) {
902 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
903 if (t1 != 0) {
904 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
906 return;
909 /* For more than 2 insns, dump it into the constant pool. */
910 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
911 tcg_out_insn(s, 3305, LDR, 0, rd);
914 /* Define something more legible for general use. */
915 #define tcg_out_ldst_r tcg_out_insn_3310
917 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
918 TCGReg rn, intptr_t offset, int lgsize)
920 /* If the offset is naturally aligned and in range, then we can
921 use the scaled uimm12 encoding */
922 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
923 uintptr_t scaled_uimm = offset >> lgsize;
924 if (scaled_uimm <= 0xfff) {
925 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
926 return;
930 /* Small signed offsets can use the unscaled encoding. */
931 if (offset >= -256 && offset < 256) {
932 tcg_out_insn_3312(s, insn, rd, rn, offset);
933 return;
936 /* Worst-case scenario, move offset to temp register, use reg offset. */
937 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
938 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
941 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
943 if (ret == arg) {
944 return;
946 switch (type) {
947 case TCG_TYPE_I32:
948 case TCG_TYPE_I64:
949 if (ret < 32 && arg < 32) {
950 tcg_out_movr(s, type, ret, arg);
951 break;
952 } else if (ret < 32) {
953 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
954 break;
955 } else if (arg < 32) {
956 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
957 break;
959 /* FALLTHRU */
961 case TCG_TYPE_V64:
962 tcg_debug_assert(ret >= 32 && arg >= 32);
963 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
964 break;
965 case TCG_TYPE_V128:
966 tcg_debug_assert(ret >= 32 && arg >= 32);
967 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
968 break;
970 default:
971 g_assert_not_reached();
975 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
976 TCGReg base, intptr_t ofs)
978 AArch64Insn insn;
979 int lgsz;
981 switch (type) {
982 case TCG_TYPE_I32:
983 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
984 lgsz = 2;
985 break;
986 case TCG_TYPE_I64:
987 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
988 lgsz = 3;
989 break;
990 case TCG_TYPE_V64:
991 insn = I3312_LDRVD;
992 lgsz = 3;
993 break;
994 case TCG_TYPE_V128:
995 insn = I3312_LDRVQ;
996 lgsz = 4;
997 break;
998 default:
999 g_assert_not_reached();
1001 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1004 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1005 TCGReg base, intptr_t ofs)
1007 AArch64Insn insn;
1008 int lgsz;
1010 switch (type) {
1011 case TCG_TYPE_I32:
1012 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1013 lgsz = 2;
1014 break;
1015 case TCG_TYPE_I64:
1016 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1017 lgsz = 3;
1018 break;
1019 case TCG_TYPE_V64:
1020 insn = I3312_STRVD;
1021 lgsz = 3;
1022 break;
1023 case TCG_TYPE_V128:
1024 insn = I3312_STRVQ;
1025 lgsz = 4;
1026 break;
1027 default:
1028 g_assert_not_reached();
1030 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1033 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1034 TCGReg base, intptr_t ofs)
1036 if (type <= TCG_TYPE_I64 && val == 0) {
1037 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1038 return true;
1040 return false;
1043 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1044 TCGReg rn, unsigned int a, unsigned int b)
1046 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1049 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1050 TCGReg rn, unsigned int a, unsigned int b)
1052 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1055 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1056 TCGReg rn, unsigned int a, unsigned int b)
1058 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1061 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1062 TCGReg rn, TCGReg rm, unsigned int a)
1064 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1067 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1068 TCGReg rd, TCGReg rn, unsigned int m)
1070 int bits = ext ? 64 : 32;
1071 int max = bits - 1;
1072 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1075 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1076 TCGReg rd, TCGReg rn, unsigned int m)
1078 int max = ext ? 63 : 31;
1079 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1082 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1083 TCGReg rd, TCGReg rn, unsigned int m)
1085 int max = ext ? 63 : 31;
1086 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1089 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1090 TCGReg rd, TCGReg rn, unsigned int m)
1092 int max = ext ? 63 : 31;
1093 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1096 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1097 TCGReg rd, TCGReg rn, unsigned int m)
1099 int bits = ext ? 64 : 32;
1100 int max = bits - 1;
1101 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
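/* tcg_out_dep below maps a deposit onto BFM (alias BFI): e.g. a 32-bit
   deposit at lsb = 8, width = 16 becomes BFM Wd, Wn, #24, #15, which
   inserts Wn[15:0] into Wd[23:8] and leaves the other bits untouched. */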
1104 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1105 TCGReg rn, unsigned lsb, unsigned width)
1107 unsigned size = ext ? 64 : 32;
1108 unsigned a = (size - lsb) & (size - 1);
1109 unsigned b = width - 1;
1110 tcg_out_bfm(s, ext, rd, rn, a, b);
1113 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1114 tcg_target_long b, bool const_b)
1116 if (const_b) {
1117 /* Using CMP or CMN aliases. */
1118 if (b >= 0) {
1119 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1120 } else {
1121 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1123 } else {
1124 /* Using CMP alias SUBS wzr, Wn, Wm */
1125 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1129 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1131 ptrdiff_t offset = target - s->code_ptr;
1132 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1133 tcg_out_insn(s, 3206, B, offset);
1136 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1138 ptrdiff_t offset = target - s->code_ptr;
1139 if (offset == sextract64(offset, 0, 26)) {
1140 tcg_out_insn(s, 3206, BL, offset);
1141 } else {
1142 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1143 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1147 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1149 tcg_out_insn(s, 3207, BLR, reg);
1152 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1154 ptrdiff_t offset = target - s->code_ptr;
1155 if (offset == sextract64(offset, 0, 26)) {
1156 tcg_out_insn(s, 3206, BL, offset);
1157 } else {
1158 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1159 tcg_out_callr(s, TCG_REG_TMP);
1163 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1164 uintptr_t addr)
1166 tcg_insn_unit i1, i2;
1167 TCGType rt = TCG_TYPE_I64;
1168 TCGReg rd = TCG_REG_TMP;
1169 uint64_t pair;
1171 ptrdiff_t offset = addr - jmp_addr;
1173 if (offset == sextract64(offset, 0, 26)) {
1174 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1175 i2 = NOP;
1176 } else {
1177 offset = (addr >> 12) - (jmp_addr >> 12);
1179 /* patch ADRP */
1180 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1181 /* patch ADDI */
1182 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1184 pair = (uint64_t)i2 << 32 | i1;
1185 atomic_set((uint64_t *)jmp_addr, pair);
1186 flush_icache_range(jmp_addr, jmp_addr + 8);
1189 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1191 if (!l->has_value) {
1192 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1193 tcg_out_insn(s, 3206, B, 0);
1194 } else {
1195 tcg_out_goto(s, l->u.value_ptr);
1199 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1200 TCGArg b, bool b_const, TCGLabel *l)
1202 intptr_t offset;
1203 bool need_cmp;
1205 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1206 need_cmp = false;
1207 } else {
1208 need_cmp = true;
1209 tcg_out_cmp(s, ext, a, b, b_const);
1212 if (!l->has_value) {
1213 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1214 offset = tcg_in32(s) >> 5;
1215 } else {
1216 offset = l->u.value_ptr - s->code_ptr;
1217 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1220 if (need_cmp) {
1221 tcg_out_insn(s, 3202, B_C, c, offset);
1222 } else if (c == TCG_COND_EQ) {
1223 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1224 } else {
1225 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1229 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1231 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1234 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1236 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1239 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1241 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1244 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
1245 TCGReg rd, TCGReg rn)
1247 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1248 int bits = (8 << s_bits) - 1;
1249 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1252 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
1253 TCGReg rd, TCGReg rn)
1255 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1256 int bits = (8 << s_bits) - 1;
1257 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1260 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1261 TCGReg rn, int64_t aimm)
1263 if (aimm >= 0) {
1264 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1265 } else {
1266 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1270 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1271 TCGReg rh, TCGReg al, TCGReg ah,
1272 tcg_target_long bl, tcg_target_long bh,
1273 bool const_bl, bool const_bh, bool sub)
1275 TCGReg orig_rl = rl;
1276 AArch64Insn insn;
1278 if (rl == ah || (!const_bh && rl == bh)) {
1279 rl = TCG_REG_TMP;
1282 if (const_bl) {
1283 insn = I3401_ADDSI;
1284 if ((bl < 0) ^ sub) {
1285 insn = I3401_SUBSI;
1286 bl = -bl;
1288 if (unlikely(al == TCG_REG_XZR)) {
1289 /* ??? We want to allow al to be zero for the benefit of
1290 negation via subtraction. However, that leaves open the
1291 possibility of adding 0+const in the low part, and the
1292 immediate add instructions encode XSP not XZR. Don't try
1293 anything more elaborate here than loading another zero. */
1294 al = TCG_REG_TMP;
1295 tcg_out_movi(s, ext, al, 0);
1297 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1298 } else {
1299 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1302 insn = I3503_ADC;
1303 if (const_bh) {
1304 /* Note that the only two constants we support are 0 and -1, and
1305 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1306 if ((bh != 0) ^ sub) {
1307 insn = I3503_SBC;
1309 bh = TCG_REG_XZR;
1310 } else if (sub) {
1311 insn = I3503_SBC;
1313 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1315 tcg_out_mov(s, ext, orig_rl, rl);
1318 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1320 static const uint32_t sync[] = {
1321 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1322 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1323 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1324 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1325 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1327 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1330 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1331 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1333 TCGReg a1 = a0;
1334 if (is_ctz) {
1335 a1 = TCG_REG_TMP;
1336 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1338 if (const_b && b == (ext ? 64 : 32)) {
1339 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1340 } else {
1341 AArch64Insn sel = I3506_CSEL;
1343 tcg_out_cmp(s, ext, a0, 0, 1);
1344 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1346 if (const_b) {
1347 if (b == -1) {
1348 b = TCG_REG_XZR;
1349 sel = I3506_CSINV;
1350 } else if (b == 0) {
1351 b = TCG_REG_XZR;
1352 } else {
1353 tcg_out_movi(s, ext, d, b);
1354 b = d;
1357 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1361 #ifdef CONFIG_SOFTMMU
1362 #include "tcg-ldst.inc.c"
1364 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1365 * TCGMemOpIdx oi, uintptr_t ra)
1367 static void * const qemu_ld_helpers[16] = {
1368 [MO_UB] = helper_ret_ldub_mmu,
1369 [MO_LEUW] = helper_le_lduw_mmu,
1370 [MO_LEUL] = helper_le_ldul_mmu,
1371 [MO_LEQ] = helper_le_ldq_mmu,
1372 [MO_BEUW] = helper_be_lduw_mmu,
1373 [MO_BEUL] = helper_be_ldul_mmu,
1374 [MO_BEQ] = helper_be_ldq_mmu,
1377 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1378 * uintxx_t val, TCGMemOpIdx oi,
1379 * uintptr_t ra)
1381 static void * const qemu_st_helpers[16] = {
1382 [MO_UB] = helper_ret_stb_mmu,
1383 [MO_LEUW] = helper_le_stw_mmu,
1384 [MO_LEUL] = helper_le_stl_mmu,
1385 [MO_LEQ] = helper_le_stq_mmu,
1386 [MO_BEUW] = helper_be_stw_mmu,
1387 [MO_BEUL] = helper_be_stl_mmu,
1388 [MO_BEQ] = helper_be_stq_mmu,
1391 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1393 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1394 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1395 tcg_out_insn(s, 3406, ADR, rd, offset);
1398 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1400 TCGMemOpIdx oi = lb->oi;
1401 TCGMemOp opc = get_memop(oi);
1402 TCGMemOp size = opc & MO_SIZE;
1404 bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
1405 tcg_debug_assert(ok);
1407 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1408 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1409 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1410 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1411 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1412 if (opc & MO_SIGN) {
1413 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1414 } else {
1415 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1418 tcg_out_goto(s, lb->raddr);
1421 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1423 TCGMemOpIdx oi = lb->oi;
1424 TCGMemOp opc = get_memop(oi);
1425 TCGMemOp size = opc & MO_SIZE;
1427 bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
1428 tcg_debug_assert(ok);
1430 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1431 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1432 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1433 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1434 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1435 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1436 tcg_out_goto(s, lb->raddr);
1439 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1440 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1441 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1443 TCGLabelQemuLdst *label = new_ldst_label(s);
1445 label->is_ld = is_ld;
1446 label->oi = oi;
1447 label->type = ext;
1448 label->datalo_reg = data_reg;
1449 label->addrlo_reg = addr_reg;
1450 label->raddr = raddr;
1451 label->label_ptr[0] = label_ptr;
1454 /* We expect tlb_mask to be before tlb_table. */
1455 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
1456 offsetof(CPUArchState, tlb_mask));
1458 /* We expect to use a 24-bit unsigned offset from ENV. */
1459 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
1460 > 0xffffff);
1462 /* Load and compare a TLB entry, emitting the conditional jump to the
1463 slow path for the failure case, which will be patched later when finalizing
1464 the slow path. Generated code returns the host addend in X1,
1465 clobbers X0,X2,X3,TMP. */
1466 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1467 tcg_insn_unit **label_ptr, int mem_index,
1468 bool is_read)
1470 int mask_ofs = offsetof(CPUArchState, tlb_mask[mem_index]);
1471 int table_ofs = offsetof(CPUArchState, tlb_table[mem_index]);
1472 unsigned a_bits = get_alignment_bits(opc);
1473 unsigned s_bits = opc & MO_SIZE;
1474 unsigned a_mask = (1u << a_bits) - 1;
1475 unsigned s_mask = (1u << s_bits) - 1;
1476 TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0, x3;
1477 TCGType mask_type;
1478 uint64_t compare_mask;
1480 if (table_ofs > 0xfff) {
1481 int table_hi = table_ofs & ~0xfff;
1482 int mask_hi = mask_ofs & ~0xfff;
1484 table_base = TCG_REG_X1;
1485 if (mask_hi == table_hi) {
1486 mask_base = table_base;
1487 } else if (mask_hi) {
1488 mask_base = TCG_REG_X0;
1489 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
1490 mask_base, TCG_AREG0, mask_hi);
1492 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
1493 table_base, TCG_AREG0, table_hi);
1494 mask_ofs -= mask_hi;
1495 table_ofs -= table_hi;
1498 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1499 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1501 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
1502 tcg_out_ld(s, mask_type, TCG_REG_X0, mask_base, mask_ofs);
1503 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, table_base, table_ofs);
1505 /* Extract the TLB index from the address into X0. */
1506 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1507 TCG_REG_X0, TCG_REG_X0, addr_reg,
1508 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
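    /* The single AND with an LSR-shifted register computes
       tlb_mask & (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)); since
       the mask is pre-shifted by CPU_TLB_ENTRY_BITS, X0 ends up holding the
       byte offset of the CPUTLBEntry rather than a plain index, ready to be
       added to the table base below. */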
1510 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
1511 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1513 /* Load the tlb comparator into X0, and the fast path addend into X1. */
1514 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1515 ? offsetof(CPUTLBEntry, addr_read)
1516 : offsetof(CPUTLBEntry, addr_write));
1517 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1518 offsetof(CPUTLBEntry, addend));
1520 /* For aligned accesses, we check the first byte and include the alignment
1521 bits within the address. For unaligned access, we check that we don't
1522 cross pages using the address of the last byte of the access. */
1523 if (a_bits >= s_bits) {
1524 x3 = addr_reg;
1525 } else {
1526 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1527 TCG_REG_X3, addr_reg, s_mask - a_mask);
1528 x3 = TCG_REG_X3;
1530 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1532 /* Store the page mask part of the address into X3. */
1533 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1534 TCG_REG_X3, x3, compare_mask);
1536 /* Perform the address comparison. */
1537 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1539 /* If not equal, we jump to the slow path. */
1540 *label_ptr = s->code_ptr;
1541 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1544 #endif /* CONFIG_SOFTMMU */
1546 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1547 TCGReg data_r, TCGReg addr_r,
1548 TCGType otype, TCGReg off_r)
1550 const TCGMemOp bswap = memop & MO_BSWAP;
1552 switch (memop & MO_SSIZE) {
1553 case MO_UB:
1554 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1555 break;
1556 case MO_SB:
1557 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1558 data_r, addr_r, otype, off_r);
1559 break;
1560 case MO_UW:
1561 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1562 if (bswap) {
1563 tcg_out_rev16(s, data_r, data_r);
1565 break;
1566 case MO_SW:
1567 if (bswap) {
1568 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1569 tcg_out_rev16(s, data_r, data_r);
1570 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1571 } else {
1572 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1573 data_r, addr_r, otype, off_r);
1575 break;
1576 case MO_UL:
1577 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1578 if (bswap) {
1579 tcg_out_rev32(s, data_r, data_r);
1581 break;
1582 case MO_SL:
1583 if (bswap) {
1584 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1585 tcg_out_rev32(s, data_r, data_r);
1586 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1587 } else {
1588 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1590 break;
1591 case MO_Q:
1592 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1593 if (bswap) {
1594 tcg_out_rev64(s, data_r, data_r);
1596 break;
1597 default:
1598 tcg_abort();
1602 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1603 TCGReg data_r, TCGReg addr_r,
1604 TCGType otype, TCGReg off_r)
1606 const TCGMemOp bswap = memop & MO_BSWAP;
1608 switch (memop & MO_SIZE) {
1609 case MO_8:
1610 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1611 break;
1612 case MO_16:
1613 if (bswap && data_r != TCG_REG_XZR) {
1614 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1615 data_r = TCG_REG_TMP;
1617 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1618 break;
1619 case MO_32:
1620 if (bswap && data_r != TCG_REG_XZR) {
1621 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1622 data_r = TCG_REG_TMP;
1624 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1625 break;
1626 case MO_64:
1627 if (bswap && data_r != TCG_REG_XZR) {
1628 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1629 data_r = TCG_REG_TMP;
1631 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1632 break;
1633 default:
1634 tcg_abort();
1638 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1639 TCGMemOpIdx oi, TCGType ext)
1641 TCGMemOp memop = get_memop(oi);
1642 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1643 #ifdef CONFIG_SOFTMMU
1644 unsigned mem_index = get_mmuidx(oi);
1645 tcg_insn_unit *label_ptr;
1647 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1648 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1649 TCG_REG_X1, otype, addr_reg);
1650 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1651 s->code_ptr, label_ptr);
1652 #else /* !CONFIG_SOFTMMU */
1653 if (USE_GUEST_BASE) {
1654 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1655 TCG_REG_GUEST_BASE, otype, addr_reg);
1656 } else {
1657 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1658 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1660 #endif /* CONFIG_SOFTMMU */
1663 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1664 TCGMemOpIdx oi)
1666 TCGMemOp memop = get_memop(oi);
1667 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1668 #ifdef CONFIG_SOFTMMU
1669 unsigned mem_index = get_mmuidx(oi);
1670 tcg_insn_unit *label_ptr;
1672 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1673 tcg_out_qemu_st_direct(s, memop, data_reg,
1674 TCG_REG_X1, otype, addr_reg);
1675 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1676 data_reg, addr_reg, s->code_ptr, label_ptr);
1677 #else /* !CONFIG_SOFTMMU */
1678 if (USE_GUEST_BASE) {
1679 tcg_out_qemu_st_direct(s, memop, data_reg,
1680 TCG_REG_GUEST_BASE, otype, addr_reg);
1681 } else {
1682 tcg_out_qemu_st_direct(s, memop, data_reg,
1683 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1685 #endif /* CONFIG_SOFTMMU */
1688 static tcg_insn_unit *tb_ret_addr;
1690 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1691 const TCGArg args[TCG_MAX_OP_ARGS],
1692 const int const_args[TCG_MAX_OP_ARGS])
1694 /* 99% of the time, we can signal the use of extension registers
1695 by looking to see if the opcode handles 64-bit data. */
1696 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1698 /* Hoist the loads of the most common arguments. */
1699 TCGArg a0 = args[0];
1700 TCGArg a1 = args[1];
1701 TCGArg a2 = args[2];
1702 int c2 = const_args[2];
1704 /* Some operands are defined with "rZ" constraint, a register or
1705 the zero register. These need not actually test args[I] == 0. */
1706 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1708 switch (opc) {
1709 case INDEX_op_exit_tb:
1710 /* Reuse the zeroing that exists for goto_ptr. */
1711 if (a0 == 0) {
1712 tcg_out_goto_long(s, s->code_gen_epilogue);
1713 } else {
1714 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1715 tcg_out_goto_long(s, tb_ret_addr);
1717 break;
1719 case INDEX_op_goto_tb:
1720 if (s->tb_jmp_insn_offset != NULL) {
1721 /* TCG_TARGET_HAS_direct_jump */
1722 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1723 write can be used to patch the target address. */
1724 if ((uintptr_t)s->code_ptr & 7) {
1725 tcg_out32(s, NOP);
1727 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1728 /* actual branch destination will be patched by
1729 tb_target_set_jmp_target later. */
1730 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1731 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1732 } else {
1733 /* !TCG_TARGET_HAS_direct_jump */
1734 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1735 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1736 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1738 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1739 set_jmp_reset_offset(s, a0);
1740 break;
1742 case INDEX_op_goto_ptr:
1743 tcg_out_insn(s, 3207, BR, a0);
1744 break;
1746 case INDEX_op_br:
1747 tcg_out_goto_label(s, arg_label(a0));
1748 break;
1750 case INDEX_op_ld8u_i32:
1751 case INDEX_op_ld8u_i64:
1752 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1753 break;
1754 case INDEX_op_ld8s_i32:
1755 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1756 break;
1757 case INDEX_op_ld8s_i64:
1758 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1759 break;
1760 case INDEX_op_ld16u_i32:
1761 case INDEX_op_ld16u_i64:
1762 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1763 break;
1764 case INDEX_op_ld16s_i32:
1765 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1766 break;
1767 case INDEX_op_ld16s_i64:
1768 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1769 break;
1770 case INDEX_op_ld_i32:
1771 case INDEX_op_ld32u_i64:
1772 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1773 break;
1774 case INDEX_op_ld32s_i64:
1775 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1776 break;
1777 case INDEX_op_ld_i64:
1778 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1779 break;
1781 case INDEX_op_st8_i32:
1782 case INDEX_op_st8_i64:
1783 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1784 break;
1785 case INDEX_op_st16_i32:
1786 case INDEX_op_st16_i64:
1787 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1788 break;
1789 case INDEX_op_st_i32:
1790 case INDEX_op_st32_i64:
1791 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1792 break;
1793 case INDEX_op_st_i64:
1794 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1795 break;
1797 case INDEX_op_add_i32:
1798 a2 = (int32_t)a2;
1799 /* FALLTHRU */
1800 case INDEX_op_add_i64:
1801 if (c2) {
1802 tcg_out_addsubi(s, ext, a0, a1, a2);
1803 } else {
1804 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1806 break;
1808 case INDEX_op_sub_i32:
1809 a2 = (int32_t)a2;
1810 /* FALLTHRU */
1811 case INDEX_op_sub_i64:
1812 if (c2) {
1813 tcg_out_addsubi(s, ext, a0, a1, -a2);
1814 } else {
1815 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1817 break;
1819 case INDEX_op_neg_i64:
1820 case INDEX_op_neg_i32:
1821 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1822 break;
1824 case INDEX_op_and_i32:
1825 a2 = (int32_t)a2;
1826 /* FALLTHRU */
1827 case INDEX_op_and_i64:
1828 if (c2) {
1829 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1830 } else {
1831 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1833 break;
1835 case INDEX_op_andc_i32:
1836 a2 = (int32_t)a2;
1837 /* FALLTHRU */
1838 case INDEX_op_andc_i64:
1839 if (c2) {
1840 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1841 } else {
1842 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1844 break;
1846 case INDEX_op_or_i32:
1847 a2 = (int32_t)a2;
1848 /* FALLTHRU */
1849 case INDEX_op_or_i64:
1850 if (c2) {
1851 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1852 } else {
1853 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1855 break;
1857 case INDEX_op_orc_i32:
1858 a2 = (int32_t)a2;
1859 /* FALLTHRU */
1860 case INDEX_op_orc_i64:
1861 if (c2) {
1862 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1863 } else {
1864 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1866 break;
1868 case INDEX_op_xor_i32:
1869 a2 = (int32_t)a2;
1870 /* FALLTHRU */
1871 case INDEX_op_xor_i64:
1872 if (c2) {
1873 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1874 } else {
1875 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1877 break;
1879 case INDEX_op_eqv_i32:
1880 a2 = (int32_t)a2;
1881 /* FALLTHRU */
1882 case INDEX_op_eqv_i64:
1883 if (c2) {
1884 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1885 } else {
1886 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1888 break;
1890 case INDEX_op_not_i64:
1891 case INDEX_op_not_i32:
1892 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1893 break;
1895 case INDEX_op_mul_i64:
1896 case INDEX_op_mul_i32:
1897 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1898 break;
1900 case INDEX_op_div_i64:
1901 case INDEX_op_div_i32:
1902 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1903 break;
1904 case INDEX_op_divu_i64:
1905 case INDEX_op_divu_i32:
1906 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1907 break;
1909 case INDEX_op_rem_i64:
1910 case INDEX_op_rem_i32:
1911 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1912 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1913 break;
1914 case INDEX_op_remu_i64:
1915 case INDEX_op_remu_i32:
1916 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1917 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1918 break;
1920 case INDEX_op_shl_i64:
1921 case INDEX_op_shl_i32:
1922 if (c2) {
1923 tcg_out_shl(s, ext, a0, a1, a2);
1924 } else {
1925 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1927 break;
1929 case INDEX_op_shr_i64:
1930 case INDEX_op_shr_i32:
1931 if (c2) {
1932 tcg_out_shr(s, ext, a0, a1, a2);
1933 } else {
1934 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1936 break;
1938 case INDEX_op_sar_i64:
1939 case INDEX_op_sar_i32:
1940 if (c2) {
1941 tcg_out_sar(s, ext, a0, a1, a2);
1942 } else {
1943 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1944 }
1945 break;
1947 case INDEX_op_rotr_i64:
1948 case INDEX_op_rotr_i32:
1949 if (c2) {
1950 tcg_out_rotr(s, ext, a0, a1, a2);
1951 } else {
1952 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1953 }
1954 break;
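/* There is no rotate-left instruction.  A constant rotate is folded by
   tcg_out_rotl; for a register count, rotating right by the negated
   amount (modulo the operation width) produces the same result. */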
1956 case INDEX_op_rotl_i64:
1957 case INDEX_op_rotl_i32:
1958 if (c2) {
1959 tcg_out_rotl(s, ext, a0, a1, a2);
1960 } else {
1961 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1962 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1963 }
1964 break;
1966 case INDEX_op_clz_i64:
1967 case INDEX_op_clz_i32:
1968 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1969 break;
1970 case INDEX_op_ctz_i64:
1971 case INDEX_op_ctz_i32:
1972 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1973 break;
1975 case INDEX_op_brcond_i32:
1976 a1 = (int32_t)a1;
1977 /* FALLTHRU */
1978 case INDEX_op_brcond_i64:
1979 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1980 break;
1982 case INDEX_op_setcond_i32:
1983 a2 = (int32_t)a2;
1984 /* FALLTHRU */
1985 case INDEX_op_setcond_i64:
1986 tcg_out_cmp(s, ext, a1, a2, c2);
1987 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1988 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1989 TCG_REG_XZR, tcg_invert_cond(args[3]));
1990 break;
1992 case INDEX_op_movcond_i32:
1993 a2 = (int32_t)a2;
1994 /* FALLTHRU */
1995 case INDEX_op_movcond_i64:
1996 tcg_out_cmp(s, ext, a1, a2, c2);
1997 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1998 break;
2000 case INDEX_op_qemu_ld_i32:
2001 case INDEX_op_qemu_ld_i64:
2002 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2003 break;
2004 case INDEX_op_qemu_st_i32:
2005 case INDEX_op_qemu_st_i64:
2006 tcg_out_qemu_st(s, REG0(0), a1, a2);
2007 break;
2009 case INDEX_op_bswap64_i64:
2010 tcg_out_rev64(s, a0, a1);
2011 break;
2012 case INDEX_op_bswap32_i64:
2013 case INDEX_op_bswap32_i32:
2014 tcg_out_rev32(s, a0, a1);
2015 break;
2016 case INDEX_op_bswap16_i64:
2017 case INDEX_op_bswap16_i32:
2018 tcg_out_rev16(s, a0, a1);
2019 break;
2021 case INDEX_op_ext8s_i64:
2022 case INDEX_op_ext8s_i32:
2023 tcg_out_sxt(s, ext, MO_8, a0, a1);
2024 break;
2025 case INDEX_op_ext16s_i64:
2026 case INDEX_op_ext16s_i32:
2027 tcg_out_sxt(s, ext, MO_16, a0, a1);
2028 break;
2029 case INDEX_op_ext_i32_i64:
2030 case INDEX_op_ext32s_i64:
2031 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2032 break;
2033 case INDEX_op_ext8u_i64:
2034 case INDEX_op_ext8u_i32:
2035 tcg_out_uxt(s, MO_8, a0, a1);
2036 break;
2037 case INDEX_op_ext16u_i64:
2038 case INDEX_op_ext16u_i32:
2039 tcg_out_uxt(s, MO_16, a0, a1);
2040 break;
2041 case INDEX_op_extu_i32_i64:
2042 case INDEX_op_ext32u_i64:
2043 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2044 break;
2046 case INDEX_op_deposit_i64:
2047 case INDEX_op_deposit_i32:
2048 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2049 break;
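/* extract/sextract map onto UBFM/SBFM with the bitfield described as
   (lsb, lsb + len - 1); here a2 is the lsb and args[3] the length. */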
2051 case INDEX_op_extract_i64:
2052 case INDEX_op_extract_i32:
2053 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2054 break;
2056 case INDEX_op_sextract_i64:
2057 case INDEX_op_sextract_i32:
2058 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2059 break;
2061 case INDEX_op_add2_i32:
2062 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2063 (int32_t)args[4], args[5], const_args[4],
2064 const_args[5], false);
2065 break;
2066 case INDEX_op_add2_i64:
2067 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2068 args[5], const_args[4], const_args[5], false);
2069 break;
2070 case INDEX_op_sub2_i32:
2071 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2072 (int32_t)args[4], args[5], const_args[4],
2073 const_args[5], true);
2074 break;
2075 case INDEX_op_sub2_i64:
2076 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2077 args[5], const_args[4], const_args[5], true);
2078 break;
2080 case INDEX_op_muluh_i64:
2081 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2082 break;
2083 case INDEX_op_mulsh_i64:
2084 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2085 break;
2087 case INDEX_op_mb:
2088 tcg_out_mb(s, a0);
2089 break;
2091 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2092 case INDEX_op_mov_i64:
2093 case INDEX_op_mov_vec:
2094 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2095 case INDEX_op_movi_i64:
2096 case INDEX_op_dupi_vec:
2097 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2098 default:
2099 g_assert_not_reached();
2100 }
2101 }
2102 #undef REG0
2105 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2106 unsigned vecl, unsigned vece,
2107 const TCGArg *args, const int *const_args)
2108 {
2109 static const AArch64Insn cmp_insn[16] = {
2110 [TCG_COND_EQ] = I3616_CMEQ,
2111 [TCG_COND_GT] = I3616_CMGT,
2112 [TCG_COND_GE] = I3616_CMGE,
2113 [TCG_COND_GTU] = I3616_CMHI,
2114 [TCG_COND_GEU] = I3616_CMHS,
2115 };
2116 static const AArch64Insn cmp0_insn[16] = {
2117 [TCG_COND_EQ] = I3617_CMEQ0,
2118 [TCG_COND_GT] = I3617_CMGT0,
2119 [TCG_COND_GE] = I3617_CMGE0,
2120 [TCG_COND_LT] = I3617_CMLT0,
2121 [TCG_COND_LE] = I3617_CMLE0,
2122 };
2124 TCGType type = vecl + TCG_TYPE_V64;
2125 unsigned is_q = vecl;
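/* vecl is the vector type relative to TCG_TYPE_V64, i.e. 0 for 64-bit
   and 1 for 128-bit vectors, so it doubles as the Q bit of the encoding. */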
2126 TCGArg a0, a1, a2;
2128 a0 = args[0];
2129 a1 = args[1];
2130 a2 = args[2];
2132 switch (opc) {
2133 case INDEX_op_ld_vec:
2134 tcg_out_ld(s, type, a0, a1, a2);
2135 break;
2136 case INDEX_op_st_vec:
2137 tcg_out_st(s, type, a0, a1, a2);
2138 break;
2139 case INDEX_op_add_vec:
2140 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2141 break;
2142 case INDEX_op_sub_vec:
2143 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2144 break;
2145 case INDEX_op_mul_vec:
2146 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2147 break;
2148 case INDEX_op_neg_vec:
2149 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2150 break;
2151 case INDEX_op_and_vec:
2152 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2153 break;
2154 case INDEX_op_or_vec:
2155 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2156 break;
2157 case INDEX_op_xor_vec:
2158 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2159 break;
2160 case INDEX_op_andc_vec:
2161 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2162 break;
2163 case INDEX_op_orc_vec:
2164 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2165 break;
2166 case INDEX_op_ssadd_vec:
2167 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2168 break;
2169 case INDEX_op_sssub_vec:
2170 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2171 break;
2172 case INDEX_op_usadd_vec:
2173 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2174 break;
2175 case INDEX_op_ussub_vec:
2176 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2177 break;
2178 case INDEX_op_smax_vec:
2179 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2180 break;
2181 case INDEX_op_smin_vec:
2182 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2183 break;
2184 case INDEX_op_umax_vec:
2185 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2186 break;
2187 case INDEX_op_umin_vec:
2188 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2189 break;
2190 case INDEX_op_not_vec:
2191 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2192 break;
2193 case INDEX_op_dup_vec:
2194 tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0);
2195 break;
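/* Advanced SIMD shift-by-immediate encodes element size and shift count
   together in immh:immb: SHL uses (8 << vece) + shift, while USHR/SSHR
   use (16 << vece) - shift, hence the adjusted immediates below. */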
2196 case INDEX_op_shli_vec:
2197 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2198 break;
2199 case INDEX_op_shri_vec:
2200 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2201 break;
2202 case INDEX_op_sari_vec:
2203 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2204 break;
2205 case INDEX_op_cmp_vec:
2206 {
2207 TCGCond cond = args[3];
2208 AArch64Insn insn;
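/* There is no vector compare-not-equal.  Against the constant zero
   (the only constant the "wZ" constraint allows), CMTST Vd, Vn, Vn
   sets each element to all-ones iff it is non-zero; otherwise compare
   for equality and invert the result with NOT. */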
2210 if (cond == TCG_COND_NE) {
2211 if (const_args[2]) {
2212 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2213 } else {
2214 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2215 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2216 }
2217 } else {
2218 if (const_args[2]) {
2219 insn = cmp0_insn[cond];
2220 if (insn) {
2221 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2222 break;
2223 }
2224 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2225 a2 = TCG_VEC_TMP;
2226 }
2227 insn = cmp_insn[cond];
2228 if (insn == 0) {
2229 TCGArg t;
2230 t = a1, a1 = a2, a2 = t;
2231 cond = tcg_swap_cond(cond);
2232 insn = cmp_insn[cond];
2233 tcg_debug_assert(insn != 0);
2234 }
2235 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2236 }
2237 }
2238 break;
2239 default:
2240 g_assert_not_reached();
2241 }
2242 }
2244 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2245 {
2246 switch (opc) {
2247 case INDEX_op_add_vec:
2248 case INDEX_op_sub_vec:
2249 case INDEX_op_and_vec:
2250 case INDEX_op_or_vec:
2251 case INDEX_op_xor_vec:
2252 case INDEX_op_andc_vec:
2253 case INDEX_op_orc_vec:
2254 case INDEX_op_neg_vec:
2255 case INDEX_op_not_vec:
2256 case INDEX_op_cmp_vec:
2257 case INDEX_op_shli_vec:
2258 case INDEX_op_shri_vec:
2259 case INDEX_op_sari_vec:
2260 case INDEX_op_ssadd_vec:
2261 case INDEX_op_sssub_vec:
2262 case INDEX_op_usadd_vec:
2263 case INDEX_op_ussub_vec:
2264 case INDEX_op_smax_vec:
2265 case INDEX_op_smin_vec:
2266 case INDEX_op_umax_vec:
2267 case INDEX_op_umin_vec:
2268 return 1;
2269 case INDEX_op_mul_vec:
2270 return vece < MO_64;
2272 default:
2273 return 0;
2274 }
2275 }
2277 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2278 TCGArg a0, ...)
2279 {
2280 }
2282 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2283 {
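/* Constraint letters used below are decoded by target_parse_constraint
   earlier in this file: "r" core register, "w" vector register, "l"
   core register as constrained for qemu_ld/st operands, plus the
   immediate classes "Z" (zero), "A" (add/sub immediate), "L" (logical
   immediate) and "M" (used only by the add2/sub2 definition). */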
2284 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2285 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2286 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2287 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2288 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2289 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2290 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2291 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2292 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2293 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2294 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2295 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2296 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2297 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2298 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2299 static const TCGTargetOpDef r_r_rAL
2300 = { .args_ct_str = { "r", "r", "rAL" } };
2301 static const TCGTargetOpDef dep
2302 = { .args_ct_str = { "r", "0", "rZ" } };
2303 static const TCGTargetOpDef movc
2304 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2305 static const TCGTargetOpDef add2
2306 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2308 switch (op) {
2309 case INDEX_op_goto_ptr:
2310 return &r;
2312 case INDEX_op_ld8u_i32:
2313 case INDEX_op_ld8s_i32:
2314 case INDEX_op_ld16u_i32:
2315 case INDEX_op_ld16s_i32:
2316 case INDEX_op_ld_i32:
2317 case INDEX_op_ld8u_i64:
2318 case INDEX_op_ld8s_i64:
2319 case INDEX_op_ld16u_i64:
2320 case INDEX_op_ld16s_i64:
2321 case INDEX_op_ld32u_i64:
2322 case INDEX_op_ld32s_i64:
2323 case INDEX_op_ld_i64:
2324 case INDEX_op_neg_i32:
2325 case INDEX_op_neg_i64:
2326 case INDEX_op_not_i32:
2327 case INDEX_op_not_i64:
2328 case INDEX_op_bswap16_i32:
2329 case INDEX_op_bswap32_i32:
2330 case INDEX_op_bswap16_i64:
2331 case INDEX_op_bswap32_i64:
2332 case INDEX_op_bswap64_i64:
2333 case INDEX_op_ext8s_i32:
2334 case INDEX_op_ext16s_i32:
2335 case INDEX_op_ext8u_i32:
2336 case INDEX_op_ext16u_i32:
2337 case INDEX_op_ext8s_i64:
2338 case INDEX_op_ext16s_i64:
2339 case INDEX_op_ext32s_i64:
2340 case INDEX_op_ext8u_i64:
2341 case INDEX_op_ext16u_i64:
2342 case INDEX_op_ext32u_i64:
2343 case INDEX_op_ext_i32_i64:
2344 case INDEX_op_extu_i32_i64:
2345 case INDEX_op_extract_i32:
2346 case INDEX_op_extract_i64:
2347 case INDEX_op_sextract_i32:
2348 case INDEX_op_sextract_i64:
2349 return &r_r;
2351 case INDEX_op_st8_i32:
2352 case INDEX_op_st16_i32:
2353 case INDEX_op_st_i32:
2354 case INDEX_op_st8_i64:
2355 case INDEX_op_st16_i64:
2356 case INDEX_op_st32_i64:
2357 case INDEX_op_st_i64:
2358 return &rZ_r;
2360 case INDEX_op_add_i32:
2361 case INDEX_op_add_i64:
2362 case INDEX_op_sub_i32:
2363 case INDEX_op_sub_i64:
2364 case INDEX_op_setcond_i32:
2365 case INDEX_op_setcond_i64:
2366 return &r_r_rA;
2368 case INDEX_op_mul_i32:
2369 case INDEX_op_mul_i64:
2370 case INDEX_op_div_i32:
2371 case INDEX_op_div_i64:
2372 case INDEX_op_divu_i32:
2373 case INDEX_op_divu_i64:
2374 case INDEX_op_rem_i32:
2375 case INDEX_op_rem_i64:
2376 case INDEX_op_remu_i32:
2377 case INDEX_op_remu_i64:
2378 case INDEX_op_muluh_i64:
2379 case INDEX_op_mulsh_i64:
2380 return &r_r_r;
2382 case INDEX_op_and_i32:
2383 case INDEX_op_and_i64:
2384 case INDEX_op_or_i32:
2385 case INDEX_op_or_i64:
2386 case INDEX_op_xor_i32:
2387 case INDEX_op_xor_i64:
2388 case INDEX_op_andc_i32:
2389 case INDEX_op_andc_i64:
2390 case INDEX_op_orc_i32:
2391 case INDEX_op_orc_i64:
2392 case INDEX_op_eqv_i32:
2393 case INDEX_op_eqv_i64:
2394 return &r_r_rL;
2396 case INDEX_op_shl_i32:
2397 case INDEX_op_shr_i32:
2398 case INDEX_op_sar_i32:
2399 case INDEX_op_rotl_i32:
2400 case INDEX_op_rotr_i32:
2401 case INDEX_op_shl_i64:
2402 case INDEX_op_shr_i64:
2403 case INDEX_op_sar_i64:
2404 case INDEX_op_rotl_i64:
2405 case INDEX_op_rotr_i64:
2406 return &r_r_ri;
2408 case INDEX_op_clz_i32:
2409 case INDEX_op_ctz_i32:
2410 case INDEX_op_clz_i64:
2411 case INDEX_op_ctz_i64:
2412 return &r_r_rAL;
2414 case INDEX_op_brcond_i32:
2415 case INDEX_op_brcond_i64:
2416 return &r_rA;
2418 case INDEX_op_movcond_i32:
2419 case INDEX_op_movcond_i64:
2420 return &movc;
2422 case INDEX_op_qemu_ld_i32:
2423 case INDEX_op_qemu_ld_i64:
2424 return &r_l;
2425 case INDEX_op_qemu_st_i32:
2426 case INDEX_op_qemu_st_i64:
2427 return &lZ_l;
2429 case INDEX_op_deposit_i32:
2430 case INDEX_op_deposit_i64:
2431 return &dep;
2433 case INDEX_op_add2_i32:
2434 case INDEX_op_add2_i64:
2435 case INDEX_op_sub2_i32:
2436 case INDEX_op_sub2_i64:
2437 return &add2;
2439 case INDEX_op_add_vec:
2440 case INDEX_op_sub_vec:
2441 case INDEX_op_mul_vec:
2442 case INDEX_op_and_vec:
2443 case INDEX_op_or_vec:
2444 case INDEX_op_xor_vec:
2445 case INDEX_op_andc_vec:
2446 case INDEX_op_orc_vec:
2447 case INDEX_op_ssadd_vec:
2448 case INDEX_op_sssub_vec:
2449 case INDEX_op_usadd_vec:
2450 case INDEX_op_ussub_vec:
2451 case INDEX_op_smax_vec:
2452 case INDEX_op_smin_vec:
2453 case INDEX_op_umax_vec:
2454 case INDEX_op_umin_vec:
2455 return &w_w_w;
2456 case INDEX_op_not_vec:
2457 case INDEX_op_neg_vec:
2458 case INDEX_op_shli_vec:
2459 case INDEX_op_shri_vec:
2460 case INDEX_op_sari_vec:
2461 return &w_w;
2462 case INDEX_op_ld_vec:
2463 case INDEX_op_st_vec:
2464 return &w_r;
2465 case INDEX_op_dup_vec:
2466 return &w_wr;
2467 case INDEX_op_cmp_vec:
2468 return &w_w_wZ;
2470 default:
2471 return NULL;
2472 }
2473 }
2475 static void tcg_target_init(TCGContext *s)
2476 {
2477 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2478 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2479 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2480 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2482 tcg_target_call_clobber_regs = -1ull;
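/* Everything starts out call-clobbered; the AAPCS64 callee-saved
   registers x19-x29 and v8-v15 (low 64 bits) are then cleared below. */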
2483 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2484 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2485 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2486 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2487 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2488 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2489 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2490 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2491 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2492 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2493 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2494 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2495 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2496 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2497 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2498 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2499 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2500 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2501 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2503 s->reserved_regs = 0;
2504 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2505 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2506 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2507 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2508 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2509 }
2511 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2512 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2514 #define FRAME_SIZE \
2515 ((PUSH_SIZE \
2516 + TCG_STATIC_CALL_ARGS_SIZE \
2517 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2518 + TCG_TARGET_STACK_ALIGN - 1) \
2519 & ~(TCG_TARGET_STACK_ALIGN - 1))
2521 /* We're expecting a 2 byte uleb128 encoded value. */
2522 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2524 /* We're expecting to use a single ADDI insn. */
2525 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2527 static void tcg_target_qemu_prologue(TCGContext *s)
2528 {
2529 TCGReg r;
2531 /* Push (FP, LR) and allocate space for all saved registers. */
2532 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2533 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2535 /* Set up frame pointer for canonical unwinding. */
2536 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2538 /* Store callee-preserved regs x19..x28. */
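/* The first two slots of the push area already hold FP and LR, so the
   saved pairs start at offset 16 from SP. */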
2539 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2540 int ofs = (r - TCG_REG_X19 + 2) * 8;
2541 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2542 }
2544 /* Make stack space for TCG locals. */
2545 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2546 FRAME_SIZE - PUSH_SIZE);
2548 /* Inform TCG about how to find TCG locals with register, offset, size. */
2549 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2550 CPU_TEMP_BUF_NLONGS * sizeof(long));
2552 #if !defined(CONFIG_SOFTMMU)
2553 if (USE_GUEST_BASE) {
2554 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2555 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2556 }
2557 #endif
2559 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2560 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2562 /*
2563 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2564 * and fall through to the rest of the epilogue.
2565 */
2566 s->code_gen_epilogue = s->code_ptr;
2567 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2569 /* TB epilogue */
2570 tb_ret_addr = s->code_ptr;
2572 /* Remove TCG locals stack space. */
2573 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2574 FRAME_SIZE - PUSH_SIZE);
2576 /* Restore registers x19..x28. */
2577 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2578 int ofs = (r - TCG_REG_X19 + 2) * 8;
2579 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2580 }
2582 /* Pop (FP, LR), restore SP to previous frame. */
2583 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2584 TCG_REG_SP, PUSH_SIZE, 0, 1);
2585 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2586 }
2588 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2589 {
2590 int i;
2591 for (i = 0; i < count; ++i) {
2592 p[i] = NOP;
2593 }
2594 }
2596 typedef struct {
2597 DebugFrameHeader h;
2598 uint8_t fde_def_cfa[4];
2599 uint8_t fde_reg_ofs[24];
2600 } DebugFrame;
2602 #define ELF_HOST_MACHINE EM_AARCH64
2604 static const DebugFrame debug_frame = {
2605 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2606 .h.cie.id = -1,
2607 .h.cie.version = 1,
2608 .h.cie.code_align = 1,
2609 .h.cie.data_align = 0x78, /* sleb128 -8 */
2610 .h.cie.return_column = TCG_REG_LR,
2612 /* Total FDE size does not include the "len" member. */
2613 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
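/* FRAME_SIZE is emitted as a two-byte uleb128 (low 7 bits with the
   continuation bit set, then the high bits), which is why the build
   asserts FRAME_SIZE < (1 << 14) above. */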
2615 .fde_def_cfa = {
2616 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2617 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2618 (FRAME_SIZE >> 7)
2619 },
2620 .fde_reg_ofs = {
2621 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2622 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2623 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2624 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2625 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2626 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2627 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2628 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2629 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2630 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2631 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2632 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2633 }
2634 };
2636 void tcg_register_jit(void *buf, size_t buf_size)
2637 {
2638 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2639 }