qemu/ar7.git: tcg/aarch64/tcg-target.inc.c
1 /*
2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "tcg-pool.inc.c"
14 #include "qemu/bitops.h"
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
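/* For example, an integer emitter below called with ext = TCG_TYPE_I64 (1)
   ORs that value straight into bit 31 (insn | ext << 31), selecting the
   64-bit sf=1 form, while TCG_TYPE_I32 (0) leaves sf clear for 32 bits. */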
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33 #endif /* CONFIG_DEBUG_TCG */
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
72 #ifndef CONFIG_SOFTMMU
73 /* Note that XZR cannot be encoded in the address base register slot,
74  * as that actually encodes SP. So if we need to zero-extend the guest
75 address, via the address index register slot, we need to load even
76 a zero guest base into a register. */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
79 #endif
81 static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
83 ptrdiff_t offset = target - code_ptr;
84 if (offset == sextract64(offset, 0, 26)) {
85 /* read instruction, mask away previous PC_REL26 parameter contents,
86 set the proper offset, then write back the instruction. */
87 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
88 return true;
90 return false;
93 static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
95 ptrdiff_t offset = target - code_ptr;
96 if (offset == sextract64(offset, 0, 19)) {
97 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
98 return true;
100 return false;
103 static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
104 intptr_t value, intptr_t addend)
106 tcg_debug_assert(addend == 0);
107 switch (type) {
108 case R_AARCH64_JUMP26:
109 case R_AARCH64_CALL26:
110 return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
111 case R_AARCH64_CONDBR19:
112 return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
113 default:
114 g_assert_not_reached();
118 #define TCG_CT_CONST_AIMM 0x100
119 #define TCG_CT_CONST_LIMM 0x200
120 #define TCG_CT_CONST_ZERO 0x400
121 #define TCG_CT_CONST_MONE 0x800
123 /* parse target specific constraints */
124 static const char *target_parse_constraint(TCGArgConstraint *ct,
125 const char *ct_str, TCGType type)
127 switch (*ct_str++) {
128 case 'r': /* general registers */
129 ct->ct |= TCG_CT_REG;
130 ct->u.regs |= 0xffffffffu;
131 break;
132 case 'w': /* advsimd registers */
133 ct->ct |= TCG_CT_REG;
134 ct->u.regs |= 0xffffffff00000000ull;
135 break;
136 case 'l': /* qemu_ld / qemu_st address, data_reg */
137 ct->ct |= TCG_CT_REG;
138 ct->u.regs = 0xffffffffu;
139 #ifdef CONFIG_SOFTMMU
140 /* x0 and x1 will be overwritten when reading the tlb entry,
141 and x2 and x3 for helper args; better to avoid using them. */
142 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
143 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
144 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
145 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
146 #endif
147 break;
148 case 'A': /* Valid for arithmetic immediate (positive or negative). */
149 ct->ct |= TCG_CT_CONST_AIMM;
150 break;
151 case 'L': /* Valid for logical immediate. */
152 ct->ct |= TCG_CT_CONST_LIMM;
153 break;
154 case 'M': /* minus one */
155 ct->ct |= TCG_CT_CONST_MONE;
156 break;
157 case 'Z': /* zero */
158 ct->ct |= TCG_CT_CONST_ZERO;
159 break;
160 default:
161 return NULL;
163 return ct_str;
166 /* Match a constant valid for addition (12-bit, optionally shifted). */
167 static inline bool is_aimm(uint64_t val)
169 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
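/* E.g. 0x123 and 0x123000 are valid arithmetic immediates (the latter via
   the LSL #12 form), whereas 0x123001 straddles both halves and is not. */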
172 /* Match a constant valid for logical operations. */
173 static inline bool is_limm(uint64_t val)
175 /* Taking a simplified view of the logical immediates for now, ignoring
176 the replication that can happen across the field. Match bit patterns
177 of the forms
178 0....01....1
179 0..01..10..0
180 and their inverses. */
182 /* Make things easier below, by testing the form with msb clear. */
183 if ((int64_t)val < 0) {
184 val = ~val;
186 if (val == 0) {
187 return false;
189 val += val & -val;
190 return (val & (val - 1)) == 0;
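/* Worked example: val = 0x0ff0 (form 0..01..10..0).  Adding the lowest set
   bit (val & -val = 0x10) collapses the run of ones into the single bit
   0x1000, so (val & (val - 1)) == 0 and the value is accepted.  A value
   like 0x0f0f0 leaves a second group of ones behind and is rejected. */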
193 /* Match a constant that is valid for vectors. */
194 static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
196 int i;
198 *op = 0;
199 /* Match replication across 8 bits. */
200 if (v64 == dup_const(MO_8, v64)) {
201 *cmode = 0xe;
202 *imm8 = v64 & 0xff;
203 return true;
205 /* Match replication across 16 bits. */
206 if (v64 == dup_const(MO_16, v64)) {
207 uint16_t v16 = v64;
209 if (v16 == (v16 & 0xff)) {
210 *cmode = 0x8;
211 *imm8 = v16 & 0xff;
212 return true;
213 } else if (v16 == (v16 & 0xff00)) {
214 *cmode = 0xa;
215 *imm8 = v16 >> 8;
216 return true;
219 /* Match replication across 32 bits. */
220 if (v64 == dup_const(MO_32, v64)) {
221 uint32_t v32 = v64;
223 if (v32 == (v32 & 0xff)) {
224 *cmode = 0x0;
225 *imm8 = v32 & 0xff;
226 return true;
227 } else if (v32 == (v32 & 0xff00)) {
228 *cmode = 0x2;
229 *imm8 = (v32 >> 8) & 0xff;
230 return true;
231 } else if (v32 == (v32 & 0xff0000)) {
232 *cmode = 0x4;
233 *imm8 = (v32 >> 16) & 0xff;
234 return true;
235 } else if (v32 == (v32 & 0xff000000)) {
236 *cmode = 0x6;
237 *imm8 = v32 >> 24;
238 return true;
239 } else if ((v32 & 0xffff00ff) == 0xff) {
240 *cmode = 0xc;
241 *imm8 = (v32 >> 8) & 0xff;
242 return true;
243 } else if ((v32 & 0xff00ffff) == 0xffff) {
244 *cmode = 0xd;
245 *imm8 = (v32 >> 16) & 0xff;
246 return true;
248 /* Match forms of a float32. */
249 if (extract32(v32, 0, 19) == 0
250 && (extract32(v32, 25, 6) == 0x20
251 || extract32(v32, 25, 6) == 0x1f)) {
252 *cmode = 0xf;
253 *imm8 = (extract32(v32, 31, 1) << 7)
254 | (extract32(v32, 25, 1) << 6)
255 | extract32(v32, 19, 6);
256 return true;
259 /* Match forms of a float64. */
260 if (extract64(v64, 0, 48) == 0
261 && (extract64(v64, 54, 9) == 0x100
262 || extract64(v64, 54, 9) == 0x0ff)) {
263 *cmode = 0xf;
264 *op = 1;
265 *imm8 = (extract64(v64, 63, 1) << 7)
266 | (extract64(v64, 54, 1) << 6)
267 | extract64(v64, 48, 6);
268 return true;
270 /* Match bytes of 0x00 and 0xff. */
271 for (i = 0; i < 64; i += 8) {
272 uint64_t byte = extract64(v64, i, 8);
273 if (byte != 0 && byte != 0xff) {
274 break;
277 if (i == 64) {
278 *cmode = 0xe;
279 *op = 1;
280 *imm8 = (extract64(v64, 0, 1) << 0)
281 | (extract64(v64, 8, 1) << 1)
282 | (extract64(v64, 16, 1) << 2)
283 | (extract64(v64, 24, 1) << 3)
284 | (extract64(v64, 32, 1) << 4)
285 | (extract64(v64, 40, 1) << 5)
286 | (extract64(v64, 48, 1) << 6)
287 | (extract64(v64, 56, 1) << 7);
288 return true;
290 return false;
293 static int tcg_target_const_match(tcg_target_long val, TCGType type,
294 const TCGArgConstraint *arg_ct)
296 int ct = arg_ct->ct;
298 if (ct & TCG_CT_CONST) {
299 return 1;
301 if (type == TCG_TYPE_I32) {
302 val = (int32_t)val;
304 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
305 return 1;
307 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
308 return 1;
310 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
311 return 1;
313 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
314 return 1;
317 return 0;
320 enum aarch64_cond_code {
321 COND_EQ = 0x0,
322 COND_NE = 0x1,
323 COND_CS = 0x2, /* Unsigned greater or equal */
324 COND_HS = COND_CS, /* ALIAS greater or equal */
325 COND_CC = 0x3, /* Unsigned less than */
326 COND_LO = COND_CC, /* ALIAS Lower */
327 COND_MI = 0x4, /* Negative */
328 COND_PL = 0x5, /* Zero or greater */
329 COND_VS = 0x6, /* Overflow */
330 COND_VC = 0x7, /* No overflow */
331 COND_HI = 0x8, /* Unsigned greater than */
332 COND_LS = 0x9, /* Unsigned less or equal */
333 COND_GE = 0xa,
334 COND_LT = 0xb,
335 COND_GT = 0xc,
336 COND_LE = 0xd,
337 COND_AL = 0xe,
338 COND_NV = 0xf, /* behaves like COND_AL here */
341 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
342 [TCG_COND_EQ] = COND_EQ,
343 [TCG_COND_NE] = COND_NE,
344 [TCG_COND_LT] = COND_LT,
345 [TCG_COND_GE] = COND_GE,
346 [TCG_COND_LE] = COND_LE,
347 [TCG_COND_GT] = COND_GT,
348 /* unsigned */
349 [TCG_COND_LTU] = COND_LO,
350 [TCG_COND_GTU] = COND_HI,
351 [TCG_COND_GEU] = COND_HS,
352 [TCG_COND_LEU] = COND_LS,
355 typedef enum {
356 LDST_ST = 0, /* store */
357 LDST_LD = 1, /* load */
358 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
359 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
360 } AArch64LdstType;
362 /* We encode the format of the insn into the beginning of the name, so that
363 we can have the preprocessor help "typecheck" the insn vs the output
364 function. Arm didn't provide us with nice names for the formats, so we
365 use the section number of the architecture reference manual in which the
366 instruction group is described. */
367 typedef enum {
368 /* Compare and branch (immediate). */
369 I3201_CBZ = 0x34000000,
370 I3201_CBNZ = 0x35000000,
372 /* Conditional branch (immediate). */
373 I3202_B_C = 0x54000000,
375 /* Unconditional branch (immediate). */
376 I3206_B = 0x14000000,
377 I3206_BL = 0x94000000,
379 /* Unconditional branch (register). */
380 I3207_BR = 0xd61f0000,
381 I3207_BLR = 0xd63f0000,
382 I3207_RET = 0xd65f0000,
384 /* AdvSIMD load/store single structure. */
385 I3303_LD1R = 0x0d40c000,
387 /* Load literal for loading the address at pc-relative offset */
388 I3305_LDR = 0x58000000,
389 I3305_LDR_v64 = 0x5c000000,
390 I3305_LDR_v128 = 0x9c000000,
392 /* Load/store register. Described here as 3.3.12, but the helper
393 that emits them can transform to 3.3.10 or 3.3.13. */
394 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
395 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
396 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
397 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
399 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
400 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
401 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
402 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
404 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
405 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
407 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
408 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
409 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
411 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
412 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
414 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
415 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
417 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
418 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
420 I3312_TO_I3310 = 0x00200800,
421 I3312_TO_I3313 = 0x01000000,
423 /* Load/store register pair instructions. */
424 I3314_LDP = 0x28400000,
425 I3314_STP = 0x28000000,
427 /* Add/subtract immediate instructions. */
428 I3401_ADDI = 0x11000000,
429 I3401_ADDSI = 0x31000000,
430 I3401_SUBI = 0x51000000,
431 I3401_SUBSI = 0x71000000,
433 /* Bitfield instructions. */
434 I3402_BFM = 0x33000000,
435 I3402_SBFM = 0x13000000,
436 I3402_UBFM = 0x53000000,
438 /* Extract instruction. */
439 I3403_EXTR = 0x13800000,
441 /* Logical immediate instructions. */
442 I3404_ANDI = 0x12000000,
443 I3404_ORRI = 0x32000000,
444 I3404_EORI = 0x52000000,
446 /* Move wide immediate instructions. */
447 I3405_MOVN = 0x12800000,
448 I3405_MOVZ = 0x52800000,
449 I3405_MOVK = 0x72800000,
451 /* PC relative addressing instructions. */
452 I3406_ADR = 0x10000000,
453 I3406_ADRP = 0x90000000,
455 /* Add/subtract shifted register instructions (without a shift). */
456 I3502_ADD = 0x0b000000,
457 I3502_ADDS = 0x2b000000,
458 I3502_SUB = 0x4b000000,
459 I3502_SUBS = 0x6b000000,
461 /* Add/subtract shifted register instructions (with a shift). */
462 I3502S_ADD_LSL = I3502_ADD,
464 /* Add/subtract with carry instructions. */
465 I3503_ADC = 0x1a000000,
466 I3503_SBC = 0x5a000000,
468 /* Conditional select instructions. */
469 I3506_CSEL = 0x1a800000,
470 I3506_CSINC = 0x1a800400,
471 I3506_CSINV = 0x5a800000,
472 I3506_CSNEG = 0x5a800400,
474 /* Data-processing (1 source) instructions. */
475 I3507_CLZ = 0x5ac01000,
476 I3507_RBIT = 0x5ac00000,
477 I3507_REV16 = 0x5ac00400,
478 I3507_REV32 = 0x5ac00800,
479 I3507_REV64 = 0x5ac00c00,
481 /* Data-processing (2 source) instructions. */
482 I3508_LSLV = 0x1ac02000,
483 I3508_LSRV = 0x1ac02400,
484 I3508_ASRV = 0x1ac02800,
485 I3508_RORV = 0x1ac02c00,
486 I3508_SMULH = 0x9b407c00,
487 I3508_UMULH = 0x9bc07c00,
488 I3508_UDIV = 0x1ac00800,
489 I3508_SDIV = 0x1ac00c00,
491 /* Data-processing (3 source) instructions. */
492 I3509_MADD = 0x1b000000,
493 I3509_MSUB = 0x1b008000,
495 /* Logical shifted register instructions (without a shift). */
496 I3510_AND = 0x0a000000,
497 I3510_BIC = 0x0a200000,
498 I3510_ORR = 0x2a000000,
499 I3510_ORN = 0x2a200000,
500 I3510_EOR = 0x4a000000,
501 I3510_EON = 0x4a200000,
502 I3510_ANDS = 0x6a000000,
504 /* Logical shifted register instructions (with a shift). */
505 I3502S_AND_LSR = I3510_AND | (1 << 22),
507 /* AdvSIMD copy */
508 I3605_DUP = 0x0e000400,
509 I3605_INS = 0x4e001c00,
510 I3605_UMOV = 0x0e003c00,
512 /* AdvSIMD modified immediate */
513 I3606_MOVI = 0x0f000400,
515 /* AdvSIMD shift by immediate */
516 I3614_SSHR = 0x0f000400,
517 I3614_SSRA = 0x0f001400,
518 I3614_SHL = 0x0f005400,
519 I3614_USHR = 0x2f000400,
520 I3614_USRA = 0x2f001400,
522 /* AdvSIMD three same. */
523 I3616_ADD = 0x0e208400,
524 I3616_AND = 0x0e201c00,
525 I3616_BIC = 0x0e601c00,
526 I3616_EOR = 0x2e201c00,
527 I3616_MUL = 0x0e209c00,
528 I3616_ORR = 0x0ea01c00,
529 I3616_ORN = 0x0ee01c00,
530 I3616_SUB = 0x2e208400,
531 I3616_CMGT = 0x0e203400,
532 I3616_CMGE = 0x0e203c00,
533 I3616_CMTST = 0x0e208c00,
534 I3616_CMHI = 0x2e203400,
535 I3616_CMHS = 0x2e203c00,
536 I3616_CMEQ = 0x2e208c00,
537 I3616_SMAX = 0x0e206400,
538 I3616_SMIN = 0x0e206c00,
539 I3616_SSHL = 0x0e204400,
540 I3616_SQADD = 0x0e200c00,
541 I3616_SQSUB = 0x0e202c00,
542 I3616_UMAX = 0x2e206400,
543 I3616_UMIN = 0x2e206c00,
544 I3616_UQADD = 0x2e200c00,
545 I3616_UQSUB = 0x2e202c00,
546 I3616_USHL = 0x2e204400,
548 /* AdvSIMD two-reg misc. */
549 I3617_CMGT0 = 0x0e208800,
550 I3617_CMEQ0 = 0x0e209800,
551 I3617_CMLT0 = 0x0e20a800,
552 I3617_CMGE0 = 0x2e208800,
553 I3617_CMLE0 = 0x2e20a800,
554 I3617_NOT = 0x2e205800,
555 I3617_ABS = 0x0e20b800,
556 I3617_NEG = 0x2e20b800,
558 /* System instructions. */
559 NOP = 0xd503201f,
560 DMB_ISH = 0xd50338bf,
561 DMB_LD = 0x00000100,
562 DMB_ST = 0x00000200,
563 } AArch64Insn;
565 static inline uint32_t tcg_in32(TCGContext *s)
567 uint32_t v = *(uint32_t *)s->code_ptr;
568 return v;
571 /* Emit an opcode with "type-checking" of the format. */
572 #define tcg_out_insn(S, FMT, OP, ...) \
573 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
575 static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
576 TCGReg rt, TCGReg rn, unsigned size)
578 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
581 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
582 int imm19, TCGReg rt)
584 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
587 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
588 TCGReg rt, int imm19)
590 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
593 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
594 TCGCond c, int imm19)
596 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
599 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
601 tcg_out32(s, insn | (imm26 & 0x03ffffff));
604 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
606 tcg_out32(s, insn | rn << 5);
609 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
610 TCGReg r1, TCGReg r2, TCGReg rn,
611 tcg_target_long ofs, bool pre, bool w)
613 insn |= 1u << 31; /* ext */
614 insn |= pre << 24;
615 insn |= w << 23;
617 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
618 insn |= (ofs & (0x7f << 3)) << (15 - 3);
620 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
623 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
624 TCGReg rd, TCGReg rn, uint64_t aimm)
626 if (aimm > 0xfff) {
627 tcg_debug_assert((aimm & 0xfff) == 0);
628 aimm >>= 12;
629 tcg_debug_assert(aimm <= 0xfff);
630 aimm |= 1 << 12; /* apply LSL 12 */
632 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
635 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
636 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
637 that feed the DecodeBitMasks pseudo function. */
638 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
639 TCGReg rd, TCGReg rn, int n, int immr, int imms)
641 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
642 | rn << 5 | rd);
645 #define tcg_out_insn_3404 tcg_out_insn_3402
647 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
648 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
650 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
651 | rn << 5 | rd);
654 /* This function is used for the Move (wide immediate) instruction group.
655 Note that SHIFT is a full shift count, not the 2 bit HW field. */
656 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
657 TCGReg rd, uint16_t half, unsigned shift)
659 tcg_debug_assert((shift & ~0x30) == 0);
660 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
663 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
664 TCGReg rd, int64_t disp)
666 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
669 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
670 the rare occasion when we actually want to supply a shift amount. */
671 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
672 TCGType ext, TCGReg rd, TCGReg rn,
673 TCGReg rm, int imm6)
675 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
678 /* This function is for 3.5.2 (Add/subtract shifted register),
679 and 3.5.10 (Logical shifted register), for the vast majority of cases
680 when we don't want to apply a shift. Thus it can also be used for
681 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
682 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
683 TCGReg rd, TCGReg rn, TCGReg rm)
685 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
688 #define tcg_out_insn_3503 tcg_out_insn_3502
689 #define tcg_out_insn_3508 tcg_out_insn_3502
690 #define tcg_out_insn_3510 tcg_out_insn_3502
692 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
693 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
695 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
696 | tcg_cond_to_aarch64[c] << 12);
699 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
700 TCGReg rd, TCGReg rn)
702 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
705 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
706 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
708 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
711 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
712 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
714 /* Note that bit 11 set means general register input. Therefore
715 we can handle both register sets with one function. */
716 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
717 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
720 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
721 TCGReg rd, bool op, int cmode, uint8_t imm8)
723 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
724 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
727 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
728 TCGReg rd, TCGReg rn, unsigned immhb)
730 tcg_out32(s, insn | q << 30 | immhb << 16
731 | (rn & 0x1f) << 5 | (rd & 0x1f));
734 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
735 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
737 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
738 | (rn & 0x1f) << 5 | (rd & 0x1f));
741 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
742 unsigned size, TCGReg rd, TCGReg rn)
744 tcg_out32(s, insn | q << 30 | (size << 22)
745 | (rn & 0x1f) << 5 | (rd & 0x1f));
748 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
749 TCGReg rd, TCGReg base, TCGType ext,
750 TCGReg regoff)
752 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
753 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
754 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
757 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
758 TCGReg rd, TCGReg rn, intptr_t offset)
760 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
763 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
764 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
766 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
767 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
768 | rn << 5 | (rd & 0x1f));
771 /* Register to register move using ORR (shifted register with no shift). */
772 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
774 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
777 /* Register to register move using ADDI (move to/from SP). */
778 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
780 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
783 /* This function is used for the Logical (immediate) instruction group.
784 The value of LIMM must satisfy IS_LIMM. See the comment above about
785 only supporting simplified logical immediates. */
786 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
787 TCGReg rd, TCGReg rn, uint64_t limm)
789 unsigned h, l, r, c;
791 tcg_debug_assert(is_limm(limm));
793 h = clz64(limm);
794 l = ctz64(limm);
795 if (l == 0) {
796 r = 0; /* form 0....01....1 */
797 c = ctz64(~limm) - 1;
798 if (h == 0) {
799 r = clz64(~limm); /* form 1..10..01..1 */
800 c += r;
802 } else {
803 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
804 c = r - h - 1;
806 if (ext == TCG_TYPE_I32) {
807 r &= 31;
808 c &= 31;
811 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
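/* Worked example: limm = 0x0ff0, ext = TCG_TYPE_I64.  Then h = clz64 = 52
   and l = ctz64 = 4, so the else branch gives r = 64 - 4 = 60 and
   c = 60 - 52 - 1 = 7; (immr = 60, imms = 7) describes eight consecutive
   ones rotated right by 60, which is 0xff << 4 = 0x0ff0 as required. */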
814 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
815 TCGReg rd, tcg_target_long v64)
817 int op, cmode, imm8;
819 if (is_fimm(v64, &op, &cmode, &imm8)) {
820 tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
821 } else if (type == TCG_TYPE_V128) {
822 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
823 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
824 } else {
825 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
826 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
830 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
831 TCGReg rd, TCGReg rs)
833 int is_q = type - TCG_TYPE_V64;
834 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
835 return true;
838 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
839 TCGReg r, TCGReg base, intptr_t offset)
841 TCGReg temp = TCG_REG_TMP;
843 if (offset < -0xffffff || offset > 0xffffff) {
844 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
845 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
846 base = temp;
847 } else {
848 AArch64Insn add_insn = I3401_ADDI;
850 if (offset < 0) {
851 add_insn = I3401_SUBI;
852 offset = -offset;
854 if (offset & 0xfff000) {
855 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
856 base = temp;
858 if (offset & 0xfff) {
859 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
860 base = temp;
863 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
864 return true;
867 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
868 tcg_target_long value)
870 tcg_target_long svalue = value;
871 tcg_target_long ivalue = ~value;
872 tcg_target_long t0, t1, t2;
873 int s0, s1;
874 AArch64Insn opc;
876 switch (type) {
877 case TCG_TYPE_I32:
878 case TCG_TYPE_I64:
879 tcg_debug_assert(rd < 32);
880 break;
882 case TCG_TYPE_V64:
883 case TCG_TYPE_V128:
884 tcg_debug_assert(rd >= 32);
885 tcg_out_dupi_vec(s, type, rd, value);
886 return;
888 default:
889 g_assert_not_reached();
892 /* For 32-bit values, discard potential garbage in value. For 64-bit
893 values within [2**31, 2**32-1], we can create smaller sequences by
894 interpreting this as a negative 32-bit number, while ensuring that
895 the high 32 bits are cleared by setting SF=0. */
896 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
897 svalue = (int32_t)value;
898 value = (uint32_t)value;
899 ivalue = (uint32_t)ivalue;
900 type = TCG_TYPE_I32;
903 /* Speed things up by handling the common case of small positive
904 and negative values specially. */
905 if ((value & ~0xffffull) == 0) {
906 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
907 return;
908 } else if ((ivalue & ~0xffffull) == 0) {
909 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
910 return;
913 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
914 use the sign-extended value. That lets us match rotated values such
915 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
916 if (is_limm(svalue)) {
917 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
918 return;
921 /* Look for host pointer values within 4G of the PC. This happens
922 often when loading pointers to QEMU's own data structures. */
923 if (type == TCG_TYPE_I64) {
924 tcg_target_long disp = value - (intptr_t)s->code_ptr;
925 if (disp == sextract64(disp, 0, 21)) {
926 tcg_out_insn(s, 3406, ADR, rd, disp);
927 return;
929 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
930 if (disp == sextract64(disp, 0, 21)) {
931 tcg_out_insn(s, 3406, ADRP, rd, disp);
932 if (value & 0xfff) {
933 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
935 return;
939 /* Would it take fewer insns to begin with MOVN? */
940 if (ctpop64(value) >= 32) {
941 t0 = ivalue;
942 opc = I3405_MOVN;
943 } else {
944 t0 = value;
945 opc = I3405_MOVZ;
947 s0 = ctz64(t0) & (63 & -16);
948 t1 = t0 & ~(0xffffUL << s0);
949 s1 = ctz64(t1) & (63 & -16);
950 t2 = t1 & ~(0xffffUL << s1);
951 if (t2 == 0) {
952 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
953 if (t1 != 0) {
954 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
956 return;
959 /* For more than 2 insns, dump it into the constant pool. */
960 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
961 tcg_out_insn(s, 3305, LDR, 0, rd);
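/* Worked example for the two-insn path above: value = 0x0000123400005678
   has fewer than 32 set bits, so opc = MOVZ and t0 = value.  s0 = 0 strips
   the low halfword (t1 = 0x0000123400000000), s1 = 32 strips the next one
   (t2 = 0), and we emit MOVZ rd, #0x5678 then MOVK rd, #0x1234, lsl #32. */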
964 /* Define something more legible for general use. */
965 #define tcg_out_ldst_r tcg_out_insn_3310
967 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
968 TCGReg rn, intptr_t offset, int lgsize)
970 /* If the offset is naturally aligned and in range, then we can
971 use the scaled uimm12 encoding */
972 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
973 uintptr_t scaled_uimm = offset >> lgsize;
974 if (scaled_uimm <= 0xfff) {
975 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
976 return;
980 /* Small signed offsets can use the unscaled encoding. */
981 if (offset >= -256 && offset < 256) {
982 tcg_out_insn_3312(s, insn, rd, rn, offset);
983 return;
986 /* Worst-case scenario, move offset to temp register, use reg offset. */
987 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
988 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
991 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
993 if (ret == arg) {
994 return true;
996 switch (type) {
997 case TCG_TYPE_I32:
998 case TCG_TYPE_I64:
999 if (ret < 32 && arg < 32) {
1000 tcg_out_movr(s, type, ret, arg);
1001 break;
1002 } else if (ret < 32) {
1003 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1004 break;
1005 } else if (arg < 32) {
1006 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1007 break;
1009 /* FALLTHRU */
1011 case TCG_TYPE_V64:
1012 tcg_debug_assert(ret >= 32 && arg >= 32);
1013 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1014 break;
1015 case TCG_TYPE_V128:
1016 tcg_debug_assert(ret >= 32 && arg >= 32);
1017 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1018 break;
1020 default:
1021 g_assert_not_reached();
1023 return true;
1026 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1027 TCGReg base, intptr_t ofs)
1029 AArch64Insn insn;
1030 int lgsz;
1032 switch (type) {
1033 case TCG_TYPE_I32:
1034 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1035 lgsz = 2;
1036 break;
1037 case TCG_TYPE_I64:
1038 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1039 lgsz = 3;
1040 break;
1041 case TCG_TYPE_V64:
1042 insn = I3312_LDRVD;
1043 lgsz = 3;
1044 break;
1045 case TCG_TYPE_V128:
1046 insn = I3312_LDRVQ;
1047 lgsz = 4;
1048 break;
1049 default:
1050 g_assert_not_reached();
1052 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1055 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1056 TCGReg base, intptr_t ofs)
1058 AArch64Insn insn;
1059 int lgsz;
1061 switch (type) {
1062 case TCG_TYPE_I32:
1063 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1064 lgsz = 2;
1065 break;
1066 case TCG_TYPE_I64:
1067 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1068 lgsz = 3;
1069 break;
1070 case TCG_TYPE_V64:
1071 insn = I3312_STRVD;
1072 lgsz = 3;
1073 break;
1074 case TCG_TYPE_V128:
1075 insn = I3312_STRVQ;
1076 lgsz = 4;
1077 break;
1078 default:
1079 g_assert_not_reached();
1081 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1084 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1085 TCGReg base, intptr_t ofs)
1087 if (type <= TCG_TYPE_I64 && val == 0) {
1088 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1089 return true;
1091 return false;
1094 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1095 TCGReg rn, unsigned int a, unsigned int b)
1097 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1100 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1101 TCGReg rn, unsigned int a, unsigned int b)
1103 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1106 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1107 TCGReg rn, unsigned int a, unsigned int b)
1109 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1112 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1113 TCGReg rn, TCGReg rm, unsigned int a)
1115 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1118 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1119 TCGReg rd, TCGReg rn, unsigned int m)
1121 int bits = ext ? 64 : 32;
1122 int max = bits - 1;
1123 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
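/* This is the standard LSL-as-UBFM alias: e.g. a 32-bit shift left by 5
   becomes UBFM Wd, Wn, #27, #26, i.e. immr = (32 - 5) & 31, imms = 31 - 5. */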
1126 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1127 TCGReg rd, TCGReg rn, unsigned int m)
1129 int max = ext ? 63 : 31;
1130 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1133 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1134 TCGReg rd, TCGReg rn, unsigned int m)
1136 int max = ext ? 63 : 31;
1137 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1140 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1141 TCGReg rd, TCGReg rn, unsigned int m)
1143 int max = ext ? 63 : 31;
1144 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1147 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1148 TCGReg rd, TCGReg rn, unsigned int m)
1150 int bits = ext ? 64 : 32;
1151 int max = bits - 1;
1152 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1155 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1156 TCGReg rn, unsigned lsb, unsigned width)
1158 unsigned size = ext ? 64 : 32;
1159 unsigned a = (size - lsb) & (size - 1);
1160 unsigned b = width - 1;
1161 tcg_out_bfm(s, ext, rd, rn, a, b);
1164 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1165 tcg_target_long b, bool const_b)
1167 if (const_b) {
1168 /* Using CMP or CMN aliases. */
1169 if (b >= 0) {
1170 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1171 } else {
1172 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1174 } else {
1175 /* Using CMP alias SUBS wzr, Wn, Wm */
1176 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1180 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1182 ptrdiff_t offset = target - s->code_ptr;
1183 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1184 tcg_out_insn(s, 3206, B, offset);
1187 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1189 ptrdiff_t offset = target - s->code_ptr;
1190 if (offset == sextract64(offset, 0, 26)) {
1191         tcg_out_insn(s, 3206, B, offset);
1192 } else {
1193 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1194 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1198 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1200 tcg_out_insn(s, 3207, BLR, reg);
1203 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1205 ptrdiff_t offset = target - s->code_ptr;
1206 if (offset == sextract64(offset, 0, 26)) {
1207 tcg_out_insn(s, 3206, BL, offset);
1208 } else {
1209 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1210 tcg_out_callr(s, TCG_REG_TMP);
1214 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1215 uintptr_t addr)
1217 tcg_insn_unit i1, i2;
1218 TCGType rt = TCG_TYPE_I64;
1219 TCGReg rd = TCG_REG_TMP;
1220 uint64_t pair;
1222 ptrdiff_t offset = addr - jmp_addr;
1224 if (offset == sextract64(offset, 0, 26)) {
1225 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1226 i2 = NOP;
1227 } else {
1228 offset = (addr >> 12) - (jmp_addr >> 12);
1230 /* patch ADRP */
1231 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1232 /* patch ADDI */
1233 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1235 pair = (uint64_t)i2 << 32 | i1;
1236 atomic_set((uint64_t *)jmp_addr, pair);
1237 flush_icache_range(jmp_addr, jmp_addr + 8);
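/* The patched 8-byte slot therefore holds either "B target; NOP" when the
   displacement fits in 26 bits, or "ADRP TMP, page; ADD TMP, TMP, lo12" for
   a distant target, in which case the BR TMP emitted after the slot at
   translation time performs the actual jump. */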
1240 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1242 if (!l->has_value) {
1243 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1244 tcg_out_insn(s, 3206, B, 0);
1245 } else {
1246 tcg_out_goto(s, l->u.value_ptr);
1250 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1251 TCGArg b, bool b_const, TCGLabel *l)
1253 intptr_t offset;
1254 bool need_cmp;
1256 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1257 need_cmp = false;
1258 } else {
1259 need_cmp = true;
1260 tcg_out_cmp(s, ext, a, b, b_const);
1263 if (!l->has_value) {
1264 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1265 offset = tcg_in32(s) >> 5;
1266 } else {
1267 offset = l->u.value_ptr - s->code_ptr;
1268 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1271 if (need_cmp) {
1272 tcg_out_insn(s, 3202, B_C, c, offset);
1273 } else if (c == TCG_COND_EQ) {
1274 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1275 } else {
1276 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1280 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1282 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1285 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1287 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1290 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1292 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1295 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
1296 TCGReg rd, TCGReg rn)
1298 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1299 int bits = (8 << s_bits) - 1;
1300 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1303 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
1304 TCGReg rd, TCGReg rn)
1306 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1307 int bits = (8 << s_bits) - 1;
1308 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1311 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1312 TCGReg rn, int64_t aimm)
1314 if (aimm >= 0) {
1315 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1316 } else {
1317 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1321 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1322 TCGReg rh, TCGReg al, TCGReg ah,
1323 tcg_target_long bl, tcg_target_long bh,
1324 bool const_bl, bool const_bh, bool sub)
1326 TCGReg orig_rl = rl;
1327 AArch64Insn insn;
1329 if (rl == ah || (!const_bh && rl == bh)) {
1330 rl = TCG_REG_TMP;
1333 if (const_bl) {
1334 insn = I3401_ADDSI;
1335 if ((bl < 0) ^ sub) {
1336 insn = I3401_SUBSI;
1337 bl = -bl;
1339 if (unlikely(al == TCG_REG_XZR)) {
1340 /* ??? We want to allow al to be zero for the benefit of
1341 negation via subtraction. However, that leaves open the
1342 possibility of adding 0+const in the low part, and the
1343 immediate add instructions encode XSP not XZR. Don't try
1344 anything more elaborate here than loading another zero. */
1345 al = TCG_REG_TMP;
1346 tcg_out_movi(s, ext, al, 0);
1348 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1349 } else {
1350 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1353 insn = I3503_ADC;
1354 if (const_bh) {
1355 /* Note that the only two constants we support are 0 and -1, and
1356 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1357 if ((bh != 0) ^ sub) {
1358 insn = I3503_SBC;
1360 bh = TCG_REG_XZR;
1361 } else if (sub) {
1362 insn = I3503_SBC;
1364 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1366 tcg_out_mov(s, ext, orig_rl, rl);
1369 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1371 static const uint32_t sync[] = {
1372 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1373 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1374 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1375 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1376 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1378 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
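/* DMB_ISH above is "dmb" with the inner-shareable domain selected and the
   two access-type bits of CRm clear; OR-ing in DMB_LD yields dmb ishld,
   DMB_ST yields dmb ishst, and both together the full dmb ish, which is
   how the table builds each required ordering. */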
1381 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1382 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1384 TCGReg a1 = a0;
1385 if (is_ctz) {
1386 a1 = TCG_REG_TMP;
1387 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1389 if (const_b && b == (ext ? 64 : 32)) {
1390 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1391 } else {
1392 AArch64Insn sel = I3506_CSEL;
1394 tcg_out_cmp(s, ext, a0, 0, 1);
1395 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1397 if (const_b) {
1398 if (b == -1) {
1399 b = TCG_REG_XZR;
1400 sel = I3506_CSINV;
1401 } else if (b == 0) {
1402 b = TCG_REG_XZR;
1403 } else {
1404 tcg_out_movi(s, ext, d, b);
1405 b = d;
1408 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1412 #ifdef CONFIG_SOFTMMU
1413 #include "tcg-ldst.inc.c"
1415 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1416 * TCGMemOpIdx oi, uintptr_t ra)
1418 static void * const qemu_ld_helpers[16] = {
1419 [MO_UB] = helper_ret_ldub_mmu,
1420 [MO_LEUW] = helper_le_lduw_mmu,
1421 [MO_LEUL] = helper_le_ldul_mmu,
1422 [MO_LEQ] = helper_le_ldq_mmu,
1423 [MO_BEUW] = helper_be_lduw_mmu,
1424 [MO_BEUL] = helper_be_ldul_mmu,
1425 [MO_BEQ] = helper_be_ldq_mmu,
1428 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1429 * uintxx_t val, TCGMemOpIdx oi,
1430 * uintptr_t ra)
1432 static void * const qemu_st_helpers[16] = {
1433 [MO_UB] = helper_ret_stb_mmu,
1434 [MO_LEUW] = helper_le_stw_mmu,
1435 [MO_LEUL] = helper_le_stl_mmu,
1436 [MO_LEQ] = helper_le_stq_mmu,
1437 [MO_BEUW] = helper_be_stw_mmu,
1438 [MO_BEUL] = helper_be_stl_mmu,
1439 [MO_BEQ] = helper_be_stq_mmu,
1442 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1444 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1445 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1446 tcg_out_insn(s, 3406, ADR, rd, offset);
1449 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1451 TCGMemOpIdx oi = lb->oi;
1452 TCGMemOp opc = get_memop(oi);
1453 TCGMemOp size = opc & MO_SIZE;
1455 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1456 return false;
1459 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1460 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1461 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1462 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1463 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1464 if (opc & MO_SIGN) {
1465 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1466 } else {
1467 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1470 tcg_out_goto(s, lb->raddr);
1471 return true;
1474 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1476 TCGMemOpIdx oi = lb->oi;
1477 TCGMemOp opc = get_memop(oi);
1478 TCGMemOp size = opc & MO_SIZE;
1480 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1481 return false;
1484 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1485 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1486 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1487 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1488 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1489 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1490 tcg_out_goto(s, lb->raddr);
1491 return true;
1494 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1495 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1496 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1498 TCGLabelQemuLdst *label = new_ldst_label(s);
1500 label->is_ld = is_ld;
1501 label->oi = oi;
1502 label->type = ext;
1503 label->datalo_reg = data_reg;
1504 label->addrlo_reg = addr_reg;
1505 label->raddr = raddr;
1506 label->label_ptr[0] = label_ptr;
1509 /* We expect tlb_mask to be before tlb_table. */
1510 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
1511 offsetof(CPUArchState, tlb_mask));
1513 /* We expect to use a 24-bit unsigned offset from ENV. */
1514 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
1515 > 0xffffff);
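/* The 24-bit bound matches how the offset is consumed below: any part above
   0xfff is folded into ENV with one ADDI (a 12-bit immediate, optionally
   shifted left by 12), leaving at most 12 low bits for the load itself. */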
1517 /* Load and compare a TLB entry, emitting the conditional jump to the
1518 slow path for the failure case, which will be patched later when finalizing
1519 the slow path. Generated code returns the host addend in X1,
1520 clobbers X0,X2,X3,TMP. */
1521 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1522 tcg_insn_unit **label_ptr, int mem_index,
1523 bool is_read)
1525 int mask_ofs = offsetof(CPUArchState, tlb_mask[mem_index]);
1526 int table_ofs = offsetof(CPUArchState, tlb_table[mem_index]);
1527 unsigned a_bits = get_alignment_bits(opc);
1528 unsigned s_bits = opc & MO_SIZE;
1529 unsigned a_mask = (1u << a_bits) - 1;
1530 unsigned s_mask = (1u << s_bits) - 1;
1531 TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0, x3;
1532 TCGType mask_type;
1533 uint64_t compare_mask;
1535 if (table_ofs > 0xfff) {
1536 int table_hi = table_ofs & ~0xfff;
1537 int mask_hi = mask_ofs & ~0xfff;
1539 table_base = TCG_REG_X1;
1540 if (mask_hi == table_hi) {
1541 mask_base = table_base;
1542 } else if (mask_hi) {
1543 mask_base = TCG_REG_X0;
1544 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
1545 mask_base, TCG_AREG0, mask_hi);
1547 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
1548 table_base, TCG_AREG0, table_hi);
1549 mask_ofs -= mask_hi;
1550 table_ofs -= table_hi;
1553 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1554 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1556 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
1557 tcg_out_ld(s, mask_type, TCG_REG_X0, mask_base, mask_ofs);
1558 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, table_base, table_ofs);
1560 /* Extract the TLB index from the address into X0. */
1561 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1562 TCG_REG_X0, TCG_REG_X0, addr_reg,
1563 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1565 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
1566 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1568 /* Load the tlb comparator into X0, and the fast path addend into X1. */
1569 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1570 ? offsetof(CPUTLBEntry, addr_read)
1571 : offsetof(CPUTLBEntry, addr_write));
1572 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1573 offsetof(CPUTLBEntry, addend));
1575 /* For aligned accesses, we check the first byte and include the alignment
1576 bits within the address. For unaligned access, we check that we don't
1577 cross pages using the address of the last byte of the access. */
1578 if (a_bits >= s_bits) {
1579 x3 = addr_reg;
1580 } else {
1581 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1582 TCG_REG_X3, addr_reg, s_mask - a_mask);
1583 x3 = TCG_REG_X3;
1585 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1587 /* Store the page mask part of the address into X3. */
1588 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1589 TCG_REG_X3, x3, compare_mask);
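/* Example: a 4-byte access with no alignment requirement has a_mask = 0 and
   s_mask = 3, so X3 holds (addr + 3) & TARGET_PAGE_MASK and an access that
   spills into the next page fails the compare.  A naturally aligned 4-byte
   access instead keeps the two low address bits in the mask, so a misaligned
   address can never equal the comparator and also takes the slow path. */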
1591 /* Perform the address comparison. */
1592 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1594 /* If not equal, we jump to the slow path. */
1595 *label_ptr = s->code_ptr;
1596 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1599 #endif /* CONFIG_SOFTMMU */
1601 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1602 TCGReg data_r, TCGReg addr_r,
1603 TCGType otype, TCGReg off_r)
1605 const TCGMemOp bswap = memop & MO_BSWAP;
1607 switch (memop & MO_SSIZE) {
1608 case MO_UB:
1609 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1610 break;
1611 case MO_SB:
1612 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1613 data_r, addr_r, otype, off_r);
1614 break;
1615 case MO_UW:
1616 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1617 if (bswap) {
1618 tcg_out_rev16(s, data_r, data_r);
1620 break;
1621 case MO_SW:
1622 if (bswap) {
1623 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1624 tcg_out_rev16(s, data_r, data_r);
1625 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1626 } else {
1627 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1628 data_r, addr_r, otype, off_r);
1630 break;
1631 case MO_UL:
1632 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1633 if (bswap) {
1634 tcg_out_rev32(s, data_r, data_r);
1636 break;
1637 case MO_SL:
1638 if (bswap) {
1639 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1640 tcg_out_rev32(s, data_r, data_r);
1641 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1642 } else {
1643 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1645 break;
1646 case MO_Q:
1647 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1648 if (bswap) {
1649 tcg_out_rev64(s, data_r, data_r);
1651 break;
1652 default:
1653 tcg_abort();
1657 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1658 TCGReg data_r, TCGReg addr_r,
1659 TCGType otype, TCGReg off_r)
1661 const TCGMemOp bswap = memop & MO_BSWAP;
1663 switch (memop & MO_SIZE) {
1664 case MO_8:
1665 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1666 break;
1667 case MO_16:
1668 if (bswap && data_r != TCG_REG_XZR) {
1669 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1670 data_r = TCG_REG_TMP;
1672 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1673 break;
1674 case MO_32:
1675 if (bswap && data_r != TCG_REG_XZR) {
1676 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1677 data_r = TCG_REG_TMP;
1679 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1680 break;
1681 case MO_64:
1682 if (bswap && data_r != TCG_REG_XZR) {
1683 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1684 data_r = TCG_REG_TMP;
1686 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1687 break;
1688 default:
1689 tcg_abort();
1693 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1694 TCGMemOpIdx oi, TCGType ext)
1696 TCGMemOp memop = get_memop(oi);
1697 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1698 #ifdef CONFIG_SOFTMMU
1699 unsigned mem_index = get_mmuidx(oi);
1700 tcg_insn_unit *label_ptr;
1702 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1703 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1704 TCG_REG_X1, otype, addr_reg);
1705 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1706 s->code_ptr, label_ptr);
1707 #else /* !CONFIG_SOFTMMU */
1708 if (USE_GUEST_BASE) {
1709 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1710 TCG_REG_GUEST_BASE, otype, addr_reg);
1711 } else {
1712 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1713 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1715 #endif /* CONFIG_SOFTMMU */
1718 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1719 TCGMemOpIdx oi)
1721 TCGMemOp memop = get_memop(oi);
1722 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1723 #ifdef CONFIG_SOFTMMU
1724 unsigned mem_index = get_mmuidx(oi);
1725 tcg_insn_unit *label_ptr;
1727 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1728 tcg_out_qemu_st_direct(s, memop, data_reg,
1729 TCG_REG_X1, otype, addr_reg);
1730 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1731 data_reg, addr_reg, s->code_ptr, label_ptr);
1732 #else /* !CONFIG_SOFTMMU */
1733 if (USE_GUEST_BASE) {
1734 tcg_out_qemu_st_direct(s, memop, data_reg,
1735 TCG_REG_GUEST_BASE, otype, addr_reg);
1736 } else {
1737 tcg_out_qemu_st_direct(s, memop, data_reg,
1738 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1740 #endif /* CONFIG_SOFTMMU */
1743 static tcg_insn_unit *tb_ret_addr;
1745 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1746 const TCGArg args[TCG_MAX_OP_ARGS],
1747 const int const_args[TCG_MAX_OP_ARGS])
1749 /* 99% of the time, we can signal the use of extension registers
1750 by looking to see if the opcode handles 64-bit data. */
1751 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1753 /* Hoist the loads of the most common arguments. */
1754 TCGArg a0 = args[0];
1755 TCGArg a1 = args[1];
1756 TCGArg a2 = args[2];
1757 int c2 = const_args[2];
1759 /* Some operands are defined with "rZ" constraint, a register or
1760 the zero register. These need not actually test args[I] == 0. */
1761 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1763 switch (opc) {
1764 case INDEX_op_exit_tb:
1765 /* Reuse the zeroing that exists for goto_ptr. */
1766 if (a0 == 0) {
1767 tcg_out_goto_long(s, s->code_gen_epilogue);
1768 } else {
1769 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1770 tcg_out_goto_long(s, tb_ret_addr);
1772 break;
1774 case INDEX_op_goto_tb:
1775 if (s->tb_jmp_insn_offset != NULL) {
1776 /* TCG_TARGET_HAS_direct_jump */
1777 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1778 write can be used to patch the target address. */
1779 if ((uintptr_t)s->code_ptr & 7) {
1780 tcg_out32(s, NOP);
1782 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1783 /* actual branch destination will be patched by
1784 tb_target_set_jmp_target later. */
1785 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1786 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1787 } else {
1788 /* !TCG_TARGET_HAS_direct_jump */
1789 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1790 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1791 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1793 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1794 set_jmp_reset_offset(s, a0);
1795 break;
1797 case INDEX_op_goto_ptr:
1798 tcg_out_insn(s, 3207, BR, a0);
1799 break;
1801 case INDEX_op_br:
1802 tcg_out_goto_label(s, arg_label(a0));
1803 break;
1805 case INDEX_op_ld8u_i32:
1806 case INDEX_op_ld8u_i64:
1807 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1808 break;
1809 case INDEX_op_ld8s_i32:
1810 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1811 break;
1812 case INDEX_op_ld8s_i64:
1813 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1814 break;
1815 case INDEX_op_ld16u_i32:
1816 case INDEX_op_ld16u_i64:
1817 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1818 break;
1819 case INDEX_op_ld16s_i32:
1820 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1821 break;
1822 case INDEX_op_ld16s_i64:
1823 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1824 break;
1825 case INDEX_op_ld_i32:
1826 case INDEX_op_ld32u_i64:
1827 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1828 break;
1829 case INDEX_op_ld32s_i64:
1830 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1831 break;
1832 case INDEX_op_ld_i64:
1833 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1834 break;
1836 case INDEX_op_st8_i32:
1837 case INDEX_op_st8_i64:
1838 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1839 break;
1840 case INDEX_op_st16_i32:
1841 case INDEX_op_st16_i64:
1842 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1843 break;
1844 case INDEX_op_st_i32:
1845 case INDEX_op_st32_i64:
1846 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1847 break;
1848 case INDEX_op_st_i64:
1849 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1850 break;
1852 case INDEX_op_add_i32:
1853 a2 = (int32_t)a2;
1854 /* FALLTHRU */
1855 case INDEX_op_add_i64:
1856 if (c2) {
1857 tcg_out_addsubi(s, ext, a0, a1, a2);
1858 } else {
1859 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1861 break;
1863 case INDEX_op_sub_i32:
1864 a2 = (int32_t)a2;
1865 /* FALLTHRU */
1866 case INDEX_op_sub_i64:
1867 if (c2) {
1868 tcg_out_addsubi(s, ext, a0, a1, -a2);
1869 } else {
1870 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1872 break;
1874 case INDEX_op_neg_i64:
1875 case INDEX_op_neg_i32:
1876 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1877 break;
1879 case INDEX_op_and_i32:
1880 a2 = (int32_t)a2;
1881 /* FALLTHRU */
1882 case INDEX_op_and_i64:
1883 if (c2) {
1884 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1885 } else {
1886 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1888 break;
1890 case INDEX_op_andc_i32:
1891 a2 = (int32_t)a2;
1892 /* FALLTHRU */
1893 case INDEX_op_andc_i64:
1894 if (c2) {
1895 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1896 } else {
1897 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1899 break;
1901 case INDEX_op_or_i32:
1902 a2 = (int32_t)a2;
1903 /* FALLTHRU */
1904 case INDEX_op_or_i64:
1905 if (c2) {
1906 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1907 } else {
1908 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1910 break;
1912 case INDEX_op_orc_i32:
1913 a2 = (int32_t)a2;
1914 /* FALLTHRU */
1915 case INDEX_op_orc_i64:
1916 if (c2) {
1917 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1918 } else {
1919 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1921 break;
1923 case INDEX_op_xor_i32:
1924 a2 = (int32_t)a2;
1925 /* FALLTHRU */
1926 case INDEX_op_xor_i64:
1927 if (c2) {
1928 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1929 } else {
1930 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1932 break;
1934 case INDEX_op_eqv_i32:
1935 a2 = (int32_t)a2;
1936 /* FALLTHRU */
1937 case INDEX_op_eqv_i64:
1938 if (c2) {
1939 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1940 } else {
1941 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1943 break;
1945 case INDEX_op_not_i64:
1946 case INDEX_op_not_i32:
1947 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1948 break;
1950 case INDEX_op_mul_i64:
1951 case INDEX_op_mul_i32:
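/* MUL is the MADD alias with XZR as the addend.  */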
1952 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1953 break;
1955 case INDEX_op_div_i64:
1956 case INDEX_op_div_i32:
1957 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1958 break;
1959 case INDEX_op_divu_i64:
1960 case INDEX_op_divu_i32:
1961 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1962 break;
1964 case INDEX_op_rem_i64:
1965 case INDEX_op_rem_i32:
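/* Divide into TMP, then MSUB computes a0 = a1 - TMP * a2, i.e. the remainder.  */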
1966 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1967 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1968 break;
1969 case INDEX_op_remu_i64:
1970 case INDEX_op_remu_i32:
1971 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1972 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1973 break;
1975 case INDEX_op_shl_i64:
1976 case INDEX_op_shl_i32:
1977 if (c2) {
1978 tcg_out_shl(s, ext, a0, a1, a2);
1979 } else {
1980 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1982 break;
1984 case INDEX_op_shr_i64:
1985 case INDEX_op_shr_i32:
1986 if (c2) {
1987 tcg_out_shr(s, ext, a0, a1, a2);
1988 } else {
1989 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1991 break;
1993 case INDEX_op_sar_i64:
1994 case INDEX_op_sar_i32:
1995 if (c2) {
1996 tcg_out_sar(s, ext, a0, a1, a2);
1997 } else {
1998 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2000 break;
2002 case INDEX_op_rotr_i64:
2003 case INDEX_op_rotr_i32:
2004 if (c2) {
2005 tcg_out_rotr(s, ext, a0, a1, a2);
2006 } else {
2007 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2009 break;
2011 case INDEX_op_rotl_i64:
2012 case INDEX_op_rotl_i32:
2013 if (c2) {
2014 tcg_out_rotl(s, ext, a0, a1, a2);
2015 } else {
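/* There is no rotate-left insn; negate the count and rotate right.
   RORV only uses the low bits of the count, so a 32-bit negate suffices.  */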
2016 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2017 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2019 break;
2021 case INDEX_op_clz_i64:
2022 case INDEX_op_clz_i32:
2023 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2024 break;
2025 case INDEX_op_ctz_i64:
2026 case INDEX_op_ctz_i32:
2027 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2028 break;
2030 case INDEX_op_brcond_i32:
2031 a1 = (int32_t)a1;
2032 /* FALLTHRU */
2033 case INDEX_op_brcond_i64:
2034 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2035 break;
2037 case INDEX_op_setcond_i32:
2038 a2 = (int32_t)a2;
2039 /* FALLTHRU */
2040 case INDEX_op_setcond_i64:
2041 tcg_out_cmp(s, ext, a1, a2, c2);
2042 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2043 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2044 TCG_REG_XZR, tcg_invert_cond(args[3]));
2045 break;
2047 case INDEX_op_movcond_i32:
2048 a2 = (int32_t)a2;
2049 /* FALLTHRU */
2050 case INDEX_op_movcond_i64:
2051 tcg_out_cmp(s, ext, a1, a2, c2);
2052 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2053 break;
2055 case INDEX_op_qemu_ld_i32:
2056 case INDEX_op_qemu_ld_i64:
2057 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2058 break;
2059 case INDEX_op_qemu_st_i32:
2060 case INDEX_op_qemu_st_i64:
2061 tcg_out_qemu_st(s, REG0(0), a1, a2);
2062 break;
2064 case INDEX_op_bswap64_i64:
2065 tcg_out_rev64(s, a0, a1);
2066 break;
2067 case INDEX_op_bswap32_i64:
2068 case INDEX_op_bswap32_i32:
2069 tcg_out_rev32(s, a0, a1);
2070 break;
2071 case INDEX_op_bswap16_i64:
2072 case INDEX_op_bswap16_i32:
2073 tcg_out_rev16(s, a0, a1);
2074 break;
2076 case INDEX_op_ext8s_i64:
2077 case INDEX_op_ext8s_i32:
2078 tcg_out_sxt(s, ext, MO_8, a0, a1);
2079 break;
2080 case INDEX_op_ext16s_i64:
2081 case INDEX_op_ext16s_i32:
2082 tcg_out_sxt(s, ext, MO_16, a0, a1);
2083 break;
2084 case INDEX_op_ext_i32_i64:
2085 case INDEX_op_ext32s_i64:
2086 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2087 break;
2088 case INDEX_op_ext8u_i64:
2089 case INDEX_op_ext8u_i32:
2090 tcg_out_uxt(s, MO_8, a0, a1);
2091 break;
2092 case INDEX_op_ext16u_i64:
2093 case INDEX_op_ext16u_i32:
2094 tcg_out_uxt(s, MO_16, a0, a1);
2095 break;
2096 case INDEX_op_extu_i32_i64:
2097 case INDEX_op_ext32u_i64:
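/* Writing a W register zeroes the upper 32 bits, so a 32-bit MOV is a zero-extension.  */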
2098 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2099 break;
2101 case INDEX_op_deposit_i64:
2102 case INDEX_op_deposit_i32:
2103 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2104 break;
2106 case INDEX_op_extract_i64:
2107 case INDEX_op_extract_i32:
2108 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2109 break;
2111 case INDEX_op_sextract_i64:
2112 case INDEX_op_sextract_i32:
2113 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2114 break;
2116 case INDEX_op_extract2_i64:
2117 case INDEX_op_extract2_i32:
2118 tcg_out_extr(s, ext, a0, a1, a2, args[3]);
2119 break;
2121 case INDEX_op_add2_i32:
2122 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2123 (int32_t)args[4], args[5], const_args[4],
2124 const_args[5], false);
2125 break;
2126 case INDEX_op_add2_i64:
2127 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2128 args[5], const_args[4], const_args[5], false);
2129 break;
2130 case INDEX_op_sub2_i32:
2131 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2132 (int32_t)args[4], args[5], const_args[4],
2133 const_args[5], true);
2134 break;
2135 case INDEX_op_sub2_i64:
2136 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2137 args[5], const_args[4], const_args[5], true);
2138 break;
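/* UMULH/SMULH produce the high 64 bits of the unsigned/signed 128-bit product.  */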
2140 case INDEX_op_muluh_i64:
2141 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2142 break;
2143 case INDEX_op_mulsh_i64:
2144 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2145 break;
2147 case INDEX_op_mb:
2148 tcg_out_mb(s, a0);
2149 break;
2151 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2152 case INDEX_op_mov_i64:
2153 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2154 case INDEX_op_movi_i64:
2155 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2156 default:
2157 g_assert_not_reached();
2160 #undef REG0
2163 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2164 unsigned vecl, unsigned vece,
2165 const TCGArg *args, const int *const_args)
2167 static const AArch64Insn cmp_insn[16] = {
2168 [TCG_COND_EQ] = I3616_CMEQ,
2169 [TCG_COND_GT] = I3616_CMGT,
2170 [TCG_COND_GE] = I3616_CMGE,
2171 [TCG_COND_GTU] = I3616_CMHI,
2172 [TCG_COND_GEU] = I3616_CMHS,
2174 static const AArch64Insn cmp0_insn[16] = {
2175 [TCG_COND_EQ] = I3617_CMEQ0,
2176 [TCG_COND_GT] = I3617_CMGT0,
2177 [TCG_COND_GE] = I3617_CMGE0,
2178 [TCG_COND_LT] = I3617_CMLT0,
2179 [TCG_COND_LE] = I3617_CMLE0,
2182 TCGType type = vecl + TCG_TYPE_V64;
2183 unsigned is_q = vecl;
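/* vecl is 0 for a 64-bit vector and 1 for a 128-bit vector, which is exactly the Q bit of the encodings.  */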
2184 TCGArg a0, a1, a2;
2186 a0 = args[0];
2187 a1 = args[1];
2188 a2 = args[2];
2190 switch (opc) {
2191 case INDEX_op_ld_vec:
2192 tcg_out_ld(s, type, a0, a1, a2);
2193 break;
2194 case INDEX_op_st_vec:
2195 tcg_out_st(s, type, a0, a1, a2);
2196 break;
2197 case INDEX_op_dupm_vec:
2198 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2199 break;
2200 case INDEX_op_add_vec:
2201 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2202 break;
2203 case INDEX_op_sub_vec:
2204 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2205 break;
2206 case INDEX_op_mul_vec:
2207 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2208 break;
2209 case INDEX_op_neg_vec:
2210 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2211 break;
2212 case INDEX_op_abs_vec:
2213 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2214 break;
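/* Bitwise operations ignore the element size, so it is hard-coded to 0 below.  */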
2215 case INDEX_op_and_vec:
2216 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2217 break;
2218 case INDEX_op_or_vec:
2219 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2220 break;
2221 case INDEX_op_xor_vec:
2222 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2223 break;
2224 case INDEX_op_andc_vec:
2225 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2226 break;
2227 case INDEX_op_orc_vec:
2228 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2229 break;
2230 case INDEX_op_ssadd_vec:
2231 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2232 break;
2233 case INDEX_op_sssub_vec:
2234 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2235 break;
2236 case INDEX_op_usadd_vec:
2237 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2238 break;
2239 case INDEX_op_ussub_vec:
2240 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2241 break;
2242 case INDEX_op_smax_vec:
2243 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2244 break;
2245 case INDEX_op_smin_vec:
2246 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2247 break;
2248 case INDEX_op_umax_vec:
2249 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2250 break;
2251 case INDEX_op_umin_vec:
2252 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2253 break;
2254 case INDEX_op_not_vec:
2255 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2256 break;
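/* Immediate shifts fold the element size into the immediate field:
   left shifts encode esize + shift, right shifts 2 * esize - shift,
   with esize = 8 << vece.  */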
2257 case INDEX_op_shli_vec:
2258 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2259 break;
2260 case INDEX_op_shri_vec:
2261 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2262 break;
2263 case INDEX_op_sari_vec:
2264 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2265 break;
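/* USHL/SSHL shift left for positive counts and right for negative ones;
   the shrv/sarv expansion in tcg_expand_vec_op relies on this.  */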
2266 case INDEX_op_shlv_vec:
2267 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2268 break;
2269 case INDEX_op_aa64_sshl_vec:
2270 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2271 break;
2272 case INDEX_op_cmp_vec:
2274 TCGCond cond = args[3];
2275 AArch64Insn insn;
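/* There is no CMNE: for x != 0 use CMTST x, x (non-zero lanes become all-ones);
   otherwise CMEQ and invert the result.  */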
2277 if (cond == TCG_COND_NE) {
2278 if (const_args[2]) {
2279 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2280 } else {
2281 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2282 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2284 } else {
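/* Compare directly against zero when a CM<cc>0 form exists for this
   condition; otherwise materialize zero in VEC_TMP and fall through.  */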
2285 if (const_args[2]) {
2286 insn = cmp0_insn[cond];
2287 if (insn) {
2288 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2289 break;
2291 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2292 a2 = TCG_VEC_TMP;
2294 insn = cmp_insn[cond];
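/* Only EQ/GT/GE/GTU/GEU are encoded; the remaining conditions are
   handled by swapping the operands and the condition.  */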
2295 if (insn == 0) {
2296 TCGArg t;
2297 t = a1, a1 = a2, a2 = t;
2298 cond = tcg_swap_cond(cond);
2299 insn = cmp_insn[cond];
2300 tcg_debug_assert(insn != 0);
2302 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2305 break;
2307 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2308 case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
2309 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2310 default:
2311 g_assert_not_reached();
2315 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
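/* Return 1 if the vector op is supported directly, -1 if it can be
   emitted via expansion in tcg_expand_vec_op, and 0 if unsupported.  */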
2317 switch (opc) {
2318 case INDEX_op_add_vec:
2319 case INDEX_op_sub_vec:
2320 case INDEX_op_and_vec:
2321 case INDEX_op_or_vec:
2322 case INDEX_op_xor_vec:
2323 case INDEX_op_andc_vec:
2324 case INDEX_op_orc_vec:
2325 case INDEX_op_neg_vec:
2326 case INDEX_op_abs_vec:
2327 case INDEX_op_not_vec:
2328 case INDEX_op_cmp_vec:
2329 case INDEX_op_shli_vec:
2330 case INDEX_op_shri_vec:
2331 case INDEX_op_sari_vec:
2332 case INDEX_op_ssadd_vec:
2333 case INDEX_op_sssub_vec:
2334 case INDEX_op_usadd_vec:
2335 case INDEX_op_ussub_vec:
2336 case INDEX_op_shlv_vec:
2337 return 1;
2338 case INDEX_op_shrv_vec:
2339 case INDEX_op_sarv_vec:
2340 return -1;
2341 case INDEX_op_mul_vec:
2342 case INDEX_op_smax_vec:
2343 case INDEX_op_smin_vec:
2344 case INDEX_op_umax_vec:
2345 case INDEX_op_umin_vec:
2346 return vece < MO_64;
2348 default:
2349 return 0;
2353 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2354 TCGArg a0, ...)
2356 va_list va;
2357 TCGv_vec v0, v1, v2, t1;
2359 va_start(va, a0);
2360 v0 = temp_tcgv_vec(arg_temp(a0));
2361 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2362 v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2364 switch (opc) {
2365 case INDEX_op_shrv_vec:
2366 case INDEX_op_sarv_vec:
2367         /* AArch64 has no vector right shift by register; shift left by the negated count instead.  */
2368 t1 = tcg_temp_new_vec(type);
2369 tcg_gen_neg_vec(vece, t1, v2);
2370 opc = (opc == INDEX_op_shrv_vec
2371 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2372 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2373 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2374 tcg_temp_free_vec(t1);
2375 break;
2377 default:
2378 g_assert_not_reached();
2381 va_end(va);
2384 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
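/* Constraint letters: 'r' general register, 'w' vector register,
   'l' general register usable as a qemu_ld/st address, 'Z' also allows
   constant zero (XZR), 'A' arithmetic (ADD/SUB) immediate, 'L' logical
   immediate; see target_parse_constraint for the remaining letters.  */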
2386 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2387 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2388 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2389 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2390 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2391 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2392 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2393 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2394 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2395 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2396 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2397 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2398 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2399 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2400 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2401 static const TCGTargetOpDef r_r_rAL
2402 = { .args_ct_str = { "r", "r", "rAL" } };
2403 static const TCGTargetOpDef dep
2404 = { .args_ct_str = { "r", "0", "rZ" } };
2405 static const TCGTargetOpDef ext2
2406 = { .args_ct_str = { "r", "rZ", "rZ" } };
2407 static const TCGTargetOpDef movc
2408 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2409 static const TCGTargetOpDef add2
2410 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2412 switch (op) {
2413 case INDEX_op_goto_ptr:
2414 return &r;
2416 case INDEX_op_ld8u_i32:
2417 case INDEX_op_ld8s_i32:
2418 case INDEX_op_ld16u_i32:
2419 case INDEX_op_ld16s_i32:
2420 case INDEX_op_ld_i32:
2421 case INDEX_op_ld8u_i64:
2422 case INDEX_op_ld8s_i64:
2423 case INDEX_op_ld16u_i64:
2424 case INDEX_op_ld16s_i64:
2425 case INDEX_op_ld32u_i64:
2426 case INDEX_op_ld32s_i64:
2427 case INDEX_op_ld_i64:
2428 case INDEX_op_neg_i32:
2429 case INDEX_op_neg_i64:
2430 case INDEX_op_not_i32:
2431 case INDEX_op_not_i64:
2432 case INDEX_op_bswap16_i32:
2433 case INDEX_op_bswap32_i32:
2434 case INDEX_op_bswap16_i64:
2435 case INDEX_op_bswap32_i64:
2436 case INDEX_op_bswap64_i64:
2437 case INDEX_op_ext8s_i32:
2438 case INDEX_op_ext16s_i32:
2439 case INDEX_op_ext8u_i32:
2440 case INDEX_op_ext16u_i32:
2441 case INDEX_op_ext8s_i64:
2442 case INDEX_op_ext16s_i64:
2443 case INDEX_op_ext32s_i64:
2444 case INDEX_op_ext8u_i64:
2445 case INDEX_op_ext16u_i64:
2446 case INDEX_op_ext32u_i64:
2447 case INDEX_op_ext_i32_i64:
2448 case INDEX_op_extu_i32_i64:
2449 case INDEX_op_extract_i32:
2450 case INDEX_op_extract_i64:
2451 case INDEX_op_sextract_i32:
2452 case INDEX_op_sextract_i64:
2453 return &r_r;
2455 case INDEX_op_st8_i32:
2456 case INDEX_op_st16_i32:
2457 case INDEX_op_st_i32:
2458 case INDEX_op_st8_i64:
2459 case INDEX_op_st16_i64:
2460 case INDEX_op_st32_i64:
2461 case INDEX_op_st_i64:
2462 return &rZ_r;
2464 case INDEX_op_add_i32:
2465 case INDEX_op_add_i64:
2466 case INDEX_op_sub_i32:
2467 case INDEX_op_sub_i64:
2468 case INDEX_op_setcond_i32:
2469 case INDEX_op_setcond_i64:
2470 return &r_r_rA;
2472 case INDEX_op_mul_i32:
2473 case INDEX_op_mul_i64:
2474 case INDEX_op_div_i32:
2475 case INDEX_op_div_i64:
2476 case INDEX_op_divu_i32:
2477 case INDEX_op_divu_i64:
2478 case INDEX_op_rem_i32:
2479 case INDEX_op_rem_i64:
2480 case INDEX_op_remu_i32:
2481 case INDEX_op_remu_i64:
2482 case INDEX_op_muluh_i64:
2483 case INDEX_op_mulsh_i64:
2484 return &r_r_r;
2486 case INDEX_op_and_i32:
2487 case INDEX_op_and_i64:
2488 case INDEX_op_or_i32:
2489 case INDEX_op_or_i64:
2490 case INDEX_op_xor_i32:
2491 case INDEX_op_xor_i64:
2492 case INDEX_op_andc_i32:
2493 case INDEX_op_andc_i64:
2494 case INDEX_op_orc_i32:
2495 case INDEX_op_orc_i64:
2496 case INDEX_op_eqv_i32:
2497 case INDEX_op_eqv_i64:
2498 return &r_r_rL;
2500 case INDEX_op_shl_i32:
2501 case INDEX_op_shr_i32:
2502 case INDEX_op_sar_i32:
2503 case INDEX_op_rotl_i32:
2504 case INDEX_op_rotr_i32:
2505 case INDEX_op_shl_i64:
2506 case INDEX_op_shr_i64:
2507 case INDEX_op_sar_i64:
2508 case INDEX_op_rotl_i64:
2509 case INDEX_op_rotr_i64:
2510 return &r_r_ri;
2512 case INDEX_op_clz_i32:
2513 case INDEX_op_ctz_i32:
2514 case INDEX_op_clz_i64:
2515 case INDEX_op_ctz_i64:
2516 return &r_r_rAL;
2518 case INDEX_op_brcond_i32:
2519 case INDEX_op_brcond_i64:
2520 return &r_rA;
2522 case INDEX_op_movcond_i32:
2523 case INDEX_op_movcond_i64:
2524 return &movc;
2526 case INDEX_op_qemu_ld_i32:
2527 case INDEX_op_qemu_ld_i64:
2528 return &r_l;
2529 case INDEX_op_qemu_st_i32:
2530 case INDEX_op_qemu_st_i64:
2531 return &lZ_l;
2533 case INDEX_op_deposit_i32:
2534 case INDEX_op_deposit_i64:
2535 return &dep;
2537 case INDEX_op_extract2_i32:
2538 case INDEX_op_extract2_i64:
2539 return &ext2;
2541 case INDEX_op_add2_i32:
2542 case INDEX_op_add2_i64:
2543 case INDEX_op_sub2_i32:
2544 case INDEX_op_sub2_i64:
2545 return &add2;
2547 case INDEX_op_add_vec:
2548 case INDEX_op_sub_vec:
2549 case INDEX_op_mul_vec:
2550 case INDEX_op_and_vec:
2551 case INDEX_op_or_vec:
2552 case INDEX_op_xor_vec:
2553 case INDEX_op_andc_vec:
2554 case INDEX_op_orc_vec:
2555 case INDEX_op_ssadd_vec:
2556 case INDEX_op_sssub_vec:
2557 case INDEX_op_usadd_vec:
2558 case INDEX_op_ussub_vec:
2559 case INDEX_op_smax_vec:
2560 case INDEX_op_smin_vec:
2561 case INDEX_op_umax_vec:
2562 case INDEX_op_umin_vec:
2563 case INDEX_op_shlv_vec:
2564 case INDEX_op_shrv_vec:
2565 case INDEX_op_sarv_vec:
2566 case INDEX_op_aa64_sshl_vec:
2567 return &w_w_w;
2568 case INDEX_op_not_vec:
2569 case INDEX_op_neg_vec:
2570 case INDEX_op_abs_vec:
2571 case INDEX_op_shli_vec:
2572 case INDEX_op_shri_vec:
2573 case INDEX_op_sari_vec:
2574 return &w_w;
2575 case INDEX_op_ld_vec:
2576 case INDEX_op_st_vec:
2577 case INDEX_op_dupm_vec:
2578 return &w_r;
2579 case INDEX_op_dup_vec:
2580 return &w_wr;
2581 case INDEX_op_cmp_vec:
2582 return &w_w_wZ;
2584 default:
2585 return NULL;
2589 static void tcg_target_init(TCGContext *s)
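/* The low 32 bits of a regset are the general registers, the high 32 bits the vector registers.  */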
2591 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2592 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2593 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2594 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2596 tcg_target_call_clobber_regs = -1ull;
2597 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2598 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2599 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2600 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2601 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2602 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2603 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2604 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2605 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2606 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2607 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2608 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2609 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2610 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2611 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2612 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2613 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2614 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2615 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2617 s->reserved_regs = 0;
2618 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2619 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2620 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2621 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2622 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2625 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2626 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2628 #define FRAME_SIZE \
2629 ((PUSH_SIZE \
2630 + TCG_STATIC_CALL_ARGS_SIZE \
2631 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2632 + TCG_TARGET_STACK_ALIGN - 1) \
2633 & ~(TCG_TARGET_STACK_ALIGN - 1))
2635 /* We're expecting a 2 byte uleb128 encoded value. */
2636 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2638 /* We're expecting to use a single ADDI insn. */
2639 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2641 static void tcg_target_qemu_prologue(TCGContext *s)
2643 TCGReg r;
2645 /* Push (FP, LR) and allocate space for all saved registers. */
2646 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2647 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2649 /* Set up frame pointer for canonical unwinding. */
2650 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2652 /* Store callee-preserved regs x19..x28. */
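/* The STP above put (FP, LR) at offsets 0 and 8, so these pairs start at offset 16.  */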
2653 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2654 int ofs = (r - TCG_REG_X19 + 2) * 8;
2655 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2658 /* Make stack space for TCG locals. */
2659 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2660 FRAME_SIZE - PUSH_SIZE);
2662     /* Tell TCG how to find its locals: base register, offset and size.  */
2663 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2664 CPU_TEMP_BUF_NLONGS * sizeof(long));
2666 #if !defined(CONFIG_SOFTMMU)
2667 if (USE_GUEST_BASE) {
2668 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2669 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2671 #endif
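/* We are entered with the CPU env pointer in the first argument and the
   address of the generated code to execute in the second.  */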
2673 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2674 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2677      * Return path for goto_ptr. Set return value to 0, as exit_tb does,
2678 * and fall through to the rest of the epilogue.
2680 s->code_gen_epilogue = s->code_ptr;
2681 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2683 /* TB epilogue */
2684 tb_ret_addr = s->code_ptr;
2686 /* Remove TCG locals stack space. */
2687 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2688 FRAME_SIZE - PUSH_SIZE);
2690 /* Restore registers x19..x28. */
2691 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2692 int ofs = (r - TCG_REG_X19 + 2) * 8;
2693 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2696 /* Pop (FP, LR), restore SP to previous frame. */
2697 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2698 TCG_REG_SP, PUSH_SIZE, 0, 1);
2699 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2702 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2704 int i;
2705 for (i = 0; i < count; ++i) {
2706 p[i] = NOP;
2710 typedef struct {
2711 DebugFrameHeader h;
2712 uint8_t fde_def_cfa[4];
2713 uint8_t fde_reg_ofs[24];
2714 } DebugFrame;
2716 #define ELF_HOST_MACHINE EM_AARCH64
2718 static const DebugFrame debug_frame = {
2719 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2720 .h.cie.id = -1,
2721 .h.cie.version = 1,
2722 .h.cie.code_align = 1,
2723 .h.cie.data_align = 0x78, /* sleb128 -8 */
2724 .h.cie.return_column = TCG_REG_LR,
2726 /* Total FDE size does not include the "len" member. */
2727 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2729 .fde_def_cfa = {
2730 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2731 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2732 (FRAME_SIZE >> 7)
2734 .fde_reg_ofs = {
2735 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2736 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2737 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2738 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2739 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2740 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2741 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2742 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2743 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2744         0x80 + 19, 10,              /* DW_CFA_offset, x19, -80 */
2745 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2746 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2750 void tcg_register_jit(void *buf, size_t buf_size)
2752 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));