2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "../tcg-pool.c.inc"
14 #include "qemu/bitops.h"
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
#ifdef CONFIG_DEBUG_TCG
/*
 * Debug names for every host register, indexed by TCGReg:
 * the 32 general registers first, followed by the 32 vector registers.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    /* V29 is an ordinary vector register; only X29 is the frame pointer. */
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65 static const int tcg_target_call_oarg_regs[1] = {
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
72 #ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
81 static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
83 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
84 ptrdiff_t offset = target - src_rx;
86 if (offset == sextract64(offset, 0, 26)) {
87 /* read instruction, mask away previous PC_REL26 parameter contents,
88 set the proper offset, then write back the instruction. */
89 *src_rw = deposit32(*src_rw, 0, 26, offset);
95 static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
97 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
98 ptrdiff_t offset = target - src_rx;
100 if (offset == sextract64(offset, 0, 19)) {
101 *src_rw = deposit32(*src_rw, 5, 19, offset);
107 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
108 intptr_t value, intptr_t addend)
110 tcg_debug_assert(addend == 0);
112 case R_AARCH64_JUMP26:
113 case R_AARCH64_CALL26:
114 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
115 case R_AARCH64_CONDBR19:
116 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
118 g_assert_not_reached();
122 #define TCG_CT_CONST_AIMM 0x100
123 #define TCG_CT_CONST_LIMM 0x200
124 #define TCG_CT_CONST_ZERO 0x400
125 #define TCG_CT_CONST_MONE 0x800
126 #define TCG_CT_CONST_ORRI 0x1000
127 #define TCG_CT_CONST_ANDI 0x2000
129 #define ALL_GENERAL_REGS 0xffffffffu
130 #define ALL_VECTOR_REGS 0xffffffff00000000ull
/*
 * Registers usable as operands of guest memory accesses.  With
 * CONFIG_SOFTMMU, X0-X3 are excluded from the general register set.
 */
#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif
/*
 * Match a constant valid for addition (12-bit, optionally shifted).
 * True when VAL has no bits set outside [11:0], or none outside [23:12].
 */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
/*
 * Match a constant valid for logical operations.
 * Returns true when VAL, or its bitwise inverse, is a single contiguous
 * run of one bits; 0 and all-ones are rejected.
 */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    /* Adding the lowest set bit carries through a contiguous run,
       leaving a single bit (or zero) iff there was exactly one run. */
    val += val & -val;
    return (val & (val - 1)) == 0;
}
/*
 * Return true if v16 is a valid 16-bit shifted immediate, i.e. only one
 * of its two bytes is non-zero.  On success *cmode selects the byte
 * position (0x8 low, 0xa high) and *imm8 holds the byte value.
 */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
/*
 * Return true if v32 is a valid 32-bit shifted immediate, i.e. at most
 * one of its four bytes is non-zero.  On success *cmode selects the byte
 * position (0x0, 0x2, 0x4, 0x6 for bytes 0..3) and *imm8 holds the byte.
 */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}
/*
 * Return true if v32 is a valid 32-bit shifting ones immediate: one
 * byte followed by all-ones below it (0x0000XXff or 0x00XXffff).
 * On success *cmode is 0xc or 0xd respectively and *imm8 is byte XX.
 */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
/*
 * Return true if v32 is a valid float32 immediate: the low 19 bits are
 * zero and bits [30:25] are either 0x20 or 0x1f.  On success *cmode is
 * 0xf and *imm8 packs bit 31, bit 25 and bits [24:19] of v32.
 */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}
/*
 * Return true if v64 is a valid float64 immediate: the low 48 bits are
 * zero and bits [62:54] are either 0x100 or 0x0ff.  On success *cmode is
 * 0xf and *imm8 packs bit 63, bit 54 and bits [53:48] of v64.
 */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}
/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 *
 * Bytes 3, 2 and 1 are tried in turn (i = 6, 4, 2); 0 is returned when
 * none of them works.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
/*
 * Return true if V is a valid 16-bit or 32-bit shifted immediate.
 * When both halves of v32 are equal it is tested as a replicated
 * 16-bit value, otherwise as a 32-bit value.
 */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}
280 static int tcg_target_const_match(tcg_target_long val, TCGType type,
281 const TCGArgConstraint *arg_ct)
285 if (ct & TCG_CT_CONST) {
288 if (type == TCG_TYPE_I32) {
291 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
294 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
297 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
300 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
304 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
307 case TCG_CT_CONST_ANDI:
310 case TCG_CT_CONST_ORRI:
311 if (val == deposit64(val, 32, 32, val)) {
313 return is_shimm1632(val, &cmode, &imm8);
317 /* Both bits should not be set for the same insn. */
318 g_assert_not_reached();
/* Values of the 4-bit condition field used by conditional instructions. */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};
345 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
346 [TCG_COND_EQ] = COND_EQ,
347 [TCG_COND_NE] = COND_NE,
348 [TCG_COND_LT] = COND_LT,
349 [TCG_COND_GE] = COND_GE,
350 [TCG_COND_LE] = COND_LE,
351 [TCG_COND_GT] = COND_GT,
353 [TCG_COND_LTU] = COND_LO,
354 [TCG_COND_GTU] = COND_HI,
355 [TCG_COND_GEU] = COND_HS,
356 [TCG_COND_LEU] = COND_LS,
/* Load/store kind; placed at bit 22 of the load/store register opcodes. */
typedef enum {
    LDST_ST = 0,      /* store */
    LDST_LD = 1,      /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;
366 /* We encode the format of the insn into the beginning of the name, so that
367 we can have the preprocessor help "typecheck" the insn vs the output
368 function. Arm didn't provide us with nice names for the formats, so we
369 use the section number of the architecture reference manual in which the
370 instruction group is described. */
372 /* Compare and branch (immediate). */
373 I3201_CBZ = 0x34000000,
374 I3201_CBNZ = 0x35000000,
376 /* Conditional branch (immediate). */
377 I3202_B_C = 0x54000000,
379 /* Unconditional branch (immediate). */
380 I3206_B = 0x14000000,
381 I3206_BL = 0x94000000,
383 /* Unconditional branch (register). */
384 I3207_BR = 0xd61f0000,
385 I3207_BLR = 0xd63f0000,
386 I3207_RET = 0xd65f0000,
388 /* AdvSIMD load/store single structure. */
389 I3303_LD1R = 0x0d40c000,
391 /* Load literal for loading the address at pc-relative offset */
392 I3305_LDR = 0x58000000,
393 I3305_LDR_v64 = 0x5c000000,
394 I3305_LDR_v128 = 0x9c000000,
396 /* Load/store register. Described here as 3.3.12, but the helper
397 that emits them can transform to 3.3.10 or 3.3.13. */
398 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
399 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
400 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
401 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
403 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
404 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
405 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
406 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
408 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
409 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
411 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
412 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
413 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
415 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
416 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
418 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
419 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
421 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
422 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
424 I3312_TO_I3310 = 0x00200800,
425 I3312_TO_I3313 = 0x01000000,
427 /* Load/store register pair instructions. */
428 I3314_LDP = 0x28400000,
429 I3314_STP = 0x28000000,
431 /* Add/subtract immediate instructions. */
432 I3401_ADDI = 0x11000000,
433 I3401_ADDSI = 0x31000000,
434 I3401_SUBI = 0x51000000,
435 I3401_SUBSI = 0x71000000,
437 /* Bitfield instructions. */
438 I3402_BFM = 0x33000000,
439 I3402_SBFM = 0x13000000,
440 I3402_UBFM = 0x53000000,
442 /* Extract instruction. */
443 I3403_EXTR = 0x13800000,
445 /* Logical immediate instructions. */
446 I3404_ANDI = 0x12000000,
447 I3404_ORRI = 0x32000000,
448 I3404_EORI = 0x52000000,
450 /* Move wide immediate instructions. */
451 I3405_MOVN = 0x12800000,
452 I3405_MOVZ = 0x52800000,
453 I3405_MOVK = 0x72800000,
455 /* PC relative addressing instructions. */
456 I3406_ADR = 0x10000000,
457 I3406_ADRP = 0x90000000,
459 /* Add/subtract shifted register instructions (without a shift). */
460 I3502_ADD = 0x0b000000,
461 I3502_ADDS = 0x2b000000,
462 I3502_SUB = 0x4b000000,
463 I3502_SUBS = 0x6b000000,
465 /* Add/subtract shifted register instructions (with a shift). */
466 I3502S_ADD_LSL = I3502_ADD,
468 /* Add/subtract with carry instructions. */
469 I3503_ADC = 0x1a000000,
470 I3503_SBC = 0x5a000000,
472 /* Conditional select instructions. */
473 I3506_CSEL = 0x1a800000,
474 I3506_CSINC = 0x1a800400,
475 I3506_CSINV = 0x5a800000,
476 I3506_CSNEG = 0x5a800400,
478 /* Data-processing (1 source) instructions. */
479 I3507_CLZ = 0x5ac01000,
480 I3507_RBIT = 0x5ac00000,
481 I3507_REV16 = 0x5ac00400,
482 I3507_REV32 = 0x5ac00800,
483 I3507_REV64 = 0x5ac00c00,
485 /* Data-processing (2 source) instructions. */
486 I3508_LSLV = 0x1ac02000,
487 I3508_LSRV = 0x1ac02400,
488 I3508_ASRV = 0x1ac02800,
489 I3508_RORV = 0x1ac02c00,
490 I3508_SMULH = 0x9b407c00,
491 I3508_UMULH = 0x9bc07c00,
492 I3508_UDIV = 0x1ac00800,
493 I3508_SDIV = 0x1ac00c00,
495 /* Data-processing (3 source) instructions. */
496 I3509_MADD = 0x1b000000,
497 I3509_MSUB = 0x1b008000,
499 /* Logical shifted register instructions (without a shift). */
500 I3510_AND = 0x0a000000,
501 I3510_BIC = 0x0a200000,
502 I3510_ORR = 0x2a000000,
503 I3510_ORN = 0x2a200000,
504 I3510_EOR = 0x4a000000,
505 I3510_EON = 0x4a200000,
506 I3510_ANDS = 0x6a000000,
508 /* Logical shifted register instructions (with a shift). */
509 I3502S_AND_LSR = I3510_AND | (1 << 22),
512 I3605_DUP = 0x0e000400,
513 I3605_INS = 0x4e001c00,
514 I3605_UMOV = 0x0e003c00,
516 /* AdvSIMD modified immediate */
517 I3606_MOVI = 0x0f000400,
518 I3606_MVNI = 0x2f000400,
519 I3606_BIC = 0x2f001400,
520 I3606_ORR = 0x0f001400,
522 /* AdvSIMD shift by immediate */
523 I3614_SSHR = 0x0f000400,
524 I3614_SSRA = 0x0f001400,
525 I3614_SHL = 0x0f005400,
526 I3614_SLI = 0x2f005400,
527 I3614_USHR = 0x2f000400,
528 I3614_USRA = 0x2f001400,
530 /* AdvSIMD three same. */
531 I3616_ADD = 0x0e208400,
532 I3616_AND = 0x0e201c00,
533 I3616_BIC = 0x0e601c00,
534 I3616_BIF = 0x2ee01c00,
535 I3616_BIT = 0x2ea01c00,
536 I3616_BSL = 0x2e601c00,
537 I3616_EOR = 0x2e201c00,
538 I3616_MUL = 0x0e209c00,
539 I3616_ORR = 0x0ea01c00,
540 I3616_ORN = 0x0ee01c00,
541 I3616_SUB = 0x2e208400,
542 I3616_CMGT = 0x0e203400,
543 I3616_CMGE = 0x0e203c00,
544 I3616_CMTST = 0x0e208c00,
545 I3616_CMHI = 0x2e203400,
546 I3616_CMHS = 0x2e203c00,
547 I3616_CMEQ = 0x2e208c00,
548 I3616_SMAX = 0x0e206400,
549 I3616_SMIN = 0x0e206c00,
550 I3616_SSHL = 0x0e204400,
551 I3616_SQADD = 0x0e200c00,
552 I3616_SQSUB = 0x0e202c00,
553 I3616_UMAX = 0x2e206400,
554 I3616_UMIN = 0x2e206c00,
555 I3616_UQADD = 0x2e200c00,
556 I3616_UQSUB = 0x2e202c00,
557 I3616_USHL = 0x2e204400,
559 /* AdvSIMD two-reg misc. */
560 I3617_CMGT0 = 0x0e208800,
561 I3617_CMEQ0 = 0x0e209800,
562 I3617_CMLT0 = 0x0e20a800,
563 I3617_CMGE0 = 0x2e208800,
564 I3617_CMLE0 = 0x2e20a800,
565 I3617_NOT = 0x2e205800,
566 I3617_ABS = 0x0e20b800,
567 I3617_NEG = 0x2e20b800,
569 /* System instructions. */
571 DMB_ISH = 0xd50338bf,
576 static inline uint32_t tcg_in32(TCGContext *s)
578 uint32_t v = *(uint32_t *)s->code_ptr;
/* Emit an opcode with "type-checking" of the format.  The macro pastes
   FMT into both the emitter name (tcg_out_insn_FMT) and the opcode name
   (I<FMT>_<OP>), so an opcode can only be used with its own emitter.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
586 static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
587 TCGReg rt, TCGReg rn, unsigned size)
589 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
592 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
593 int imm19, TCGReg rt)
595 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
598 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
599 TCGReg rt, int imm19)
601 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
604 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
605 TCGCond c, int imm19)
607 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
610 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
612 tcg_out32(s, insn | (imm26 & 0x03ffffff));
615 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
617 tcg_out32(s, insn | rn << 5);
620 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
621 TCGReg r1, TCGReg r2, TCGReg rn,
622 tcg_target_long ofs, bool pre, bool w)
624 insn |= 1u << 31; /* ext */
628 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
629 insn |= (ofs & (0x7f << 3)) << (15 - 3);
631 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
634 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
635 TCGReg rd, TCGReg rn, uint64_t aimm)
638 tcg_debug_assert((aimm & 0xfff) == 0);
640 tcg_debug_assert(aimm <= 0xfff);
641 aimm |= 1 << 12; /* apply LSL 12 */
643 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
646 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
647 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
648 that feed the DecodeBitMasks pseudo function. */
649 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
650 TCGReg rd, TCGReg rn, int n, int immr, int imms)
652 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
656 #define tcg_out_insn_3404 tcg_out_insn_3402
658 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
659 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
661 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
665 /* This function is used for the Move (wide immediate) instruction group.
666 Note that SHIFT is a full shift count, not the 2 bit HW field. */
667 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
668 TCGReg rd, uint16_t half, unsigned shift)
670 tcg_debug_assert((shift & ~0x30) == 0);
671 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
674 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
675 TCGReg rd, int64_t disp)
677 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
680 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
681 the rare occasion when we actually want to supply a shift amount. */
682 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
683 TCGType ext, TCGReg rd, TCGReg rn,
686 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
689 /* This function is for 3.5.2 (Add/subtract shifted register),
690 and 3.5.10 (Logical shifted register), for the vast majorty of cases
691 when we don't want to apply a shift. Thus it can also be used for
692 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
693 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
694 TCGReg rd, TCGReg rn, TCGReg rm)
696 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
699 #define tcg_out_insn_3503 tcg_out_insn_3502
700 #define tcg_out_insn_3508 tcg_out_insn_3502
701 #define tcg_out_insn_3510 tcg_out_insn_3502
703 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
704 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
706 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
707 | tcg_cond_to_aarch64[c] << 12);
710 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
711 TCGReg rd, TCGReg rn)
713 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
716 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
717 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
719 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
722 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
723 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
725 /* Note that bit 11 set means general register input. Therefore
726 we can handle both register sets with one function. */
727 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
728 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
731 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
732 TCGReg rd, bool op, int cmode, uint8_t imm8)
734 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
735 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
738 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
739 TCGReg rd, TCGReg rn, unsigned immhb)
741 tcg_out32(s, insn | q << 30 | immhb << 16
742 | (rn & 0x1f) << 5 | (rd & 0x1f));
745 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
746 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
748 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
749 | (rn & 0x1f) << 5 | (rd & 0x1f));
752 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
753 unsigned size, TCGReg rd, TCGReg rn)
755 tcg_out32(s, insn | q << 30 | (size << 22)
756 | (rn & 0x1f) << 5 | (rd & 0x1f));
759 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
760 TCGReg rd, TCGReg base, TCGType ext,
763 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
764 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
765 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
768 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
769 TCGReg rd, TCGReg rn, intptr_t offset)
771 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
774 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
775 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
777 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
778 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
779 | rn << 5 | (rd & 0x1f));
782 /* Register to register move using ORR (shifted register with no shift). */
783 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
785 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
788 /* Register to register move using ADDI (move to/from SP). */
789 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
791 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
794 /* This function is used for the Logical (immediate) instruction group.
795 The value of LIMM must satisfy IS_LIMM. See the comment above about
796 only supporting simplified logical immediates. */
797 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
798 TCGReg rd, TCGReg rn, uint64_t limm)
802 tcg_debug_assert(is_limm(limm));
807 r = 0; /* form 0....01....1 */
808 c = ctz64(~limm) - 1;
810 r = clz64(~limm); /* form 1..10..01..1 */
814 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
817 if (ext == TCG_TYPE_I32) {
822 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
825 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
826 TCGReg rd, int64_t v64)
828 bool q = type == TCG_TYPE_V128;
831 /* Test all bytes equal first. */
834 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
839 * Test all bytes 0x00 or 0xff second. This can match cases that
840 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
842 for (i = imm8 = 0; i < 8; i++) {
843 uint8_t byte = v64 >> (i * 8);
846 } else if (byte != 0) {
850 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
855 * Tests for various replications. For each element width, if we
856 * cannot find an expansion there's no point checking a larger
857 * width because we already know by replication it cannot match.
862 if (is_shimm16(v16, &cmode, &imm8)) {
863 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
866 if (is_shimm16(~v16, &cmode, &imm8)) {
867 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
872 * Otherwise, all remaining constants can be loaded in two insns:
873 * rd = v16 & 0xff, rd |= v16 & 0xff00.
875 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
876 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
878 } else if (vece == MO_32) {
882 if (is_shimm32(v32, &cmode, &imm8) ||
883 is_soimm32(v32, &cmode, &imm8) ||
884 is_fimm32(v32, &cmode, &imm8)) {
885 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
888 if (is_shimm32(n32, &cmode, &imm8) ||
889 is_soimm32(n32, &cmode, &imm8)) {
890 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
895 * Restrict the set of constants to those we can load with
896 * two instructions. Others we load from the pool.
898 i = is_shimm32_pair(v32, &cmode, &imm8);
900 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
901 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
904 i = is_shimm32_pair(n32, &cmode, &imm8);
906 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
907 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
910 } else if (is_fimm64(v64, &cmode, &imm8)) {
911 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
916 * As a last resort, load from the constant pool. Sadly there
917 * is no LD1R (literal), so store the full 16-byte vector.
919 if (type == TCG_TYPE_V128) {
920 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
921 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
923 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
924 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
928 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
929 TCGReg rd, TCGReg rs)
931 int is_q = type - TCG_TYPE_V64;
932 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
936 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
937 TCGReg r, TCGReg base, intptr_t offset)
939 TCGReg temp = TCG_REG_TMP;
941 if (offset < -0xffffff || offset > 0xffffff) {
942 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
943 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
946 AArch64Insn add_insn = I3401_ADDI;
949 add_insn = I3401_SUBI;
952 if (offset & 0xfff000) {
953 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
956 if (offset & 0xfff) {
957 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
961 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
965 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
966 tcg_target_long value)
968 tcg_target_long svalue = value;
969 tcg_target_long ivalue = ~value;
970 tcg_target_long t0, t1, t2;
977 tcg_debug_assert(rd < 32);
980 g_assert_not_reached();
983 /* For 32-bit values, discard potential garbage in value. For 64-bit
984 values within [2**31, 2**32-1], we can create smaller sequences by
985 interpreting this as a negative 32-bit number, while ensuring that
986 the high 32 bits are cleared by setting SF=0. */
987 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
988 svalue = (int32_t)value;
989 value = (uint32_t)value;
990 ivalue = (uint32_t)ivalue;
994 /* Speed things up by handling the common case of small positive
995 and negative values specially. */
996 if ((value & ~0xffffull) == 0) {
997 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
999 } else if ((ivalue & ~0xffffull) == 0) {
1000 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1004 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
1005 use the sign-extended value. That lets us match rotated values such
1006 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1007 if (is_limm(svalue)) {
1008 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1012 /* Look for host pointer values within 4G of the PC. This happens
1013 often when loading pointers to QEMU's own data structures. */
1014 if (type == TCG_TYPE_I64) {
1015 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1016 tcg_target_long disp = value - src_rx;
1017 if (disp == sextract64(disp, 0, 21)) {
1018 tcg_out_insn(s, 3406, ADR, rd, disp);
1021 disp = (value >> 12) - (src_rx >> 12);
1022 if (disp == sextract64(disp, 0, 21)) {
1023 tcg_out_insn(s, 3406, ADRP, rd, disp);
1024 if (value & 0xfff) {
1025 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1031 /* Would it take fewer insns to begin with MOVN? */
1032 if (ctpop64(value) >= 32) {
1039 s0 = ctz64(t0) & (63 & -16);
1040 t1 = t0 & ~(0xffffUL << s0);
1041 s1 = ctz64(t1) & (63 & -16);
1042 t2 = t1 & ~(0xffffUL << s1);
1044 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1046 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1051 /* For more than 2 insns, dump it into the constant pool. */
1052 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1053 tcg_out_insn(s, 3305, LDR, 0, rd);
1056 /* Define something more legible for general use. */
1057 #define tcg_out_ldst_r tcg_out_insn_3310
1059 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1060 TCGReg rn, intptr_t offset, int lgsize)
1062 /* If the offset is naturally aligned and in range, then we can
1063 use the scaled uimm12 encoding */
1064 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1065 uintptr_t scaled_uimm = offset >> lgsize;
1066 if (scaled_uimm <= 0xfff) {
1067 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1072 /* Small signed offsets can use the unscaled encoding. */
1073 if (offset >= -256 && offset < 256) {
1074 tcg_out_insn_3312(s, insn, rd, rn, offset);
1078 /* Worst-case scenario, move offset to temp register, use reg offset. */
1079 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1080 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1083 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1091 if (ret < 32 && arg < 32) {
1092 tcg_out_movr(s, type, ret, arg);
1094 } else if (ret < 32) {
1095 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1097 } else if (arg < 32) {
1098 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1104 tcg_debug_assert(ret >= 32 && arg >= 32);
1105 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1108 tcg_debug_assert(ret >= 32 && arg >= 32);
1109 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1113 g_assert_not_reached();
1118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1119 TCGReg base, intptr_t ofs)
1126 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1130 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1142 g_assert_not_reached();
1144 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1148 TCGReg base, intptr_t ofs)
1155 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1159 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1171 g_assert_not_reached();
1173 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1176 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1177 TCGReg base, intptr_t ofs)
1179 if (type <= TCG_TYPE_I64 && val == 0) {
1180 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1186 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1187 TCGReg rn, unsigned int a, unsigned int b)
1189 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1192 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1193 TCGReg rn, unsigned int a, unsigned int b)
1195 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1198 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1199 TCGReg rn, unsigned int a, unsigned int b)
1201 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1204 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1205 TCGReg rn, TCGReg rm, unsigned int a)
1207 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1210 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1211 TCGReg rd, TCGReg rn, unsigned int m)
1213 int bits = ext ? 64 : 32;
1215 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1218 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1219 TCGReg rd, TCGReg rn, unsigned int m)
1221 int max = ext ? 63 : 31;
1222 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1225 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1226 TCGReg rd, TCGReg rn, unsigned int m)
1228 int max = ext ? 63 : 31;
1229 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
/*
 * Emit a rotate right of RN by the constant M into RD, expressed as EXTR
 * with RN supplied for both source operands and the count reduced modulo
 * the operand width.
 */
1232 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1233 TCGReg rd, TCGReg rn, unsigned int m)
1235 int max = ext ? 63 : 31;
1236 tcg_out_extr(s, ext, rd, rn, rn, m & max);
/*
 * Emit a rotate left of RN by the constant M into RD, expressed as a
 * rotate right by (bits - m) mod bits through EXTR with RN supplied for
 * both source operands.
 * The mask is applied after the subtraction so that a rotate count of
 * zero (or any multiple of the operand width) yields an EXTR immediate of
 * 0 rather than the out-of-range value bits.
 * NOTE(review): max is declared on a line this listing omits; the masking
 * assumes it equals bits - 1 -- confirm.
 */
1239 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1240 TCGReg rd, TCGReg rn, unsigned int m)
1242 int bits = ext ? 64 : 32;
1244 tcg_out_extr(s, ext, rd, rn, rn, (bits - m) & max);
/*
 * Deposit the low WIDTH bits of RN into RD starting at bit LSB, emitted
 * as BFM with operands derived from LSB and WIDTH.
 */
1247 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1248 TCGReg rn, unsigned lsb, unsigned width)
1250 unsigned size = ext ? 64 : 32;
/* (size - lsb) reduced modulo size, so lsb == 0 gives 0. */
1251 unsigned a = (size - lsb) & (size - 1);
1252 unsigned b = width - 1;
1253 tcg_out_bfm(s, ext, rd, rn, a, b);
/*
 * Compare register A with B, discarding the arithmetic result into XZR.
 * Three forms are visible: SUBS with immediate b, ADDS with immediate -b,
 * and SUBS with b as a register.
 * NOTE(review): the conditions choosing between the forms are not visible
 * in this listing; presumably const_b selects the immediate forms and the
 * sign of b selects between SUBS and ADDS -- confirm.
 */
1256 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1257 tcg_target_long b, bool const_b)
1260 /* Using CMP or CMN aliases. */
1262 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1264 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1267 /* Using CMP alias SUBS wzr, Wn, Wm */
1268 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
/*
 * Emit an unconditional B to TARGET.
 * The pc-relative distance is converted to instruction units (>> 2) and,
 * in debug builds, asserted to fit in a signed 26-bit field.
 */
1272 static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1274 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1275 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1276 tcg_out_insn(s, 3206, B, offset);
/*
 * Emit a jump to TARGET that also works when TARGET is out of direct
 * branch range: a B when the instruction-unit offset fits in a signed
 * 26-bit field, otherwise the absolute address is loaded into
 * TCG_REG_TMP and an indirect BR is emitted.
 */
1279 static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1281 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1282 if (offset == sextract64(offset, 0, 26)) {
1283 tcg_out_insn(s, 3206, B, offset);
/* Out of range for B: materialize the address and branch through TMP. */
1285 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1286 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
/* Emit an indirect call (BLR) through register REG. */
1290 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1292 tcg_out_insn(s, 3207, BLR, reg);
/*
 * Emit a call to TARGET: a direct BL when the instruction-unit offset
 * fits in a signed 26-bit field, otherwise the absolute address is loaded
 * into TCG_REG_TMP and called through tcg_out_callr().
 */
1295 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1297 ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1298 if (offset == sextract64(offset, 0, 26)) {
1299 tcg_out_insn(s, 3206, BL, offset);
/* Out of range for BL: materialize the address and call through TMP. */
1301 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1302 tcg_out_callr(s, TCG_REG_TMP);
/*
 * Rewrite the two-instruction jump slot so that it transfers control to
 * ADDR.  Displacements are computed relative to JMP_RX; the new
 * instruction pair is combined into one 64-bit value, stored at JMP_RW
 * with qatomic_set(), and the 8-byte range is passed to
 * flush_idcache_range().
 * NOTE(review): the declaration of pair, the value given to i2 on the
 * direct-branch path and the else keyword are not visible in this listing.
 */
1306 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1307 uintptr_t jmp_rw, uintptr_t addr)
1309 tcg_insn_unit i1, i2;
1310 TCGType rt = TCG_TYPE_I64;
1311 TCGReg rd = TCG_REG_TMP;
/* Byte displacement from the jump slot to the destination. */
1314 ptrdiff_t offset = addr - jmp_rx;
/* Displacement passes the signed 26-bit check: encode a direct B. */
1316 if (offset == sextract64(offset, 0, 26)) {
1317 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
/* Otherwise work with the difference of the addresses shifted right by 12. */
1320 offset = (addr >> 12) - (jmp_rx >> 12);
/* ADRP rd: low two bits of the delta at bit 29, the next 19 bits at bit 5. */
1323 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
/* ADD rd, rd, imm: the low 12 bits of ADDR placed at bit 10. */
1325 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
/* i1 occupies the low 32 bits of the pair, i2 the high 32 bits. */
1327 pair = (uint64_t)i2 << 32 | i1;
1328 qatomic_set((uint64_t *)jmp_rw, pair);
1329 flush_idcache_range(jmp_rx, jmp_rw, 8);
/*
 * Emit an unconditional branch to label L.
 * When the label has no value yet, an R_AARCH64_JUMP26 relocation is
 * recorded at the current code pointer and a B with offset 0 is emitted
 * as a placeholder; otherwise the branch goes straight to the label
 * address via tcg_out_goto().
 */
1332 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1334 if (!l->has_value) {
1335 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1336 tcg_out_insn(s, 3206, B, 0);
1338 tcg_out_goto(s, l->u.value_ptr);
/*
 * Emit a conditional branch to label L taken when (A c B) holds.
 * An EQ or NE test against constant zero is singled out by the first
 * condition; the final instruction is one of B.cond, CBZ or CBNZ.
 * NOTE(review): the flag that records which form is used, and the else
 * keywords, are not visible in this listing; presumably the zero test
 * skips the explicit compare and ends in CBZ or CBNZ -- confirm.
 */
1342 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1343 TCGArg b, bool b_const, TCGLabel *l)
1348 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1352 tcg_out_cmp(s, ext, a, b, b_const);
/* Unresolved label: record a 19-bit conditional branch relocation. */
1355 if (!l->has_value) {
1356 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
/* Offset is derived from the word read by tcg_in32(), shifted right by 5. */
1357 offset = tcg_in32(s) >> 5;
/* Resolved label: offset in instruction units, asserted to fit 19 bits. */
1359 offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1360 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1364 tcg_out_insn(s, 3202, B_C, c, offset);
1365 } else if (c == TCG_COND_EQ) {
1366 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1368 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
/* Emit REV64 from RN into RD with the size argument TCG_TYPE_I64. */
1372 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1374 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
/* Emit REV32 from RN into RD with the size argument TCG_TYPE_I32. */
1377 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1379 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
/* Emit REV16 from RN into RD with the size argument TCG_TYPE_I32. */
1382 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1384 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
/*
 * Sign-extend the low (8 << s_bits) bits of RN into RD at operation
 * size EXT.
 */
1387 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1388 TCGReg rd, TCGReg rn)
1390 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
/* Index of the top bit of the source field: 7, 15 or 31. */
1391 int bits = (8 << s_bits) - 1;
1392 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
/*
 * Zero-extend the low (8 << s_bits) bits of RN into RD.  The UBFM is
 * always emitted with a size argument of 0, unlike tcg_out_sxt() which
 * takes the size from its caller.
 */
1395 static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1396 TCGReg rd, TCGReg rn)
1398 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
/* Index of the top bit of the source field. */
1399 int bits = (8 << s_bits) - 1;
1400 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
/*
 * Add the signed immediate AIMM to RN, placing the result in RD.
 * Two encodings are visible: ADD with aimm and SUB with -aimm.
 * NOTE(review): the condition selecting between them is not visible in
 * this listing; presumably it is the sign of aimm -- confirm.
 */
1403 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1404 TCGReg rn, int64_t aimm)
1407 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1409 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
/*
 * Emit a double-word add or subtract (SUB selects subtraction):
 * the low half combines AL with BL, the high half combines AH with BH.
 * BL and BH may be constants, flagged by CONST_BL and CONST_BH.
 * The low-half instruction is emitted first and the high-half
 * instruction after it; the low result is finally moved into the
 * originally requested register.
 * NOTE(review): several assignments (to rl, insn, bl, bh and al) and the
 * branch structure around them are not visible in this listing.
 */
1413 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1414 TCGReg rh, TCGReg al, TCGReg ah,
1415 tcg_target_long bl, tcg_target_long bh,
1416 bool const_bl, bool const_bh, bool sub)
/* Remember the requested low destination for the final move. */
1418 TCGReg orig_rl = rl;
/* The low destination overlaps a high-half input that is read later. */
1421 if (rl == ah || (!const_bh && rl == bh)) {
1427 if ((bl < 0) ^ sub) {
1431 if (unlikely(al == TCG_REG_XZR)) {
1432 /* ??? We want to allow al to be zero for the benefit of
1433 negation via subtraction. However, that leaves open the
1434 possibility of adding 0+const in the low part, and the
1435 immediate add instructions encode XSP not XZR. Don't try
1436 anything more elaborate here than loading another zero. */
1438 tcg_out_movi(s, ext, al, 0);
/* Low half with an immediate operand (format 3401). */
1440 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
/* Low half with a register operand: flag-setting SUBS or ADDS. */
1442 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1447 /* Note that the only two constants we support are 0 and -1, and
1448 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1449 if ((bh != 0) ^ sub) {
/* High half (format 3503). */
1456 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1458 tcg_out_mov(s, ext, orig_rl, rl);
/*
 * Emit a DMB barrier for the TCG memory-ordering request A0.
 * Only the TCG_MO_ALL bits of A0 are used to index the table.
 */
1461 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1463 static const uint32_t sync[] = {
/* Default for every index: full load and store barrier. */
1464 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
/* The entries below replace the default for specific combinations. */
1465 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1466 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1467 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1468 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1470 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
/*
 * Emit count-leading-zeros (or count-trailing-zeros when IS_CTZ) of A0
 * into D, where B is the value to produce when A0 is zero and CONST_B
 * says whether B is a constant.
 * When B is the constant operand width, a single CLZ is emitted.
 * Otherwise A0 is compared with zero, CLZ goes to TCG_REG_TMP and a
 * conditional select on NE picks between TMP and B.
 * NOTE(review): the declaration of a1, the is_ctz test guarding RBIT and
 * the alternative values assigned to sel and b are not visible in this
 * listing.
 */
1473 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1474 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
/* Bit-reverse the input into a1. */
1479 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1481 if (const_b && b == (ext ? 64 : 32)) {
1482 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1484 AArch64Insn sel = I3506_CSEL;
/* Test the original input against zero to drive the select below. */
1486 tcg_out_cmp(s, ext, a0, 0, 1);
1487 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1493 } else if (b == 0) {
1496 tcg_out_movi(s, ext, d, b);
/* NE (input nonzero) selects the CLZ result held in TMP. */
1500 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1504 #ifdef CONFIG_SOFTMMU
1505 #include "../tcg-ldst.c.inc"
/*
 * Table of load helpers, indexed by the MO_BSWAP and MO_SIZE bits of the
 * memory operation (see the lookup in tcg_out_qemu_ld_slow_path).
 * Entries exist for one byte-sized unsigned load and for little- and
 * big-endian unsigned loads of 16, 32 and 64 bits.
 */
1507 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1508 * TCGMemOpIdx oi, uintptr_t ra)
1510 static void * const qemu_ld_helpers[16] = {
1511 [MO_UB] = helper_ret_ldub_mmu,
1512 [MO_LEUW] = helper_le_lduw_mmu,
1513 [MO_LEUL] = helper_le_ldul_mmu,
1514 [MO_LEQ] = helper_le_ldq_mmu,
1515 [MO_BEUW] = helper_be_lduw_mmu,
1516 [MO_BEUL] = helper_be_ldul_mmu,
1517 [MO_BEQ] = helper_be_ldq_mmu,
// Table of store helpers, indexed by the MO_BSWAP and MO_SIZE bits of the
// memory operation (see the lookup in tcg_out_qemu_st_slow_path).
// Entries exist for a byte store and for little- and big-endian stores
// of 16, 32 and 64 bits.
1520 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1521 * uintxx_t val, TCGMemOpIdx oi,
1524 static void * const qemu_st_helpers[16] = {
1525 [MO_UB] = helper_ret_stb_mmu,
1526 [MO_LEUW] = helper_le_stw_mmu,
1527 [MO_LEUL] = helper_le_stl_mmu,
1528 [MO_LEQ] = helper_le_stq_mmu,
1529 [MO_BEUW] = helper_be_stw_mmu,
1530 [MO_BEUL] = helper_be_stl_mmu,
1531 [MO_BEQ] = helper_be_stq_mmu,
// Load the pc-relative address of TARGET into RD with a single ADR.
// In debug builds the byte offset is asserted to fit in a signed 21-bit
// field.
1534 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1536 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1537 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1538 tcg_out_insn(s, 3406, ADR, rd, offset);
// Emit the out-of-line slow path for the guest load described by LB.
// The recorded branch at lb->label_ptr[0] is relocated to the current
// code pointer, the helper arguments are set up in X0..X3, the helper
// chosen by the MO_BSWAP and MO_SIZE bits is called, its result in X0 is
// copied or sign-extended into lb->datalo_reg, and control jumps back to
// lb->raddr.
// NOTE(review): the return statements are not visible in this listing;
// presumably false when the relocation fails and true otherwise.
1541 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1543 TCGMemOpIdx oi = lb->oi;
1544 MemOp opc = get_memop(oi);
1545 MemOp size = opc & MO_SIZE;
1547 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
// X0 = env, X1 = guest address, X2 = oi, X3 = return address.
1551 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1552 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1553 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1554 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1555 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
// Signed loads sign-extend the helper result; others copy it.
1556 if (opc & MO_SIGN) {
1557 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1559 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1562 tcg_out_goto(s, lb->raddr);
// Emit the out-of-line slow path for the guest store described by LB.
// The recorded branch at lb->label_ptr[0] is relocated to the current
// code pointer, the helper arguments are set up in X0..X4, the helper
// chosen by the MO_BSWAP and MO_SIZE bits is called, and control jumps
// back to lb->raddr.
// NOTE(review): the return statements are not visible in this listing;
// presumably false when the relocation fails and true otherwise.
1566 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1568 TCGMemOpIdx oi = lb->oi;
1569 MemOp opc = get_memop(oi);
1570 MemOp size = opc & MO_SIZE;
1572 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
// X0 = env, X1 = guest address, X2 = data, X3 = oi, X4 = return address.
1576 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1577 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1578 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1579 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1580 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1581 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1582 tcg_out_goto(s, lb->raddr);
// Allocate a slow-path record with new_ldst_label() and fill it in with
// the data and address registers, the return address RADDR (converted
// with tcg_splitwx_to_rx) and the location LABEL_PTR of the branch to
// patch later.
// NOTE(review): the assignments of oi and type are not visible in this
// listing, although both slow-path emitters read lb->oi and the load
// slow path reads lb->type.
1586 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1587 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1588 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1590 TCGLabelQemuLdst *label = new_ldst_label(s);
1592 label->is_ld = is_ld;
1595 label->datalo_reg = data_reg;
1596 label->addrlo_reg = addr_reg;
1597 label->raddr = tcg_splitwx_to_rx(raddr);
1598 label->label_ptr[0] = label_ptr;
1601 /* We expect to use a 7-bit scaled negative offset from ENV. */
1602 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1603 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
/* Together the two checks above require the offset to lie in [-512, 0]. */
1605 /* These offsets are built into the LDP below. */
/* mask must be the first field and table must follow it at byte 8. */
1606 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1607 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
/*
 * Emit the fast-path TLB lookup for guest address ADDR_REG and memory
 * operation OPC in MMU index MEM_INDEX.  The location of the emitted
 * conditional branch is returned through LABEL_PTR so that the slow
 * path can patch it.
 * NOTE(review): the last parameter (used below as is_read), the
 * declarations of mask_type and x3, and the assignment of x3 are not
 * visible in this listing.
 */
1609 /* Load and compare a TLB entry, emitting the conditional jump to the
1610 slow path for the failure case, which will be patched later when finalizing
1611 the slow path. Generated code returns the host addend in X1,
1612 clobbers X0,X2,X3,TMP. */
1613 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1614 tcg_insn_unit **label_ptr, int mem_index,
1617 unsigned a_bits = get_alignment_bits(opc);
1618 unsigned s_bits = opc & MO_SIZE;
/* Low-bit masks covering the alignment and the access size. */
1619 unsigned a_mask = (1u << a_bits) - 1;
1620 unsigned s_mask = (1u << s_bits) - 1;
1623 uint64_t compare_mask;
/* Use a 64-bit index computation only when more than 32 bits are needed. */
1625 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1626 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1628 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
1629 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1630 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1632 /* Extract the TLB index from the address into X0. */
1633 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1634 TCG_REG_X0, TCG_REG_X0, addr_reg,
1635 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1637 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
1638 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1640 /* Load the tlb comparator into X0, and the fast path addend into X1. */
1641 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1642 ? offsetof(CPUTLBEntry, addr_read)
1643 : offsetof(CPUTLBEntry, addr_write));
1644 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1645 offsetof(CPUTLBEntry, addend));
1647 /* For aligned accesses, we check the first byte and include the alignment
1648 bits within the address. For unaligned access, we check that we don't
1649 cross pages using the address of the last byte of the access. */
1650 if (a_bits >= s_bits) {
/* X3 = address advanced by the part of the access beyond the alignment. */
1653 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1654 TCG_REG_X3, addr_reg, s_mask - a_mask);
/* Keep the page bits plus the low alignment bits for the comparison. */
1657 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1659 /* Store the page mask part of the address into X3. */
1660 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1661 TCG_REG_X3, x3, compare_mask);
1663 /* Perform the address comparison. */
1664 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1666 /* If not equal, we jump to the slow path. */
1667 *label_ptr = s->code_ptr;
/* Placeholder offset 0; the slow-path emitters relocate this branch. */
1668 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1671 #endif /* CONFIG_SOFTMMU */
/*
 * Emit the actual guest load for MEMOP into DATA_R from the address
 * formed by ADDR_R and the offset register OFF_R of type OTYPE.
 * The switch dispatches on the size and sign bits of MEMOP; loads are
 * followed by a byte reverse (and a sign extension for signed types)
 * on the byte-swapped paths.
 * NOTE(review): the case labels and the if (bswap) tests are not visible
 * in this listing, so the pairing of each load with a case is inferred
 * from the instruction names.
 */
1673 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1674 TCGReg data_r, TCGReg addr_r,
1675 TCGType otype, TCGReg off_r)
1677 const MemOp bswap = memop & MO_BSWAP;
1679 switch (memop & MO_SSIZE) {
/* Unsigned byte load. */
1681 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
/* Sign-extending byte load, to X or W depending on EXT. */
1684 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1685 data_r, addr_r, otype, off_r);
/* Unsigned halfword load, with a REV16 visible after it. */
1688 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1690 tcg_out_rev16(s, data_r, data_r);
/* Halfword load followed by REV16 and an explicit sign extension. */
1695 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1696 tcg_out_rev16(s, data_r, data_r);
1697 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
/* Sign-extending halfword load. */
1699 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1700 data_r, addr_r, otype, off_r);
/* Word load, with a REV32 visible after it. */
1704 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1706 tcg_out_rev32(s, data_r, data_r);
/* Word load followed by REV32 and sign extension to 64 bits. */
1711 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1712 tcg_out_rev32(s, data_r, data_r);
1713 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
/* Sign-extending word load. */
1715 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
/* Doubleword load, with a REV64 visible after it. */
1719 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1721 tcg_out_rev64(s, data_r, data_r);
/*
 * Emit the actual guest store for MEMOP of DATA_R to the address formed
 * by ADDR_R and the offset register OFF_R of type OTYPE.
 * For byte-swapped stores of 16, 32 and 64 bits the value is first
 * reversed into TCG_REG_TMP, unless the source is the zero register,
 * for which the reverse is skipped.
 * NOTE(review): the case labels are not visible in this listing.
 */
1729 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1730 TCGReg data_r, TCGReg addr_r,
1731 TCGType otype, TCGReg off_r)
1733 const MemOp bswap = memop & MO_BSWAP;
1735 switch (memop & MO_SIZE) {
/* Byte store: never needs a swap. */
1737 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
/* Halfword store. */
1740 if (bswap && data_r != TCG_REG_XZR) {
1741 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1742 data_r = TCG_REG_TMP;
1744 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
/* Word store. */
1747 if (bswap && data_r != TCG_REG_XZR) {
1748 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1749 data_r = TCG_REG_TMP;
1751 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
/* Doubleword store. */
1754 if (bswap && data_r != TCG_REG_XZR) {
1755 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1756 data_r = TCG_REG_TMP;
1758 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
/*
 * Emit a guest memory load of DATA_REG from guest address ADDR_REG as
 * described by OI, at operation size EXT.
 * With CONFIG_SOFTMMU the TLB lookup is emitted first, the load uses X1
 * as base with the guest address as offset, and a slow-path record is
 * added.  Without it the load either uses TCG_REG_GUEST_BASE as base or
 * the guest address directly with XZR as offset.
 */
1765 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1766 TCGMemOpIdx oi, TCGType ext)
1768 MemOp memop = get_memop(oi);
/* Type of the guest address register when it is used as an offset. */
1769 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1770 #ifdef CONFIG_SOFTMMU
1771 unsigned mem_index = get_mmuidx(oi);
1772 tcg_insn_unit *label_ptr;
1774 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1775 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1776 TCG_REG_X1, otype, addr_reg);
/* s->code_ptr is the address the slow path returns to. */
1777 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1778 s->code_ptr, label_ptr);
1779 #else /* !CONFIG_SOFTMMU */
1780 if (USE_GUEST_BASE) {
1781 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1782 TCG_REG_GUEST_BASE, otype, addr_reg);
1784 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1785 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1787 #endif /* CONFIG_SOFTMMU */
/*
 * Emit a guest memory store of DATA_REG to guest address ADDR_REG.
 * With CONFIG_SOFTMMU the TLB lookup is emitted first, the store uses X1
 * as base with the guest address as offset, and a slow-path record is
 * added whose type flag is true only for 64-bit stores.  Without it the
 * store either uses TCG_REG_GUEST_BASE as base or the guest address
 * directly with XZR as offset.
 * NOTE(review): the remainder of the parameter list (which supplies oi)
 * is not visible in this listing.
 */
1790 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1793 MemOp memop = get_memop(oi);
/* Type of the guest address register when it is used as an offset. */
1794 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1795 #ifdef CONFIG_SOFTMMU
1796 unsigned mem_index = get_mmuidx(oi);
1797 tcg_insn_unit *label_ptr;
1799 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1800 tcg_out_qemu_st_direct(s, memop, data_reg,
1801 TCG_REG_X1, otype, addr_reg);
1802 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1803 data_reg, addr_reg, s->code_ptr, label_ptr);
1804 #else /* !CONFIG_SOFTMMU */
1805 if (USE_GUEST_BASE) {
1806 tcg_out_qemu_st_direct(s, memop, data_reg,
1807 TCG_REG_GUEST_BASE, otype, addr_reg);
1809 tcg_out_qemu_st_direct(s, memop, data_reg,
1810 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1812 #endif /* CONFIG_SOFTMMU */
/*
 * Branch target used by the exit_tb case of tcg_out_op() after it loads
 * the return value into X0.  Where this pointer is assigned is not
 * visible in this listing.
 */
1815 static const tcg_insn_unit *tb_ret_addr;
/*
 * Emit host code for one integer TCG opcode OPC.
 * ARGS holds the operands and CONST_ARGS flags which of them are
 * constants.  Most cases emit one instruction, choosing an immediate
 * form when the relevant operand is constant.
 * NOTE(review): the switch statement itself, the break statements, the
 * if (c2) tests, the narrowing casts applied in the 32-bit cases and a
 * few case labels are not visible in this listing.
 */
1817 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1818 const TCGArg args[TCG_MAX_OP_ARGS],
1819 const int const_args[TCG_MAX_OP_ARGS])
1821 /* 99% of the time, we can signal the use of extension registers
1822 by looking to see if the opcode handles 64-bit data. */
1823 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1825 /* Hoist the loads of the most common arguments. */
1826 TCGArg a0 = args[0];
1827 TCGArg a1 = args[1];
1828 TCGArg a2 = args[2];
1829 int c2 = const_args[2];
1831 /* Some operands are defined with "rZ" constraint, a register or
1832 the zero register. These need not actually test args[I] == 0. */
1833 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
/* Control flow: leaving the translation block and chaining to another. */
1836 case INDEX_op_exit_tb:
1837 /* Reuse the zeroing that exists for goto_ptr. */
1839 tcg_out_goto_long(s, tcg_code_gen_epilogue);
1841 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1842 tcg_out_goto_long(s, tb_ret_addr);
1846 case INDEX_op_goto_tb:
1847 if (s->tb_jmp_insn_offset != NULL) {
1848 /* TCG_TARGET_HAS_direct_jump */
1849 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1850 write can be used to patch the target address. */
1851 if ((uintptr_t)s->code_ptr & 7) {
1854 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1855 /* actual branch destination will be patched by
1856 tb_target_set_jmp_target later. */
1857 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1858 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1860 /* !TCG_TARGET_HAS_direct_jump */
1861 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1862 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1863 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
/* Both variants finish with an indirect branch through TMP. */
1865 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1866 set_jmp_reset_offset(s, a0);
1869 case INDEX_op_goto_ptr:
1870 tcg_out_insn(s, 3207, BR, a0);
1874 tcg_out_goto_label(s, arg_label(a0));
/* Host loads; the last argument to tcg_out_ldst is log2 of the size. */
1877 case INDEX_op_ld8u_i32:
1878 case INDEX_op_ld8u_i64:
1879 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1881 case INDEX_op_ld8s_i32:
1882 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1884 case INDEX_op_ld8s_i64:
1885 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1887 case INDEX_op_ld16u_i32:
1888 case INDEX_op_ld16u_i64:
1889 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1891 case INDEX_op_ld16s_i32:
1892 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1894 case INDEX_op_ld16s_i64:
1895 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1897 case INDEX_op_ld_i32:
1898 case INDEX_op_ld32u_i64:
1899 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1901 case INDEX_op_ld32s_i64:
1902 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1904 case INDEX_op_ld_i64:
1905 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
/* Host stores; a constant source operand becomes the zero register. */
1908 case INDEX_op_st8_i32:
1909 case INDEX_op_st8_i64:
1910 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1912 case INDEX_op_st16_i32:
1913 case INDEX_op_st16_i64:
1914 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1916 case INDEX_op_st_i32:
1917 case INDEX_op_st32_i64:
1918 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1920 case INDEX_op_st_i64:
1921 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
/* Integer arithmetic. */
1924 case INDEX_op_add_i32:
1927 case INDEX_op_add_i64:
1929 tcg_out_addsubi(s, ext, a0, a1, a2);
1931 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1935 case INDEX_op_sub_i32:
1938 case INDEX_op_sub_i64:
1940 tcg_out_addsubi(s, ext, a0, a1, -a2);
1942 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1946 case INDEX_op_neg_i64:
1947 case INDEX_op_neg_i32:
1948 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
/* Bitwise logic; the complemented forms invert a constant operand. */
1951 case INDEX_op_and_i32:
1954 case INDEX_op_and_i64:
1956 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1958 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1962 case INDEX_op_andc_i32:
1965 case INDEX_op_andc_i64:
1967 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1969 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1973 case INDEX_op_or_i32:
1976 case INDEX_op_or_i64:
1978 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1980 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1984 case INDEX_op_orc_i32:
1987 case INDEX_op_orc_i64:
1989 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1991 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1995 case INDEX_op_xor_i32:
1998 case INDEX_op_xor_i64:
2000 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2002 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2006 case INDEX_op_eqv_i32:
2009 case INDEX_op_eqv_i64:
2011 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2013 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2017 case INDEX_op_not_i64:
2018 case INDEX_op_not_i32:
2019 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
/* Multiply, divide and remainder. */
2022 case INDEX_op_mul_i64:
2023 case INDEX_op_mul_i32:
2024 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2027 case INDEX_op_div_i64:
2028 case INDEX_op_div_i32:
2029 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2031 case INDEX_op_divu_i64:
2032 case INDEX_op_divu_i32:
2033 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
/* Remainder: quotient into TMP, then MSUB with TMP, a2 and a1. */
2036 case INDEX_op_rem_i64:
2037 case INDEX_op_rem_i32:
2038 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2039 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2041 case INDEX_op_remu_i64:
2042 case INDEX_op_remu_i32:
2043 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2044 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
/* Shifts and rotates: immediate helper or register-count instruction. */
2047 case INDEX_op_shl_i64:
2048 case INDEX_op_shl_i32:
2050 tcg_out_shl(s, ext, a0, a1, a2);
2052 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2056 case INDEX_op_shr_i64:
2057 case INDEX_op_shr_i32:
2059 tcg_out_shr(s, ext, a0, a1, a2);
2061 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2065 case INDEX_op_sar_i64:
2066 case INDEX_op_sar_i32:
2068 tcg_out_sar(s, ext, a0, a1, a2);
2070 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2074 case INDEX_op_rotr_i64:
2075 case INDEX_op_rotr_i32:
2077 tcg_out_rotr(s, ext, a0, a1, a2);
2079 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2083 case INDEX_op_rotl_i64:
2084 case INDEX_op_rotl_i32:
2086 tcg_out_rotl(s, ext, a0, a1, a2);
/* Register rotate left: negate the count into TMP and rotate right. */
2088 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2089 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2093 case INDEX_op_clz_i64:
2094 case INDEX_op_clz_i32:
2095 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2097 case INDEX_op_ctz_i64:
2098 case INDEX_op_ctz_i32:
2099 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
/* Compare-driven operations. */
2102 case INDEX_op_brcond_i32:
2105 case INDEX_op_brcond_i64:
2106 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2109 case INDEX_op_setcond_i32:
2112 case INDEX_op_setcond_i64:
2113 tcg_out_cmp(s, ext, a1, a2, c2);
2114 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2115 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2116 TCG_REG_XZR, tcg_invert_cond(args[3]));
2119 case INDEX_op_movcond_i32:
2122 case INDEX_op_movcond_i64:
2123 tcg_out_cmp(s, ext, a1, a2, c2);
2124 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
/* Guest memory accesses. */
2127 case INDEX_op_qemu_ld_i32:
2128 case INDEX_op_qemu_ld_i64:
2129 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2131 case INDEX_op_qemu_st_i32:
2132 case INDEX_op_qemu_st_i64:
2133 tcg_out_qemu_st(s, REG0(0), a1, a2);
/* Byte swaps. */
2136 case INDEX_op_bswap64_i64:
2137 tcg_out_rev64(s, a0, a1);
2139 case INDEX_op_bswap32_i64:
2140 case INDEX_op_bswap32_i32:
2141 tcg_out_rev32(s, a0, a1);
2143 case INDEX_op_bswap16_i64:
2144 case INDEX_op_bswap16_i32:
2145 tcg_out_rev16(s, a0, a1);
/* Sign and zero extensions. */
2148 case INDEX_op_ext8s_i64:
2149 case INDEX_op_ext8s_i32:
2150 tcg_out_sxt(s, ext, MO_8, a0, a1);
2152 case INDEX_op_ext16s_i64:
2153 case INDEX_op_ext16s_i32:
2154 tcg_out_sxt(s, ext, MO_16, a0, a1);
2156 case INDEX_op_ext_i32_i64:
2157 case INDEX_op_ext32s_i64:
2158 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2160 case INDEX_op_ext8u_i64:
2161 case INDEX_op_ext8u_i32:
2162 tcg_out_uxt(s, MO_8, a0, a1);
2164 case INDEX_op_ext16u_i64:
2165 case INDEX_op_ext16u_i32:
2166 tcg_out_uxt(s, MO_16, a0, a1);
2168 case INDEX_op_extu_i32_i64:
2169 case INDEX_op_ext32u_i64:
2170 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
/* Bitfield deposit and extract. */
2173 case INDEX_op_deposit_i64:
2174 case INDEX_op_deposit_i32:
2175 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2178 case INDEX_op_extract_i64:
2179 case INDEX_op_extract_i32:
2180 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2183 case INDEX_op_sextract_i64:
2184 case INDEX_op_sextract_i32:
2185 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2188 case INDEX_op_extract2_i64:
2189 case INDEX_op_extract2_i32:
2190 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
/* Double-word add and subtract. */
2193 case INDEX_op_add2_i32:
2194 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2195 (int32_t)args[4], args[5], const_args[4],
2196 const_args[5], false);
2198 case INDEX_op_add2_i64:
2199 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2200 args[5], const_args[4], const_args[5], false);
2202 case INDEX_op_sub2_i32:
2203 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2204 (int32_t)args[4], args[5], const_args[4],
2205 const_args[5], true);
2207 case INDEX_op_sub2_i64:
2208 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2209 args[5], const_args[4], const_args[5], true);
/* High half of a 64x64 multiply. */
2212 case INDEX_op_muluh_i64:
2213 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2215 case INDEX_op_mulsh_i64:
2216 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
/* Opcodes that must never reach this function. */
2223 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2224 case INDEX_op_mov_i64:
2225 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2227 g_assert_not_reached();
/*
 * Emit host code for one vector TCG opcode OPC with vector length code
 * VECL and element size code VECE.
 * ARGS holds the operands and CONST_ARGS flags which of them are
 * constants.
 * NOTE(review): the switch statement, the break statements, the
 * assignments of a0..a3, the declarations of cmode, imm8, insn and t, and
 * several if tests are not visible in this listing.
 */
2233 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2234 unsigned vecl, unsigned vece,
2235 const TCGArg *args, const int *const_args)
/* Register-register compare instruction per condition; 0 means none. */
2237 static const AArch64Insn cmp_insn[16] = {
2238 [TCG_COND_EQ] = I3616_CMEQ,
2239 [TCG_COND_GT] = I3616_CMGT,
2240 [TCG_COND_GE] = I3616_CMGE,
2241 [TCG_COND_GTU] = I3616_CMHI,
2242 [TCG_COND_GEU] = I3616_CMHS,
/* Compare-against-zero instruction per condition; 0 means none. */
2244 static const AArch64Insn cmp0_insn[16] = {
2245 [TCG_COND_EQ] = I3617_CMEQ0,
2246 [TCG_COND_GT] = I3617_CMGT0,
2247 [TCG_COND_GE] = I3617_CMGE0,
2248 [TCG_COND_LT] = I3617_CMLT0,
2249 [TCG_COND_LE] = I3617_CMLE0,
2252 TCGType type = vecl + TCG_TYPE_V64;
2253 unsigned is_q = vecl;
2254 TCGArg a0, a1, a2, a3;
/* Memory operations. */
2262 case INDEX_op_ld_vec:
2263 tcg_out_ld(s, type, a0, a1, a2);
2265 case INDEX_op_st_vec:
2266 tcg_out_st(s, type, a0, a1, a2);
2268 case INDEX_op_dupm_vec:
2269 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
/* Element-wise arithmetic. */
2271 case INDEX_op_add_vec:
2272 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2274 case INDEX_op_sub_vec:
2275 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2277 case INDEX_op_mul_vec:
2278 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2280 case INDEX_op_neg_vec:
2281 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2283 case INDEX_op_abs_vec:
2284 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
/* Bitwise logic; constant operands go through is_shimm1632(). */
2286 case INDEX_op_and_vec:
2287 if (const_args[2]) {
2288 is_shimm1632(~a2, &cmode, &imm8);
2290 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2293 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2296 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2298 case INDEX_op_or_vec:
2299 if (const_args[2]) {
2300 is_shimm1632(a2, &cmode, &imm8);
2302 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2305 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2308 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2310 case INDEX_op_andc_vec:
2311 if (const_args[2]) {
2312 is_shimm1632(a2, &cmode, &imm8);
2314 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2317 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2320 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2322 case INDEX_op_orc_vec:
2323 if (const_args[2]) {
2324 is_shimm1632(~a2, &cmode, &imm8);
2326 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2329 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2332 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2334 case INDEX_op_xor_vec:
2335 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
/* Saturating arithmetic and min/max. */
2337 case INDEX_op_ssadd_vec:
2338 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2340 case INDEX_op_sssub_vec:
2341 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2343 case INDEX_op_usadd_vec:
2344 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2346 case INDEX_op_ussub_vec:
2347 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2349 case INDEX_op_smax_vec:
2350 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2352 case INDEX_op_smin_vec:
2353 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2355 case INDEX_op_umax_vec:
2356 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2358 case INDEX_op_umin_vec:
2359 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2361 case INDEX_op_not_vec:
2362 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
/*
 * Immediate shifts: the last operand combines the shift count with the
 * element width in bits (8 << vece); left shifts add the width, right
 * shifts subtract the count from twice the width.
 */
2364 case INDEX_op_shli_vec:
2365 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2367 case INDEX_op_shri_vec:
2368 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2370 case INDEX_op_sari_vec:
2371 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2373 case INDEX_op_aa64_sli_vec:
2374 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
/* Shifts by per-element register counts. */
2376 case INDEX_op_shlv_vec:
2377 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2379 case INDEX_op_aa64_sshl_vec:
2380 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
/* Comparisons. */
2382 case INDEX_op_cmp_vec:
2384 TCGCond cond = args[3];
2387 if (cond == TCG_COND_NE) {
2388 if (const_args[2]) {
/* Constant operand: CMTST of a1 with itself. */
2389 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
/* Register operand: CMEQ followed by NOT of the result. */
2391 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2392 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2395 if (const_args[2]) {
2396 insn = cmp0_insn[cond];
2398 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2401 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2404 insn = cmp_insn[cond];
/* Swap the operands and the condition, then look the insn up again. */
2407 t = a1, a1 = a2, a2 = t;
2408 cond = tcg_swap_cond(cond);
2409 insn = cmp_insn[cond];
2410 tcg_debug_assert(insn != 0);
2412 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
/* Bit select: the instruction depends on which input a0 overlaps. */
2417 case INDEX_op_bitsel_vec:
2420 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2421 } else if (a0 == a2) {
2422 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2425 tcg_out_mov(s, type, a0, a1);
2427 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
/* Opcodes that must never reach this function. */
2431 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2432 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2434 g_assert_not_reached();
/*
 * Report whether the backend can emit vector opcode OPC for the given
 * TYPE and element size VECE.
 * The opcodes fall into three visible groups; only the last group has a
 * visible result, which is nonzero only for elements smaller than 64
 * bits.
 * NOTE(review): the switch statement, the return values of the first
 * two groups and the default case are not visible in this listing.  The
 * second group matches the opcodes handled by tcg_expand_vec_op(), so it
 * presumably signals that expansion is required -- confirm.
 */
2438 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
/* First group. */
2441 case INDEX_op_add_vec:
2442 case INDEX_op_sub_vec:
2443 case INDEX_op_and_vec:
2444 case INDEX_op_or_vec:
2445 case INDEX_op_xor_vec:
2446 case INDEX_op_andc_vec:
2447 case INDEX_op_orc_vec:
2448 case INDEX_op_neg_vec:
2449 case INDEX_op_abs_vec:
2450 case INDEX_op_not_vec:
2451 case INDEX_op_cmp_vec:
2452 case INDEX_op_shli_vec:
2453 case INDEX_op_shri_vec:
2454 case INDEX_op_sari_vec:
2455 case INDEX_op_ssadd_vec:
2456 case INDEX_op_sssub_vec:
2457 case INDEX_op_usadd_vec:
2458 case INDEX_op_ussub_vec:
2459 case INDEX_op_shlv_vec:
2460 case INDEX_op_bitsel_vec:
/* Second group. */
2462 case INDEX_op_rotli_vec:
2463 case INDEX_op_shrv_vec:
2464 case INDEX_op_sarv_vec:
2465 case INDEX_op_rotlv_vec:
2466 case INDEX_op_rotrv_vec:
/* Third group: available only when vece is below MO_64. */
2468 case INDEX_op_mul_vec:
2469 case INDEX_op_smax_vec:
2470 case INDEX_op_smin_vec:
2471 case INDEX_op_umax_vec:
2472 case INDEX_op_umin_vec:
2473 return vece < MO_64;
/*
 * Expand a vector opcode that has no single host instruction into a
 * sequence of other vector operations.
 * The destination is taken from a0 and further operands are read from a
 * variable argument list: first the source vector v1, then a2, which is
 * either an immediate count or a second vector argument depending on
 * OPC.
 * NOTE(review): the rest of the parameter list, the va_list setup and
 * teardown, the switch statement and the break statements are not
 * visible in this listing.
 */
2480 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2484 TCGv_vec v0, v1, v2, t1, t2, c1;
2488 v0 = temp_tcgv_vec(arg_temp(a0));
2489 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2490 a2 = va_arg(va, TCGArg);
/*
 * Rotate left by immediate: shift right by the negated count modulo the
 * element width, then combine with v1 through the SLI opcode.
 */
2494 case INDEX_op_rotli_vec:
2495 t1 = tcg_temp_new_vec(type);
2496 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2497 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2498 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2499 tcg_temp_free_vec(t1);
2502 case INDEX_op_shrv_vec:
2503 case INDEX_op_sarv_vec:
2504 /* Right shifts are negative left shifts for AArch64. */
2505 v2 = temp_tcgv_vec(arg_temp(a2));
2506 t1 = tcg_temp_new_vec(type);
2507 tcg_gen_neg_vec(vece, t1, v2);
/* Logical shifts map to shlv, arithmetic shifts to aa64_sshl. */
2508 opc = (opc == INDEX_op_shrv_vec
2509 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2510 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2511 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2512 tcg_temp_free_vec(t1);
/* Rotate left by vector: OR of v1 shifted by v2 and by v2 minus the width. */
2515 case INDEX_op_rotlv_vec:
2516 v2 = temp_tcgv_vec(arg_temp(a2));
2517 t1 = tcg_temp_new_vec(type);
2518 c1 = tcg_constant_vec(type, vece, 8 << vece);
2519 tcg_gen_sub_vec(vece, t1, v2, c1);
2520 /* Right shifts are negative left shifts for AArch64. */
2521 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2522 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2523 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2524 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2525 tcg_gen_or_vec(vece, v0, v0, t1);
2526 tcg_temp_free_vec(t1);
/* Rotate right by vector: OR of v1 shifted by -v2 and by the width minus v2. */
2529 case INDEX_op_rotrv_vec:
2530 v2 = temp_tcgv_vec(arg_temp(a2));
2531 t1 = tcg_temp_new_vec(type);
2532 t2 = tcg_temp_new_vec(type);
2533 c1 = tcg_constant_vec(type, vece, 8 << vece);
2534 tcg_gen_neg_vec(vece, t1, v2);
2535 tcg_gen_sub_vec(vece, t2, c1, v2);
2536 /* Right shifts are negative left shifts for AArch64. */
2537 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2538 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2539 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2540 tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2541 tcg_gen_or_vec(vece, v0, t1, t2);
2542 tcg_temp_free_vec(t1);
2543 tcg_temp_free_vec(t2);
/* Any other opcode is a programming error. */
2547 g_assert_not_reached();
/*
 * Return the operand-constraint set to use for opcode OP.
 *
 * Each C_O<m>_I<n>(...) value names a set with <m> output and <n> input
 * operands and carries one constraint per operand (m + n arguments).
 * The constraint letters are defined outside this excerpt; from their
 * use here, "r" appears on integer ops and "w" on vector ops.
 * NOTE(review): presumably outputs are listed first and a digit such as
 * "0" ties an input to that output operand -- confirm against the
 * constraint definitions.
 */
2551 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
/* NOTE(review): the result for goto_ptr is not visible in this excerpt. */
2554 case INDEX_op_goto_ptr:
/* Host loads and unary integer ops: one output, one input. */
2557 case INDEX_op_ld8u_i32:
2558 case INDEX_op_ld8s_i32:
2559 case INDEX_op_ld16u_i32:
2560 case INDEX_op_ld16s_i32:
2561 case INDEX_op_ld_i32:
2562 case INDEX_op_ld8u_i64:
2563 case INDEX_op_ld8s_i64:
2564 case INDEX_op_ld16u_i64:
2565 case INDEX_op_ld16s_i64:
2566 case INDEX_op_ld32u_i64:
2567 case INDEX_op_ld32s_i64:
2568 case INDEX_op_ld_i64:
2569 case INDEX_op_neg_i32:
2570 case INDEX_op_neg_i64:
2571 case INDEX_op_not_i32:
2572 case INDEX_op_not_i64:
2573 case INDEX_op_bswap16_i32:
2574 case INDEX_op_bswap32_i32:
2575 case INDEX_op_bswap16_i64:
2576 case INDEX_op_bswap32_i64:
2577 case INDEX_op_bswap64_i64:
2578 case INDEX_op_ext8s_i32:
2579 case INDEX_op_ext16s_i32:
2580 case INDEX_op_ext8u_i32:
2581 case INDEX_op_ext16u_i32:
2582 case INDEX_op_ext8s_i64:
2583 case INDEX_op_ext16s_i64:
2584 case INDEX_op_ext32s_i64:
2585 case INDEX_op_ext8u_i64:
2586 case INDEX_op_ext16u_i64:
2587 case INDEX_op_ext32u_i64:
2588 case INDEX_op_ext_i32_i64:
2589 case INDEX_op_extu_i32_i64:
2590 case INDEX_op_extract_i32:
2591 case INDEX_op_extract_i64:
2592 case INDEX_op_sextract_i32:
2593 case INDEX_op_sextract_i64:
2594 return C_O1_I1(r, r);
/* Host stores: no output, two inputs. */
2596 case INDEX_op_st8_i32:
2597 case INDEX_op_st16_i32:
2598 case INDEX_op_st_i32:
2599 case INDEX_op_st8_i64:
2600 case INDEX_op_st16_i64:
2601 case INDEX_op_st32_i64:
2602 case INDEX_op_st_i64:
2603 return C_O0_I2(rZ, r);
/* Add, subtract and setcond share the "rA" second-input constraint. */
2605 case INDEX_op_add_i32:
2606 case INDEX_op_add_i64:
2607 case INDEX_op_sub_i32:
2608 case INDEX_op_sub_i64:
2609 case INDEX_op_setcond_i32:
2610 case INDEX_op_setcond_i64:
2611 return C_O1_I2(r, r, rA);
/* Multiply, divide, remainder and high-multiply: plain "r" operands only. */
2613 case INDEX_op_mul_i32:
2614 case INDEX_op_mul_i64:
2615 case INDEX_op_div_i32:
2616 case INDEX_op_div_i64:
2617 case INDEX_op_divu_i32:
2618 case INDEX_op_divu_i64:
2619 case INDEX_op_rem_i32:
2620 case INDEX_op_rem_i64:
2621 case INDEX_op_remu_i32:
2622 case INDEX_op_remu_i64:
2623 case INDEX_op_muluh_i64:
2624 case INDEX_op_mulsh_i64:
2625 return C_O1_I2(r, r, r);
/* Bitwise logic ops share the "rL" second-input constraint. */
2627 case INDEX_op_and_i32:
2628 case INDEX_op_and_i64:
2629 case INDEX_op_or_i32:
2630 case INDEX_op_or_i64:
2631 case INDEX_op_xor_i32:
2632 case INDEX_op_xor_i64:
2633 case INDEX_op_andc_i32:
2634 case INDEX_op_andc_i64:
2635 case INDEX_op_orc_i32:
2636 case INDEX_op_orc_i64:
2637 case INDEX_op_eqv_i32:
2638 case INDEX_op_eqv_i64:
2639 return C_O1_I2(r, r, rL);
/* Shifts and rotates share the "ri" second-input constraint. */
2641 case INDEX_op_shl_i32:
2642 case INDEX_op_shr_i32:
2643 case INDEX_op_sar_i32:
2644 case INDEX_op_rotl_i32:
2645 case INDEX_op_rotr_i32:
2646 case INDEX_op_shl_i64:
2647 case INDEX_op_shr_i64:
2648 case INDEX_op_sar_i64:
2649 case INDEX_op_rotl_i64:
2650 case INDEX_op_rotr_i64:
2651 return C_O1_I2(r, r, ri);
/* Count leading/trailing zeros. */
2653 case INDEX_op_clz_i32:
2654 case INDEX_op_ctz_i32:
2655 case INDEX_op_clz_i64:
2656 case INDEX_op_ctz_i64:
2657 return C_O1_I2(r, r, rAL);
/* Conditional branch: no output, two compared inputs. */
2659 case INDEX_op_brcond_i32:
2660 case INDEX_op_brcond_i64:
2661 return C_O0_I2(r, rA);
/* Conditional move: one output, four inputs. */
2663 case INDEX_op_movcond_i32:
2664 case INDEX_op_movcond_i64:
2665 return C_O1_I4(r, r, rA, rZ, rZ);
/* Guest memory accesses use the separate "l" constraint on some operands. */
2667 case INDEX_op_qemu_ld_i32:
2668 case INDEX_op_qemu_ld_i64:
2669 return C_O1_I1(r, l);
2670 case INDEX_op_qemu_st_i32:
2671 case INDEX_op_qemu_st_i64:
2672 return C_O0_I2(lZ, l);
/* Deposit: first input carries the "0" constraint. */
2674 case INDEX_op_deposit_i32:
2675 case INDEX_op_deposit_i64:
2676 return C_O1_I2(r, 0, rZ);
2678 case INDEX_op_extract2_i32:
2679 case INDEX_op_extract2_i64:
2680 return C_O1_I2(r, rZ, rZ);
/* Double-word add/subtract: two outputs, four inputs. */
2682 case INDEX_op_add2_i32:
2683 case INDEX_op_add2_i64:
2684 case INDEX_op_sub2_i32:
2685 case INDEX_op_sub2_i64:
2686 return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
/* Vector ops from here on: binary ops taking only "w" operands. */
2688 case INDEX_op_add_vec:
2689 case INDEX_op_sub_vec:
2690 case INDEX_op_mul_vec:
2691 case INDEX_op_xor_vec:
2692 case INDEX_op_ssadd_vec:
2693 case INDEX_op_sssub_vec:
2694 case INDEX_op_usadd_vec:
2695 case INDEX_op_ussub_vec:
2696 case INDEX_op_smax_vec:
2697 case INDEX_op_smin_vec:
2698 case INDEX_op_umax_vec:
2699 case INDEX_op_umin_vec:
2700 case INDEX_op_shlv_vec:
2701 case INDEX_op_shrv_vec:
2702 case INDEX_op_sarv_vec:
2703 case INDEX_op_aa64_sshl_vec:
2704 return C_O1_I2(w, w, w);
/* Unary vector ops and immediate shifts. */
2705 case INDEX_op_not_vec:
2706 case INDEX_op_neg_vec:
2707 case INDEX_op_abs_vec:
2708 case INDEX_op_shli_vec:
2709 case INDEX_op_shri_vec:
2710 case INDEX_op_sari_vec:
2711 return C_O1_I1(w, w);
/* Vector load / dup-from-memory: "w" output, "r" input. */
2712 case INDEX_op_ld_vec:
2713 case INDEX_op_dupm_vec:
2714 return C_O1_I1(w, r);
2715 case INDEX_op_st_vec:
2716 return C_O0_I2(w, r);
/* dup_vec input carries both the "w" and "r" letters. */
2717 case INDEX_op_dup_vec:
2718 return C_O1_I1(w, wr);
/* or/andc use "wO", and/orc use "wN", cmp uses "wZ" for the second input. */
2719 case INDEX_op_or_vec:
2720 case INDEX_op_andc_vec:
2721 return C_O1_I2(w, w, wO);
2722 case INDEX_op_and_vec:
2723 case INDEX_op_orc_vec:
2724 return C_O1_I2(w, w, wN);
2725 case INDEX_op_cmp_vec:
2726 return C_O1_I2(w, w, wZ);
2727 case INDEX_op_bitsel_vec:
2728 return C_O1_I3(w, w, w, w);
/* aa64_sli_vec: first input carries the "0" constraint, like deposit. */
2729 case INDEX_op_aa64_sli_vec:
2730 return C_O1_I2(w, 0, w);
/* Reaching this assertion means an opcode with no entry above. */
2733 g_assert_not_reached();
/*
 * Set up the backend's register sets: which registers each TCG type may
 * use, which registers a call clobbers, and which are never allocated.
 */
2737 static void tcg_target_init(TCGContext *s)
/*
 * Integer types get the low 32 bits of the register set and vector
 * types the high 32 bits, matching the order of the register name table
 * (x0..sp first, then v0..v31).
 */
2739 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2740 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2741 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2742 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
/*
 * Start from "every register is call-clobbered", then remove the ones
 * treated as preserved across calls: x19..x29 and v8..v15.
 */
2744 tcg_target_call_clobber_regs = -1ull;
2745 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2746 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2747 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2748 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2749 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2750 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2751 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2752 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2753 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2754 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2755 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2756 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2757 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2758 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2759 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2760 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2761 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2762 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2763 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
/*
 * Registers withheld from allocation: the stack pointer, the frame
 * pointer, the backend's integer and vector scratch registers, and x18.
 */
2765 s->reserved_regs = 0;
2766 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2767 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2768 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2769 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2770 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
/*
 * Stack-frame layout constants used by the prologue/epilogue generator
 * and by the DWARF unwind description further down.
 */
2773 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
/* Twelve 8-byte slots: x19 through x30 inclusive, i.e. 96 bytes. */
2774 #define PUSH_SIZE ((30 - 19 + 1) * 8)
/*
 * Total frame size: the sum below rounded up to a multiple of
 * TCG_TARGET_STACK_ALIGN (add align - 1, then mask the low bits off).
 * NOTE(review): the first term of the sum is not visible in this excerpt.
 */
2776 #define FRAME_SIZE \
2778 + TCG_STATIC_CALL_ARGS_SIZE \
2779 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2780 + TCG_TARGET_STACK_ALIGN - 1) \
2781 & ~(TCG_TARGET_STACK_ALIGN - 1))
/* The unwind data encodes FRAME_SIZE in two uleb128 bytes (7 bits each),
   hence the 14-bit limit. */
2783 /* We're expecting a 2 byte uleb128 encoded value. */
2784 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
/* The prologue/epilogue adjust SP by FRAME_SIZE - PUSH_SIZE using one
   SUBI/ADDI, so the difference is limited to 0xfff. */
2786 /* We're expecting to use a single ADDI insn. */
2787 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
/*
 * Emit the code that enters and leaves generated translation blocks.
 *
 * Entry sequence: save FP/LR and x19..x28, carve out the local frame,
 * tell TCG where the frame lives, optionally load guest_base (builds
 * without CONFIG_SOFTMMU only), copy the first call argument into
 * TCG_AREG0 and branch to the address in the second call argument.
 *
 * Exit sequence: two entry points are recorded.  tcg_code_gen_epilogue
 * zeroes X0 first; tb_ret_addr follows it.  Both then release the local
 * frame, restore x19..x28, pop FP/LR and return.
 *
 * The save area matches PUSH_SIZE: FP/LR sit at offsets 0 and 8 from the
 * adjusted SP, and each (r, r + 1) pair starts at (r - x19 + 2) * 8.
 */
2789 static void tcg_target_qemu_prologue(TCGContext *s)
2793 /* Push (FP, LR) and allocate space for all saved registers. */
/* NOTE(review): the trailing "1, 1" flags presumably select pre-indexed
   addressing with writeback -- confirm against the 3314 emitter. */
2794 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2795 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2797 /* Set up frame pointer for canonical unwinding. */
2798 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2800 /* Store callee-preserved regs x19..x28. */
/* Five pairs; offsets 16, 32, 48, 64, 80 from SP, above the FP/LR slot. */
2801 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2802 int ofs = (r - TCG_REG_X19 + 2) * 8;
2803 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2806 /* Make stack space for TCG locals. */
2807 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2808 FRAME_SIZE - PUSH_SIZE);
2810 /* Inform TCG about how to find TCG locals with register, offset, size. */
2811 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2812 CPU_TEMP_BUF_NLONGS * sizeof(long));
/* When a guest base is in use, load it into its dedicated register and
   mark that register reserved so it is not allocated. */
2814 #if !defined(CONFIG_SOFTMMU)
2815 if (USE_GUEST_BASE) {
2816 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2817 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
/* First call argument becomes TCG_AREG0; second is the branch target. */
2821 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2822 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2825 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2826 * and fall through to the rest of the epilogue.
2828 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2829 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
/* Second exit entry point, recorded after the X0 = 0 instruction. */
2832 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2834 /* Remove TCG locals stack space. */
2835 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2836 FRAME_SIZE - PUSH_SIZE);
2838 /* Restore registers x19..x28. */
/* Mirrors the store loop above: same pairs, same offsets. */
2839 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2840 int ofs = (r - TCG_REG_X19 + 2) * 8;
2841 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2844 /* Pop (FP, LR), restore SP to previous frame. */
/* NOTE(review): the trailing "0, 1" flags presumably select post-indexed
   addressing with writeback -- confirm against the 3314 emitter. */
2845 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2846 TCG_REG_SP, PUSH_SIZE, 0, 1);
2847 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
/*
 * Visit COUNT instruction units starting at P.
 * NOTE(review): the loop body is not visible in this excerpt; going by
 * the name it presumably stores a NOP encoding into each p[i] -- confirm.
 */
2850 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2853 for (i = 0; i < count; ++i) {
/* Four bytes for the CFA definition: opcode, register, and a two-byte
   uleb128 frame size (see the debug_frame initializer). */
2860 uint8_t fde_def_cfa[4];
/* Twenty-four bytes: twelve two-byte register-offset entries, one for
   each of x19..x30. */
2861 uint8_t fde_reg_ofs[24];
/* ELF machine identifier for this host (AArch64).
   NOTE(review): presumably consumed by the generic JIT-registration code,
   which is not visible here. */
2864 #define ELF_HOST_MACHINE EM_AARCH64
/*
 * Static DWARF call-frame description for the code emitted by
 * tcg_target_qemu_prologue().
 *
 * The CFA is defined as SP + FRAME_SIZE.  Each register entry below is a
 * DW_CFA_offset (0x80 + register number) followed by a factored offset N;
 * with the data alignment factor of -8 the register is saved at
 * CFA - 8 * N.  These match the prologue's stores: x28 nearest the CFA,
 * down to fp at CFA - 96 (= PUSH_SIZE).
 */
2866 static const DebugFrame debug_frame = {
2867 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2870 .h.cie.code_align = 1,
2871 .h.cie.data_align = 0x78, /* sleb128 -8 */
2872 .h.cie.return_column = TCG_REG_LR,
2874 /* Total FDE size does not include the "len" member. */
2875 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
/* Low seven bits of FRAME_SIZE with the uleb128 continuation bit set.
   NOTE(review): the second uleb128 byte is not visible in this excerpt. */
2878 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2879 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2883 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2884 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2885 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2886 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2887 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2888 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2889 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2890 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2891 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2892 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2893 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2894 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
/*
 * Register the generated-code buffer BUF of BUF_SIZE bytes by forwarding
 * it, together with the static debug_frame unwind description and its
 * size, to tcg_register_jit_int().
 */
2898 void tcg_register_jit(const void *buf, size_t buf_size)
2900 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));