 * Initial TCG Implementation for aarch64
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 * See the COPYING file in the top-level directory for details.

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"
/* We're going to re-use TCGType to set the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
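
/*
 * Editor's illustration (not in the original): because of the assertion
 * above, a TCGType value can be shifted straight into bit 31 (the SF
 * field) of an A64 data-processing instruction, as in
 *
 *     tcg_out32(s, insn | ext << 31 | ...);
 *
 * selecting the 32-bit form for TCG_TYPE_I32 and the 64-bit form for
 * TCG_TYPE_I64.  The emitters below rely on exactly this property.
 */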

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
    tcg_debug_assert(addend == 0);
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
        g_assert_not_reached();

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#define ALL_QLDST_REGS   ALL_GENERAL_REGS

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
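
/*
 * Editor's worked examples for is_aimm (illustrative, not in the
 * original): 0xfff matches as an unshifted 12-bit immediate and
 * 0x555000 matches as 0x555 shifted left by 12, while 0x1001 touches
 * both halves and fails, so such a constant must be loaded into a
 * register first.
 */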

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
    return (val & (val - 1)) == 0;
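
/*
 * Editor's worked examples for is_limm (illustrative, relying in part on
 * the elided middle of the function): under this simplified view, 0x0ff0
 * (one run of ones) and 0xffffffffffff00ff (a run of ones wrapping around
 * the msb) both match, while a replicated pattern such as 0x00ff00ff is
 * rejected even though the architecture could encode it.
 */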

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
    if (v16 == (v16 & 0xff)) {
    } else if (v16 == (v16 & 0xff00)) {

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
    if (v32 == (v32 & 0xff)) {
    } else if (v32 == (v32 & 0xff00)) {
        *imm8 = (v32 >> 8) & 0xff;
    } else if (v32 == (v32 & 0xff0000)) {
        *imm8 = (v32 >> 16) & 0xff;
    } else if (v32 == (v32 & 0xff000000)) {

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
    if ((v32 & 0xffff00ff) == 0xff) {
        *imm8 = (v32 >> 8) & 0xff;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *imm8 = (v32 >> 16) & 0xff;

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
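
/*
 * Editor's worked example for is_fimm32 (illustrative): 1.0f is
 * 0x3f800000, whose low 19 bits are zero and whose bits [30:25] are
 * 0x1f, so it matches with imm8 = 0x70 and can be materialized by a
 * single modified-immediate MOVI (with the cmode chosen in the elided
 * lines).
 */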

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);

 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    return is_shimm32(v32, cmode, imm8);

static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
    if (ct & TCG_CT_CONST) {
    if (type == TCG_TYPE_I32) {
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case TCG_CT_CONST_ANDI:
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
        return is_shimm1632(val, &cmode, &imm8);
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();

enum aarch64_cond_code {
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_NV = 0xf,     /* behaves like COND_AL here */

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,

    LDST_ST = 0,      /* store */
    LDST_LD = 1,      /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */

    /* Compare and branch (immediate).  */
    I3201_CBZ = 0x34000000,
    I3201_CBNZ = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B = 0x14000000,
    I3206_BL = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR = 0xd61f0000,
    I3207_BLR = 0xd63f0000,
    I3207_RET = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR = 0x58000000,
    I3305_LDR_v64 = 0x5c000000,
    I3305_LDR_v128 = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310 = 0x00200800,
    I3312_TO_I3313 = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP = 0x28400000,
    I3314_STP = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI = 0x11000000,
    I3401_ADDSI = 0x31000000,
    I3401_SUBI = 0x51000000,
    I3401_SUBSI = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM = 0x33000000,
    I3402_SBFM = 0x13000000,
    I3402_UBFM = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI = 0x12000000,
    I3404_ORRI = 0x32000000,
    I3404_EORI = 0x52000000,
    I3404_ANDSI = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN = 0x12800000,
    I3405_MOVZ = 0x52800000,
    I3405_MOVK = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR = 0x10000000,
    I3406_ADRP = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD = 0x0b000000,
    I3502_ADDS = 0x2b000000,
    I3502_SUB = 0x4b000000,
    I3502_SUBS = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC = 0x1a000000,
    I3503_SBC = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL = 0x1a800000,
    I3506_CSINC = 0x1a800400,
    I3506_CSINV = 0x5a800000,
    I3506_CSNEG = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ = 0x5ac01000,
    I3507_RBIT = 0x5ac00000,
    I3507_REV = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV = 0x1ac02000,
    I3508_LSRV = 0x1ac02400,
    I3508_ASRV = 0x1ac02800,
    I3508_RORV = 0x1ac02c00,
    I3508_SMULH = 0x9b407c00,
    I3508_UMULH = 0x9bc07c00,
    I3508_UDIV = 0x1ac00800,
    I3508_SDIV = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD = 0x1b000000,
    I3509_MSUB = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND = 0x0a000000,
    I3510_BIC = 0x0a200000,
    I3510_ORR = 0x2a000000,
    I3510_ORN = 0x2a200000,
    I3510_EOR = 0x4a000000,
    I3510_EON = 0x4a200000,
    I3510_ANDS = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR = I3510_AND | (1 << 22),

    I3605_DUP = 0x0e000400,
    I3605_INS = 0x4e001c00,
    I3605_UMOV = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI = 0x0f000400,
    I3606_MVNI = 0x2f000400,
    I3606_BIC = 0x2f001400,
    I3606_ORR = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR = 0x5f000400,
    I3609_SSRA = 0x5f001400,
    I3609_SHL = 0x5f005400,
    I3609_USHR = 0x7f000400,
    I3609_USRA = 0x7f001400,
    I3609_SLI = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD = 0x5e200c00,
    I3611_SQSUB = 0x5e202c00,
    I3611_CMGT = 0x5e203400,
    I3611_CMGE = 0x5e203c00,
    I3611_SSHL = 0x5e204400,
    I3611_ADD = 0x5e208400,
    I3611_CMTST = 0x5e208c00,
    I3611_UQADD = 0x7e200c00,
    I3611_UQSUB = 0x7e202c00,
    I3611_CMHI = 0x7e203400,
    I3611_CMHS = 0x7e203c00,
    I3611_USHL = 0x7e204400,
    I3611_SUB = 0x7e208400,
    I3611_CMEQ = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0 = 0x5e208800,
    I3612_CMEQ0 = 0x5e209800,
    I3612_CMLT0 = 0x5e20a800,
    I3612_ABS = 0x5e20b800,
    I3612_CMGE0 = 0x7e208800,
    I3612_CMLE0 = 0x7e209800,
    I3612_NEG = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR = 0x0f000400,
    I3614_SSRA = 0x0f001400,
    I3614_SHL = 0x0f005400,
    I3614_SLI = 0x2f005400,
    I3614_USHR = 0x2f000400,
    I3614_USRA = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD = 0x0e208400,
    I3616_AND = 0x0e201c00,
    I3616_BIC = 0x0e601c00,
    I3616_BIF = 0x2ee01c00,
    I3616_BIT = 0x2ea01c00,
    I3616_BSL = 0x2e601c00,
    I3616_EOR = 0x2e201c00,
    I3616_MUL = 0x0e209c00,
    I3616_ORR = 0x0ea01c00,
    I3616_ORN = 0x0ee01c00,
    I3616_SUB = 0x2e208400,
    I3616_CMGT = 0x0e203400,
    I3616_CMGE = 0x0e203c00,
    I3616_CMTST = 0x0e208c00,
    I3616_CMHI = 0x2e203400,
    I3616_CMHS = 0x2e203c00,
    I3616_CMEQ = 0x2e208c00,
    I3616_SMAX = 0x0e206400,
    I3616_SMIN = 0x0e206c00,
    I3616_SSHL = 0x0e204400,
    I3616_SQADD = 0x0e200c00,
    I3616_SQSUB = 0x0e202c00,
    I3616_UMAX = 0x2e206400,
    I3616_UMIN = 0x2e206c00,
    I3616_UQADD = 0x2e200c00,
    I3616_UQSUB = 0x2e202c00,
    I3616_USHL = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0 = 0x0e208800,
    I3617_CMEQ0 = 0x0e209800,
    I3617_CMLT0 = 0x0e20a800,
    I3617_CMGE0 = 0x2e208800,
    I3617_CMLE0 = 0x2e209800,
    I3617_NOT = 0x2e205800,
    I3617_ABS = 0x0e20b800,
    I3617_NEG = 0x2e20b800,

    /* System instructions.  */
    DMB_ISH = 0xd50338bf,

static inline uint32_t tcg_in32(TCGContext *s)
    uint32_t v = *(uint32_t *)s->code_ptr;

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
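
/*
 * Editor's illustration (not in the original): a call such as
 *
 *     tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
 *
 * pastes together into
 *
 *     tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm);
 *
 * so naming an opcode from the wrong format group simply fails to
 * compile -- the "type-checking" the comment above refers to.
 */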

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
    tcg_out32(s, insn | (imm26 & 0x03ffffff));

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
    tcg_out32(s, insn | rn << 5);

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
    insn |= 1u << 31; /* ext */
    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);
    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
        tcg_debug_assert((aimm & 0xfff) == 0);
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field.  */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
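
/*
 * Editor's worked example for tcg_out_insn_3406 (illustrative): for ADR
 * with disp = +8, the low two bits (0) land in immlo at bit 29 and
 * disp >> 2 (= 2) lands in immhi at bit 5, matching the split immediate
 * layout of ADR/ADRP.
 */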

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);

/* This function is for both 3.5.2 (Add/subtract shifted register)
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));

/* Register to register move using ORR (shifted register with no shift).  */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
    tcg_debug_assert(is_limm(limm));
        r = 0;                      /* form 0....01....1 */
        c = ctz64(~limm) - 1;
            r = clz64(~limm);       /* form 1..10..01..1 */
        r = 64 - l;                 /* form 1....10....0 or 0..01..10..0 */
    if (ext == TCG_TYPE_I32) {
    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
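
/*
 * Editor's worked example for tcg_out_logicali (assuming the usual
 * N:immr:imms bitmask encoding): AND w0, w1, #0xff comes out with
 * n = 0, immr = 0 and imms = 7, i.e. eight consecutive ones with no
 * rotation, which DecodeBitMasks expands back into 0xff.
 */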

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
    bool q = type == TCG_TYPE_V128;

    /* Test all bytes equal first.  */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);

     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        } else if (byte != 0) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);

     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);

         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
    } else if (vece == MO_32) {
        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);

         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
        i = is_shimm32_pair(v32, &cmode, &imm8);
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
        i = is_shimm32_pair(n32, &cmode, &imm8);
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);

     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        AArch64Insn add_insn = I3401_ADDI;
            add_insn = I3401_SUBI;
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
        tcg_debug_assert(rd < 32);
        g_assert_not_reached();

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
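
    /*
     * Editor's worked examples (illustrative): 0x1234 is emitted as a
     * single MOVZ rd, #0x1234, and -5 (ivalue 4) as a single MOVN rd, #4,
     * since MOVN writes the bitwise inverse of its shifted immediate.
     */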

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
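
    /*
     * Editor's worked example (illustrative): 0xdeadbeef fits in 32 bits,
     * so it is built with SF clear in two insns, MOVZ rd, #0xbeef then
     * MOVK rd, #0xdead, lsl #16; a constant needing more than two moves
     * is spilled to the pool and fetched with the pc-relative LDR above.
     */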

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
    /* This function is only used for passing structs by reference.  */
    g_assert_not_reached();

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
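
/*
 * Editor's worked examples for tcg_out_ldst (illustrative): a 64-bit load
 * at offset 8 (lgsize 3) uses the scaled uimm12 form with scaled_uimm = 1;
 * offset -8 falls back to the unscaled signed 9-bit form; an offset such
 * as 0x123456 is moved into TCG_REG_TMP and the register-offset form used.
 */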

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);

        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);

        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);

        g_assert_not_reached();

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        g_assert_not_reached();
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        g_assert_not_reached();
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
    int bits = ext ? 64 : 32;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
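
/*
 * Editor's worked example for tcg_out_shl (illustrative, with max = 31
 * for the 32-bit form as in the neighbouring helpers): a 32-bit left
 * shift by 4 computes (32 - 4) & 31 = 28 and (31 - 4) & 31 = 27,
 * emitting UBFM wd, wn, #28, #27, which is exactly the LSL wd, wn, #4
 * alias.
 */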

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
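
/*
 * Editor's worked example for tcg_out_dep (illustrative): depositing an
 * 8-bit field at lsb 8 of a 32-bit value gives a = (32 - 8) & 31 = 24
 * and b = 7, emitting BFM wd, wn, #24, #7, i.e. the BFI wd, wn, #8, #8
 * alias.
 */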

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
        /* Using CMP or CMN aliases.  */
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);

static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
        /* Choose X9 as a call-clobbered non-LR temporary.  */
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_X9);

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
    tcg_out_call_int(s, target);

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
        tcg_out_goto(s, l->u.value_ptr);

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        tcg_out_cmp(s, ext, a, b, b_const);

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));

        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
    tcg_out_sxt(s, type, MO_8, rd, rn);

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
    tcg_out_sxt(s, type, MO_16, rd, rn);

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_ext32s(s, rd, rn);

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_uxt(s, MO_8, rd, rn);

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_uxt(s, MO_16, rd, rn);

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_ext32u(s, rd, rn);

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
    TCGReg orig_rl = rl;

    if (rl == ah || (!const_bh && rl == bh)) {
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            tcg_out_movi(s, ext, al, 0);
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);

    /* Note that the only two constants we support are 0 and -1, and
       that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
        tcg_out_mov(s, ext, orig_rl, rl);

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
            } else if (b == 0) {
                tcg_out_movi(s, ext, d, b);
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);

static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
    ptrdiff_t offset = tcg_pcrel_diff(s, target);
    tcg_debug_assert(offset == sextract64(offset, 0, 21));
    tcg_out_insn(s, 3406, ADR, rd, offset);

#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     MemOpIdx oi, uintptr_t ra)
static void * const qemu_ld_helpers[MO_SIZE + 1] = {
    [MO_8]  = helper_ret_ldub_mmu,
    [MO_16] = helper_be_lduw_mmu,
    [MO_32] = helper_be_ldul_mmu,
    [MO_64] = helper_be_ldq_mmu,
    [MO_16] = helper_le_lduw_mmu,
    [MO_32] = helper_le_ldul_mmu,
    [MO_64] = helper_le_ldq_mmu,

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, MemOpIdx oi,
static void * const qemu_st_helpers[MO_SIZE + 1] = {
    [MO_8]  = helper_ret_stb_mmu,
    [MO_16] = helper_be_stw_mmu,
    [MO_32] = helper_be_stl_mmu,
    [MO_64] = helper_be_stq_mmu,
    [MO_16] = helper_le_stw_mmu,
    [MO_32] = helper_le_stl_mmu,
    [MO_64] = helper_le_stq_mmu,

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
    MemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);

    tcg_out_movext(s, lb->type, lb->datalo_reg,
                   TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0);
    tcg_out_goto(s, lb->raddr);

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
    MemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);
    MemOp size = opc & MO_SIZE;

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);

static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->raddr = tcg_splitwx_to_rx(raddr);
    label->label_ptr[0] = label_ptr;

/* We expect to use a 7-bit scaled negative offset from ENV.  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);

/* These offsets are built into the LDP below.  */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path.  Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP.  */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    uint64_t compare_mask;

    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison.  */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path.  */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
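
/*
 * Editor's sketch of the fast path emitted by tcg_out_tlb_read above
 * (illustrative only, 64-bit guest, naturally aligned access):
 *
 *     ldp  x0, x1, [env + TLB_MASK_TABLE_OFS(mem_index)]
 *     and  x0, x0, addr, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
 *     add  x1, x1, x0
 *     ldr  x0, [x1, #offsetof(CPUTLBEntry, addr_read/addr_write)]
 *     ldr  x1, [x1, #offsetof(CPUTLBEntry, addend)]
 *     and  x3, addr, #compare_mask
 *     cmp  x0, x3
 *     b.ne slow_path
 *
 * leaving the host addend in X1 as described in the comment above.
 */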
1751 static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
1754 unsigned a_mask = (1 << a_bits) - 1;
1755 TCGLabelQemuLdst *label = new_ldst_label(s);
1757 label->is_ld = is_ld;
1758 label->addrlo_reg = addr_reg;
1760 /* tst addr, #mask */
1761 tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1763 label->label_ptr[0] = s->code_ptr;
1765 /* b.ne slow_path */
1766 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1768 label->raddr = tcg_splitwx_to_rx(s->code_ptr);
1771 static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1773 if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1777 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
1778 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1780 /* "Tail call" to the helper, with the return address back inline. */
1781 tcg_out_adr(s, TCG_REG_LR, l->raddr);
1782 tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
1783 : helper_unaligned_st));
1787 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1789 return tcg_out_fail_alignment(s, l);
1792 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1794 return tcg_out_fail_alignment(s, l);
1796 #endif /* CONFIG_SOFTMMU */
1798 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1799 TCGReg data_r, TCGReg addr_r,
1800 TCGType otype, TCGReg off_r)
1802 switch (memop & MO_SSIZE) {
1804 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1807 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1808 data_r, addr_r, otype, off_r);
1811 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1814 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1815 data_r, addr_r, otype, off_r);
1818 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1821 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1824 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1827 g_assert_not_reached();
1831 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1832 TCGReg data_r, TCGReg addr_r,
1833 TCGType otype, TCGReg off_r)
1835 switch (memop & MO_SIZE) {
1837 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1840 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1843 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1846 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1849 g_assert_not_reached();
1853 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1854 MemOpIdx oi, TCGType ext)
1856 MemOp memop = get_memop(oi);
1857 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1859 /* Byte swapping is left to middle-end expansion. */
1860 tcg_debug_assert((memop & MO_BSWAP) == 0);
1862 #ifdef CONFIG_SOFTMMU
1863 unsigned mem_index = get_mmuidx(oi);
1864 tcg_insn_unit *label_ptr;
1866 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1867 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1868 TCG_REG_X1, otype, addr_reg);
1869 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1870 s->code_ptr, label_ptr);
1871 #else /* !CONFIG_SOFTMMU */
1872 unsigned a_bits = get_alignment_bits(memop);
1874 tcg_out_test_alignment(s, true, addr_reg, a_bits);
1876 if (USE_GUEST_BASE) {
1877 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1878 TCG_REG_GUEST_BASE, otype, addr_reg);
1880 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1881 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1883 #endif /* CONFIG_SOFTMMU */
1886 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1889 MemOp memop = get_memop(oi);
1890 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1892 /* Byte swapping is left to middle-end expansion. */
1893 tcg_debug_assert((memop & MO_BSWAP) == 0);
1895 #ifdef CONFIG_SOFTMMU
1896 unsigned mem_index = get_mmuidx(oi);
1897 tcg_insn_unit *label_ptr;
1899 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1900 tcg_out_qemu_st_direct(s, memop, data_reg,
1901 TCG_REG_X1, otype, addr_reg);
1902 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1903 data_reg, addr_reg, s->code_ptr, label_ptr);
1904 #else /* !CONFIG_SOFTMMU */
1905 unsigned a_bits = get_alignment_bits(memop);
1907 tcg_out_test_alignment(s, false, addr_reg, a_bits);
1909 if (USE_GUEST_BASE) {
1910 tcg_out_qemu_st_direct(s, memop, data_reg,
1911 TCG_REG_GUEST_BASE, otype, addr_reg);
1913 tcg_out_qemu_st_direct(s, memop, data_reg,
1914 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1916 #endif /* CONFIG_SOFTMMU */
1919 static const tcg_insn_unit *tb_ret_addr;
1921 static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1923 /* Reuse the zeroing that exists for goto_ptr. */
1925 tcg_out_goto_long(s, tcg_code_gen_epilogue);
1927 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1928 tcg_out_goto_long(s, tb_ret_addr);
1932 static void tcg_out_goto_tb(TCGContext *s, int which)
1935 * Direct branch, or indirect address load, will be patched
1936 * by tb_target_set_jmp_target. Assert indirect load offset
1937 * in range early, regardless of direct branch distance.
1939 intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1940 tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1942 set_jmp_insn_offset(s, which);
1943 tcg_out32(s, I3206_B);
1944 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1945 set_jmp_reset_offset(s, which);
1948 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1949 uintptr_t jmp_rx, uintptr_t jmp_rw)
1951 uintptr_t d_addr = tb->jmp_target_addr[n];
1952 ptrdiff_t d_offset = d_addr - jmp_rx;
1955 /* Either directly branch, or indirect branch load. */
1956 if (d_offset == sextract64(d_offset, 0, 28)) {
1957 insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1959 uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1960 ptrdiff_t i_offset = i_addr - jmp_rx;
1962 /* Note that we asserted this in range in tcg_out_goto_tb. */
1963 insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
1965 qatomic_set((uint32_t *)jmp_rw, insn);
1966 flush_idcache_range(jmp_rx, jmp_rw, 4);
1969 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1970 const TCGArg args[TCG_MAX_OP_ARGS],
1971 const int const_args[TCG_MAX_OP_ARGS])
1973 /* 99% of the time, we can signal the use of extension registers
1974 by looking to see if the opcode handles 64-bit data. */
1975 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1977 /* Hoist the loads of the most common arguments. */
1978 TCGArg a0 = args[0];
1979 TCGArg a1 = args[1];
1980 TCGArg a2 = args[2];
1981 int c2 = const_args[2];
1983 /* Some operands are defined with "rZ" constraint, a register or
1984 the zero register. These need not actually test args[I] == 0. */
1985 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1988 case INDEX_op_goto_ptr:
1989 tcg_out_insn(s, 3207, BR, a0);
1993 tcg_out_goto_label(s, arg_label(a0));
1996 case INDEX_op_ld8u_i32:
1997 case INDEX_op_ld8u_i64:
1998 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2000 case INDEX_op_ld8s_i32:
2001 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2003 case INDEX_op_ld8s_i64:
2004 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2006 case INDEX_op_ld16u_i32:
2007 case INDEX_op_ld16u_i64:
2008 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2010 case INDEX_op_ld16s_i32:
2011 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2013 case INDEX_op_ld16s_i64:
2014 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2016 case INDEX_op_ld_i32:
2017 case INDEX_op_ld32u_i64:
2018 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2020 case INDEX_op_ld32s_i64:
2021 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2023 case INDEX_op_ld_i64:
2024 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2027 case INDEX_op_st8_i32:
2028 case INDEX_op_st8_i64:
2029 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2031 case INDEX_op_st16_i32:
2032 case INDEX_op_st16_i64:
2033 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2035 case INDEX_op_st_i32:
2036 case INDEX_op_st32_i64:
2037 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2039 case INDEX_op_st_i64:
2040 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2043 case INDEX_op_add_i32:
2046 case INDEX_op_add_i64:
2048 tcg_out_addsubi(s, ext, a0, a1, a2);
2050 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2054 case INDEX_op_sub_i32:
2057 case INDEX_op_sub_i64:
2059 tcg_out_addsubi(s, ext, a0, a1, -a2);
2061 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2065 case INDEX_op_neg_i64:
2066 case INDEX_op_neg_i32:
2067 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2070 case INDEX_op_and_i32:
2073 case INDEX_op_and_i64:
2075 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2077 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2081 case INDEX_op_andc_i32:
2084 case INDEX_op_andc_i64:
2086 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2088 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2092 case INDEX_op_or_i32:
2095 case INDEX_op_or_i64:
2097 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2099 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2103 case INDEX_op_orc_i32:
2106 case INDEX_op_orc_i64:
2108 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2110 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2114 case INDEX_op_xor_i32:
2117 case INDEX_op_xor_i64:
2119 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2121 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2125 case INDEX_op_eqv_i32:
2128 case INDEX_op_eqv_i64:
2130 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2132 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2136 case INDEX_op_not_i64:
2137 case INDEX_op_not_i32:
2138 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2141 case INDEX_op_mul_i64:
2142 case INDEX_op_mul_i32:
2143 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2146 case INDEX_op_div_i64:
2147 case INDEX_op_div_i32:
2148 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2150 case INDEX_op_divu_i64:
2151 case INDEX_op_divu_i32:
2152 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2155 case INDEX_op_rem_i64:
2156 case INDEX_op_rem_i32:
2157 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2158 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2160 case INDEX_op_remu_i64:
2161 case INDEX_op_remu_i32:
2162 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2163 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;
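
    /*
     * Only rotate-right (RORV) exists, so a variable rotate-left is
     * emitted as a rotate-right by the negated count, which is
     * congruent modulo the register width.
     */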
    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;
    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;
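
    /*
     * For example, movcond_i64 d, x, y, vt, vf, cc becomes
     *   CMP x, y; CSEL d, vt, vf, cc
     * with the compare setting flags and CSEL selecting between the
     * two (possibly XZR) value registers.
     */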
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;
    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
        }
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_ext16s(s, ext, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /* Output must be zero-extended, but input isn't. */
            tcg_out_ext16u(s, a0, a0);
        }
        break;
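
    /*
     * A 16-bit byteswap only swaps bytes within each halfword of the
     * register, so the TCG_BSWAP_* flags describe what remains to be
     * done: IZ promises the high bits of the input are already zero,
     * OZ/OS require the output to be zero- or sign-extended. Only the
     * combinations the swap does not already satisfy need an explicit
     * extension afterwards.
     */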
    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;
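
    /*
     * extract/sextract map directly onto the UBFM/SBFM bitfield
     * instructions: extracting len bits at position pos is
     * UBFM a0, a1, pos, pos + len - 1. extract2 maps onto EXTR, which
     * pulls a field out of the concatenation of two registers.
     */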
    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
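
    /*
     * The double-word arithmetic is carried through the flags
     * (presumably ADDS/ADC and SUBS/SBC inside tcg_out_addsub2), while
     * muluh/mulsh use UMULH/SMULH to produce the high 64 bits of a
     * 64x64->128-bit multiply directly.
     */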
    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov. */
    case INDEX_op_mov_i64:
    case INDEX_op_call:      /* Always emitted via tcg_out_call. */
    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb. */
    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb. */
    case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }

#undef REG0
}
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;
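
    /*
     * vecl selects the vector type: 0 is TCG_TYPE_V64 (Q bit clear),
     * 1 is TCG_TYPE_V128 (Q bit set). A V64 operation on MO_64 lanes
     * has exactly one element, so the AdvSIMD scalar forms are used
     * for that case (is_scalar).
     */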
    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_abs_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
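
    /*
     * For the logical ops above, when operand 2 is constant the
     * constraints (wN/wO in tcg_target_op_def below) should already
     * guarantee that it is encodable, so is_shimm1632 only recovers
     * the cmode/imm8 encoding. When the destination differs from the
     * source, the constant is first materialized with MOVI/MVNI and
     * the register form is applied on top.
     */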
    case INDEX_op_ssadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sssub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_usadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_ussub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
    case INDEX_op_shli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        }
        break;
    case INDEX_op_shri_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_sari_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_aa64_sli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
        }
        break;
    case INDEX_op_shlv_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_aa64_sshl_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        }
        break;
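
    /*
     * The immediate shift forms fold the shift count into the
     * immh:immb field: SHL/SLI encode element_bits + shift, hence
     * a2 + (8 << vece), while USHR/SSHR encode 2 * element_bits - shift,
     * hence (16 << vece) - a2.
     */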
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
                    } else {
                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                    }
                } else {
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
                    } else {
                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    }
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    if (is_scalar) {
                        insn = cmp0_scalar_insn[cond];
                        if (insn) {
                            tcg_out_insn_3612(s, insn, vece, a0, a1);
                            break;
                        }
                    } else {
                        insn = cmp0_vec_insn[cond];
                        if (insn) {
                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                            break;
                        }
                    }
                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
                    a2 = TCG_VEC_TMP;
                }
                if (is_scalar) {
                    insn = cmp_scalar_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_scalar_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
                } else {
                    insn = cmp_vec_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_vec_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                }
            }
        }
        break;
    case INDEX_op_bitsel_vec:
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov. */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec. */
    default:
        g_assert_not_reached();
    }
}
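
/*
 * Report what the backend can do with each vector opcode: 1 means
 * supported directly, 0 means unsupported, and -1 means supported
 * via expansion in tcg_expand_vec_op below.
 */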
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_not_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_bitsel_vec:
        return 1;
    case INDEX_op_rotli_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return -1;
    case INDEX_op_mul_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
        return vece < MO_64;

    default:
        return 0;
    }
}
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
    case INDEX_op_rotli_vec:
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;
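
    /*
     * The rotli expansion above computes v1 >> (width - a2) into a
     * temporary and then uses SLI (shift left and insert) to deposit
     * v1 << a2 on top of it, forming the rotate without a third
     * temporary.
     */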
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64. */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;
    case INDEX_op_rotlv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64. */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;
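
    /*
     * The variable rotates are likewise built from shifts: USHL
     * interprets negative counts as right shifts, so the left and
     * right halves of the rotate are computed separately and ORed
     * together.
     */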
    case INDEX_op_rotrv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64. */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}
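
/*
 * Constraint letters below are defined by this backend (presumably in
 * tcg-target-con-str.h): 'r' integer registers, 'w' vector registers,
 * 'l' the registers usable for qemu_ld/st, 'A' a valid arithmetic
 * immediate, 'L' a valid logical immediate, 'M' the constant -1,
 * 'O' a vector ORR immediate, 'N' a vector AND/BIC immediate, and
 * 'Z' the constant zero (XZR/WZR).
 */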
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, r, rA);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rL);

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rAL);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rA);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rA, rZ, rZ);

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, l);
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I2(lZ, l);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        g_assert_not_reached();
    }
}
static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
}
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value. */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn. */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
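
/*
 * As an illustrative calculation, assuming the usual values
 * TCG_STATIC_CALL_ARGS_SIZE == 128 and CPU_TEMP_BUF_NLONGS == 128:
 * PUSH_SIZE is 12 * 8 = 96 bytes and FRAME_SIZE is
 * 96 + 128 + 128 * 8 = 1248 bytes, already 16-byte aligned, so both
 * build-time assertions above hold with room to spare.
 */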
static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers. */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding. */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals. */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size. */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* Remove TCG locals stack space. */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28. */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame. */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member. */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28, -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp, -96 */
    }
};
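
/*
 * With .h.cie.data_align of -8, each DW_CFA_offset operand above is a
 * multiple of -8 bytes from the CFA; e.g. "0x80 + 28, 1" records x28
 * saved at CFA-8, matching the STP layout in the prologue.
 */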
void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}