2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "../tcg-ldst.c.inc"
14 #include "../tcg-pool.c.inc"
15 #include "qemu/bitops.h"
17 /* We're going to re-use TCGType in setting of the SF bit, which controls
18 the size of the operation performed. If we know the values match, it
19 makes things much cleaner. */
20 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
22 #ifdef CONFIG_DEBUG_TCG
23 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
29 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
34 #endif /* CONFIG_DEBUG_TCG */
36 static const int tcg_target_reg_alloc_order[] = {
37 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39 TCG_REG_X28, /* we will reserve this for guest_base if configured */
41 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47 /* X16 reserved as temporary */
48 /* X17 reserved as temporary */
49 /* X18 reserved by system */
50 /* X19 reserved for AREG0 */
51 /* X29 reserved as fp */
52 /* X30 reserved as temporary */
54 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
55 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
56 /* V8 - V15 are call-saved, and skipped. */
57 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
58 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
59 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
60 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
63 static const int tcg_target_call_iarg_regs[8] = {
64 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
65 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
68 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
70 tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
71 tcg_debug_assert(slot >= 0 && slot <= 1);
72 return TCG_REG_X0 + slot;
75 #define TCG_REG_TMP0 TCG_REG_X16
76 #define TCG_REG_TMP1 TCG_REG_X17
77 #define TCG_REG_TMP2 TCG_REG_X30
78 #define TCG_VEC_TMP0 TCG_REG_V31
80 #define TCG_REG_GUEST_BASE TCG_REG_X28
82 static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
84 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
85 ptrdiff_t offset = target - src_rx;
87 if (offset == sextract64(offset, 0, 26)) {
88 /* read instruction, mask away previous PC_REL26 parameter contents,
89 set the proper offset, then write back the instruction. */
90 *src_rw = deposit32(*src_rw, 0, 26, offset);
96 static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
98 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
99 ptrdiff_t offset = target - src_rx;
101 if (offset == sextract64(offset, 0, 19)) {
102 *src_rw = deposit32(*src_rw, 5, 19, offset);
108 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
109 intptr_t value, intptr_t addend)
111 tcg_debug_assert(addend == 0);
113 case R_AARCH64_JUMP26:
114 case R_AARCH64_CALL26:
115 return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
116 case R_AARCH64_CONDBR19:
117 return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
119 g_assert_not_reached();
123 #define TCG_CT_CONST_AIMM 0x100
124 #define TCG_CT_CONST_LIMM 0x200
125 #define TCG_CT_CONST_ZERO 0x400
126 #define TCG_CT_CONST_MONE 0x800
127 #define TCG_CT_CONST_ORRI 0x1000
128 #define TCG_CT_CONST_ANDI 0x2000
130 #define ALL_GENERAL_REGS 0xffffffffu
131 #define ALL_VECTOR_REGS 0xffffffff00000000ull
/* Match a constant valid for addition (12-bit, optionally shifted). */
static inline bool is_aimm(uint64_t val)
{
    if ((val & ~0xfffull) == 0) {
        return true;                    /* plain 12-bit immediate */
    }
    return (val & ~0xfff000ull) == 0;   /* 12-bit immediate, LSL #12 */
}
/* Match a constant valid for logical operations.
   Simplified view of the AArch64 logical immediates: ignore the
   replication across sub-fields and accept only a single contiguous
   run of ones (possibly rotated), or the inverse thereof. */
static inline bool is_limm(uint64_t val)
{
    /* Normalize so the most significant bit is clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    /* Adding the lowest set bit collapses a contiguous run of ones;
       what remains must be zero or a single bit. */
    val += val & -val;
    return (val & (val - 1)) == 0;
}
/* Return true if v16 is a valid 16-bit shifted immediate. */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if ((v16 & 0xff00) == 0) {
        /* All significant bits in the low byte: MOVI 16-bit, LSL #0. */
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    }
    if ((v16 & 0x00ff) == 0) {
        /* All significant bits in the high byte: MOVI 16-bit, LSL #8. */
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
/* Return true if v32 is a valid 32-bit shifted immediate. */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int sh;

    /* Try each byte position in turn; cmode is 0x0/0x2/0x4/0x6
       for shifts of 0/8/16/24 respectively. */
    for (sh = 0; sh < 32; sh += 8) {
        if ((v32 & ~(0xffu << sh)) == 0) {
            *cmode = sh >> 2;
            *imm8 = (v32 >> sh) & 0xff;
            return true;
        }
    }
    return false;
}
/* Return true if v32 is a valid 32-bit shifting ones immediate. */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    /* imm8 in byte 1, low byte all-ones: MOVI (shifting ones), MSL #8. */
    if ((v32 & ~0xff00u) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    }
    /* imm8 in byte 2, low two bytes all-ones: MSL #16. */
    if ((v32 & ~0xff0000u) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
/* Return true if v32 is a valid float32 immediate (FMOV imm8 form). */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t frac = v32 & 0x7ffff;      /* low 19 fraction bits */
    uint32_t exp6 = (v32 >> 25) & 0x3f; /* bits [30:25] of the exponent */

    /* Fraction must be zero and the exponent must be of the form
       100000 or 011111 to be expressible as abcdefgh. */
    if (frac == 0 && (exp6 == 0x20 || exp6 == 0x1f)) {
        *cmode = 0xf;
        *imm8 = ((v32 >> 31) & 1) << 7      /* a: sign */
              | ((v32 >> 25) & 1) << 6      /* b: exponent msb (inverted) */
              | ((v32 >> 19) & 0x3f);       /* cdefgh */
        return true;
    }
    return false;
}
/* Return true if v64 is a valid float64 immediate (FMOV imm8 form). */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t frac = v64 & 0xffffffffffffull;  /* low 48 fraction bits */
    uint32_t exp9 = (v64 >> 54) & 0x1ff;      /* bits [62:54] of the exponent */

    if (frac == 0 && (exp9 == 0x100 || exp9 == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = ((v64 >> 63) & 1) << 7        /* a: sign */
              | ((v64 >> 54) & 1) << 6        /* b: exponent msb (inverted) */
              | ((v64 >> 48) & 0x3f);         /* cdefgh */
        return true;
    }
    return false;
}
/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    /* Knock out one byte at a time (positions 3..1); the remainder
       must then be loadable with a single MOVI-class insn. */
    for (i = 6; i > 0; i -= 2) {
        uint32_t rest = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(rest, cmode, imm8) ||
            is_soimm32(rest, cmode, imm8)) {
            break;
        }
    }
    return i;
}
/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    /* When both 16-bit halves match, treat it as a 16-bit pattern. */
    if ((v32 >> 16) == (v32 & 0xffff)) {
        return is_shimm16(v32, cmode, imm8);
    }
    return is_shimm32(v32, cmode, imm8);
}
273 static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
275 if (ct & TCG_CT_CONST) {
278 if (type == TCG_TYPE_I32) {
281 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
284 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
287 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
290 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
294 switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
297 case TCG_CT_CONST_ANDI:
300 case TCG_CT_CONST_ORRI:
301 if (val == deposit64(val, 32, 32, val)) {
303 return is_shimm1632(val, &cmode, &imm8);
307 /* Both bits should not be set for the same insn. */
308 g_assert_not_reached();
314 enum aarch64_cond_code {
317 COND_CS = 0x2, /* Unsigned greater or equal */
318 COND_HS = COND_CS, /* ALIAS greater or equal */
319 COND_CC = 0x3, /* Unsigned less than */
320 COND_LO = COND_CC, /* ALIAS Lower */
321 COND_MI = 0x4, /* Negative */
322 COND_PL = 0x5, /* Zero or greater */
323 COND_VS = 0x6, /* Overflow */
324 COND_VC = 0x7, /* No overflow */
325 COND_HI = 0x8, /* Unsigned greater than */
326 COND_LS = 0x9, /* Unsigned less or equal */
332 COND_NV = 0xf, /* behaves like COND_AL here */
335 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
336 [TCG_COND_EQ] = COND_EQ,
337 [TCG_COND_NE] = COND_NE,
338 [TCG_COND_LT] = COND_LT,
339 [TCG_COND_GE] = COND_GE,
340 [TCG_COND_LE] = COND_LE,
341 [TCG_COND_GT] = COND_GT,
343 [TCG_COND_LTU] = COND_LO,
344 [TCG_COND_GTU] = COND_HI,
345 [TCG_COND_GEU] = COND_HS,
346 [TCG_COND_LEU] = COND_LS,
350 LDST_ST = 0, /* store */
351 LDST_LD = 1, /* load */
352 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
353 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
356 /* We encode the format of the insn into the beginning of the name, so that
357 we can have the preprocessor help "typecheck" the insn vs the output
358 function. Arm didn't provide us with nice names for the formats, so we
359 use the section number of the architecture reference manual in which the
360 instruction group is described. */
362 /* Compare and branch (immediate). */
363 I3201_CBZ = 0x34000000,
364 I3201_CBNZ = 0x35000000,
366 /* Conditional branch (immediate). */
367 I3202_B_C = 0x54000000,
369 /* Unconditional branch (immediate). */
370 I3206_B = 0x14000000,
371 I3206_BL = 0x94000000,
373 /* Unconditional branch (register). */
374 I3207_BR = 0xd61f0000,
375 I3207_BLR = 0xd63f0000,
376 I3207_RET = 0xd65f0000,
378 /* AdvSIMD load/store single structure. */
379 I3303_LD1R = 0x0d40c000,
381 /* Load literal for loading the address at pc-relative offset */
382 I3305_LDR = 0x58000000,
383 I3305_LDR_v64 = 0x5c000000,
384 I3305_LDR_v128 = 0x9c000000,
386 /* Load/store exclusive. */
387 I3306_LDXP = 0xc8600000,
388 I3306_STXP = 0xc8200000,
390 /* Load/store register. Described here as 3.3.12, but the helper
391 that emits them can transform to 3.3.10 or 3.3.13. */
392 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
393 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
394 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
395 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
397 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
398 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
399 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
400 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
402 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
403 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
405 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
406 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
407 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
409 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
410 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
412 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
413 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
415 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
416 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
418 I3312_TO_I3310 = 0x00200800,
419 I3312_TO_I3313 = 0x01000000,
421 /* Load/store register pair instructions. */
422 I3314_LDP = 0x28400000,
423 I3314_STP = 0x28000000,
425 /* Add/subtract immediate instructions. */
426 I3401_ADDI = 0x11000000,
427 I3401_ADDSI = 0x31000000,
428 I3401_SUBI = 0x51000000,
429 I3401_SUBSI = 0x71000000,
431 /* Bitfield instructions. */
432 I3402_BFM = 0x33000000,
433 I3402_SBFM = 0x13000000,
434 I3402_UBFM = 0x53000000,
436 /* Extract instruction. */
437 I3403_EXTR = 0x13800000,
439 /* Logical immediate instructions. */
440 I3404_ANDI = 0x12000000,
441 I3404_ORRI = 0x32000000,
442 I3404_EORI = 0x52000000,
443 I3404_ANDSI = 0x72000000,
445 /* Move wide immediate instructions. */
446 I3405_MOVN = 0x12800000,
447 I3405_MOVZ = 0x52800000,
448 I3405_MOVK = 0x72800000,
450 /* PC relative addressing instructions. */
451 I3406_ADR = 0x10000000,
452 I3406_ADRP = 0x90000000,
454 /* Add/subtract extended register instructions. */
455 I3501_ADD = 0x0b200000,
457 /* Add/subtract shifted register instructions (without a shift). */
458 I3502_ADD = 0x0b000000,
459 I3502_ADDS = 0x2b000000,
460 I3502_SUB = 0x4b000000,
461 I3502_SUBS = 0x6b000000,
463 /* Add/subtract shifted register instructions (with a shift). */
464 I3502S_ADD_LSL = I3502_ADD,
466 /* Add/subtract with carry instructions. */
467 I3503_ADC = 0x1a000000,
468 I3503_SBC = 0x5a000000,
470 /* Conditional select instructions. */
471 I3506_CSEL = 0x1a800000,
472 I3506_CSINC = 0x1a800400,
473 I3506_CSINV = 0x5a800000,
474 I3506_CSNEG = 0x5a800400,
476 /* Data-processing (1 source) instructions. */
477 I3507_CLZ = 0x5ac01000,
478 I3507_RBIT = 0x5ac00000,
479 I3507_REV = 0x5ac00000, /* + size << 10 */
481 /* Data-processing (2 source) instructions. */
482 I3508_LSLV = 0x1ac02000,
483 I3508_LSRV = 0x1ac02400,
484 I3508_ASRV = 0x1ac02800,
485 I3508_RORV = 0x1ac02c00,
486 I3508_SMULH = 0x9b407c00,
487 I3508_UMULH = 0x9bc07c00,
488 I3508_UDIV = 0x1ac00800,
489 I3508_SDIV = 0x1ac00c00,
491 /* Data-processing (3 source) instructions. */
492 I3509_MADD = 0x1b000000,
493 I3509_MSUB = 0x1b008000,
495 /* Logical shifted register instructions (without a shift). */
496 I3510_AND = 0x0a000000,
497 I3510_BIC = 0x0a200000,
498 I3510_ORR = 0x2a000000,
499 I3510_ORN = 0x2a200000,
500 I3510_EOR = 0x4a000000,
501 I3510_EON = 0x4a200000,
502 I3510_ANDS = 0x6a000000,
504 /* Logical shifted register instructions (with a shift). */
505 I3502S_AND_LSR = I3510_AND | (1 << 22),
508 I3605_DUP = 0x0e000400,
509 I3605_INS = 0x4e001c00,
510 I3605_UMOV = 0x0e003c00,
512 /* AdvSIMD modified immediate */
513 I3606_MOVI = 0x0f000400,
514 I3606_MVNI = 0x2f000400,
515 I3606_BIC = 0x2f001400,
516 I3606_ORR = 0x0f001400,
518 /* AdvSIMD scalar shift by immediate */
519 I3609_SSHR = 0x5f000400,
520 I3609_SSRA = 0x5f001400,
521 I3609_SHL = 0x5f005400,
522 I3609_USHR = 0x7f000400,
523 I3609_USRA = 0x7f001400,
524 I3609_SLI = 0x7f005400,
526 /* AdvSIMD scalar three same */
527 I3611_SQADD = 0x5e200c00,
528 I3611_SQSUB = 0x5e202c00,
529 I3611_CMGT = 0x5e203400,
530 I3611_CMGE = 0x5e203c00,
531 I3611_SSHL = 0x5e204400,
532 I3611_ADD = 0x5e208400,
533 I3611_CMTST = 0x5e208c00,
534 I3611_UQADD = 0x7e200c00,
535 I3611_UQSUB = 0x7e202c00,
536 I3611_CMHI = 0x7e203400,
537 I3611_CMHS = 0x7e203c00,
538 I3611_USHL = 0x7e204400,
539 I3611_SUB = 0x7e208400,
540 I3611_CMEQ = 0x7e208c00,
542 /* AdvSIMD scalar two-reg misc */
543 I3612_CMGT0 = 0x5e208800,
544 I3612_CMEQ0 = 0x5e209800,
545 I3612_CMLT0 = 0x5e20a800,
546 I3612_ABS = 0x5e20b800,
547 I3612_CMGE0 = 0x7e208800,
548 I3612_CMLE0 = 0x7e209800,
549 I3612_NEG = 0x7e20b800,
551 /* AdvSIMD shift by immediate */
552 I3614_SSHR = 0x0f000400,
553 I3614_SSRA = 0x0f001400,
554 I3614_SHL = 0x0f005400,
555 I3614_SLI = 0x2f005400,
556 I3614_USHR = 0x2f000400,
557 I3614_USRA = 0x2f001400,
559 /* AdvSIMD three same. */
560 I3616_ADD = 0x0e208400,
561 I3616_AND = 0x0e201c00,
562 I3616_BIC = 0x0e601c00,
563 I3616_BIF = 0x2ee01c00,
564 I3616_BIT = 0x2ea01c00,
565 I3616_BSL = 0x2e601c00,
566 I3616_EOR = 0x2e201c00,
567 I3616_MUL = 0x0e209c00,
568 I3616_ORR = 0x0ea01c00,
569 I3616_ORN = 0x0ee01c00,
570 I3616_SUB = 0x2e208400,
571 I3616_CMGT = 0x0e203400,
572 I3616_CMGE = 0x0e203c00,
573 I3616_CMTST = 0x0e208c00,
574 I3616_CMHI = 0x2e203400,
575 I3616_CMHS = 0x2e203c00,
576 I3616_CMEQ = 0x2e208c00,
577 I3616_SMAX = 0x0e206400,
578 I3616_SMIN = 0x0e206c00,
579 I3616_SSHL = 0x0e204400,
580 I3616_SQADD = 0x0e200c00,
581 I3616_SQSUB = 0x0e202c00,
582 I3616_UMAX = 0x2e206400,
583 I3616_UMIN = 0x2e206c00,
584 I3616_UQADD = 0x2e200c00,
585 I3616_UQSUB = 0x2e202c00,
586 I3616_USHL = 0x2e204400,
588 /* AdvSIMD two-reg misc. */
589 I3617_CMGT0 = 0x0e208800,
590 I3617_CMEQ0 = 0x0e209800,
591 I3617_CMLT0 = 0x0e20a800,
592 I3617_CMGE0 = 0x2e208800,
593 I3617_CMLE0 = 0x2e209800,
594 I3617_NOT = 0x2e205800,
595 I3617_ABS = 0x0e20b800,
596 I3617_NEG = 0x2e20b800,
598 /* System instructions. */
600 DMB_ISH = 0xd50338bf,
609 static inline uint32_t tcg_in32(TCGContext *s)
611 uint32_t v = *(uint32_t *)s->code_ptr;
615 /* Emit an opcode with "type-checking" of the format. */
616 #define tcg_out_insn(S, FMT, OP, ...) \
617 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
619 static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
620 TCGReg rt, TCGReg rn, unsigned size)
622 tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
625 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
626 int imm19, TCGReg rt)
628 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
631 static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
632 TCGReg rt, TCGReg rt2, TCGReg rn)
634 tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
637 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
638 TCGReg rt, int imm19)
640 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
643 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
644 TCGCond c, int imm19)
646 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
649 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
651 tcg_out32(s, insn | (imm26 & 0x03ffffff));
654 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
656 tcg_out32(s, insn | rn << 5);
659 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
660 TCGReg r1, TCGReg r2, TCGReg rn,
661 tcg_target_long ofs, bool pre, bool w)
663 insn |= 1u << 31; /* ext */
667 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
668 insn |= (ofs & (0x7f << 3)) << (15 - 3);
670 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
673 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
674 TCGReg rd, TCGReg rn, uint64_t aimm)
677 tcg_debug_assert((aimm & 0xfff) == 0);
679 tcg_debug_assert(aimm <= 0xfff);
680 aimm |= 1 << 12; /* apply LSL 12 */
682 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
685 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
686 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
687 that feed the DecodeBitMasks pseudo function. */
688 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
689 TCGReg rd, TCGReg rn, int n, int immr, int imms)
691 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
695 #define tcg_out_insn_3404 tcg_out_insn_3402
697 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
698 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
700 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
704 /* This function is used for the Move (wide immediate) instruction group.
705 Note that SHIFT is a full shift count, not the 2 bit HW field. */
706 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
707 TCGReg rd, uint16_t half, unsigned shift)
709 tcg_debug_assert((shift & ~0x30) == 0);
710 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
713 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
714 TCGReg rd, int64_t disp)
716 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
719 static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
720 TCGType sf, TCGReg rd, TCGReg rn,
721 TCGReg rm, int opt, int imm3)
723 tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
724 imm3 << 10 | rn << 5 | rd);
727 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
728 the rare occasion when we actually want to supply a shift amount. */
729 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
730 TCGType ext, TCGReg rd, TCGReg rn,
733 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
736 /* This function is for 3.5.2 (Add/subtract shifted register),
737 and 3.5.10 (Logical shifted register), for the vast majorty of cases
738 when we don't want to apply a shift. Thus it can also be used for
739 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
740 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
741 TCGReg rd, TCGReg rn, TCGReg rm)
743 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
746 #define tcg_out_insn_3503 tcg_out_insn_3502
747 #define tcg_out_insn_3508 tcg_out_insn_3502
748 #define tcg_out_insn_3510 tcg_out_insn_3502
750 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
751 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
753 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
754 | tcg_cond_to_aarch64[c] << 12);
757 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
758 TCGReg rd, TCGReg rn)
760 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
763 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
764 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
766 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
769 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
770 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
772 /* Note that bit 11 set means general register input. Therefore
773 we can handle both register sets with one function. */
774 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
775 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
778 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
779 TCGReg rd, bool op, int cmode, uint8_t imm8)
781 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
782 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
785 static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
786 TCGReg rd, TCGReg rn, unsigned immhb)
788 tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
791 static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
792 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
794 tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
795 | (rn & 0x1f) << 5 | (rd & 0x1f));
798 static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
799 unsigned size, TCGReg rd, TCGReg rn)
801 tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
804 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
805 TCGReg rd, TCGReg rn, unsigned immhb)
807 tcg_out32(s, insn | q << 30 | immhb << 16
808 | (rn & 0x1f) << 5 | (rd & 0x1f));
811 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
812 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
814 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
815 | (rn & 0x1f) << 5 | (rd & 0x1f));
818 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
819 unsigned size, TCGReg rd, TCGReg rn)
821 tcg_out32(s, insn | q << 30 | (size << 22)
822 | (rn & 0x1f) << 5 | (rd & 0x1f));
825 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
826 TCGReg rd, TCGReg base, TCGType ext,
829 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
830 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
831 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
834 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
835 TCGReg rd, TCGReg rn, intptr_t offset)
837 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
840 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
841 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
843 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
844 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
845 | rn << 5 | (rd & 0x1f));
848 static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
851 * While BTI insns are nops on hosts without FEAT_BTI,
852 * there is no point in emitting them in that case either.
854 if (cpuinfo & CPUINFO_BTI) {
859 /* Register to register move using ORR (shifted register with no shift). */
860 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
862 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
865 /* Register to register move using ADDI (move to/from SP). */
866 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
868 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
871 /* This function is used for the Logical (immediate) instruction group.
872 The value of LIMM must satisfy IS_LIMM. See the comment above about
873 only supporting simplified logical immediates. */
874 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
875 TCGReg rd, TCGReg rn, uint64_t limm)
879 tcg_debug_assert(is_limm(limm));
884 r = 0; /* form 0....01....1 */
885 c = ctz64(~limm) - 1;
887 r = clz64(~limm); /* form 1..10..01..1 */
891 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
894 if (ext == TCG_TYPE_I32) {
899 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
902 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
903 TCGReg rd, int64_t v64)
905 bool q = type == TCG_TYPE_V128;
908 /* Test all bytes equal first. */
911 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
916 * Test all bytes 0x00 or 0xff second. This can match cases that
917 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
919 for (i = imm8 = 0; i < 8; i++) {
920 uint8_t byte = v64 >> (i * 8);
923 } else if (byte != 0) {
927 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
932 * Tests for various replications. For each element width, if we
933 * cannot find an expansion there's no point checking a larger
934 * width because we already know by replication it cannot match.
939 if (is_shimm16(v16, &cmode, &imm8)) {
940 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
943 if (is_shimm16(~v16, &cmode, &imm8)) {
944 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
949 * Otherwise, all remaining constants can be loaded in two insns:
950 * rd = v16 & 0xff, rd |= v16 & 0xff00.
952 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
953 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
955 } else if (vece == MO_32) {
959 if (is_shimm32(v32, &cmode, &imm8) ||
960 is_soimm32(v32, &cmode, &imm8) ||
961 is_fimm32(v32, &cmode, &imm8)) {
962 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
965 if (is_shimm32(n32, &cmode, &imm8) ||
966 is_soimm32(n32, &cmode, &imm8)) {
967 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
972 * Restrict the set of constants to those we can load with
973 * two instructions. Others we load from the pool.
975 i = is_shimm32_pair(v32, &cmode, &imm8);
977 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
978 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
981 i = is_shimm32_pair(n32, &cmode, &imm8);
983 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
984 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
987 } else if (is_fimm64(v64, &cmode, &imm8)) {
988 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
993 * As a last resort, load from the constant pool. Sadly there
994 * is no LD1R (literal), so store the full 16-byte vector.
996 if (type == TCG_TYPE_V128) {
997 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
998 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
1000 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
1001 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
1005 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
1006 TCGReg rd, TCGReg rs)
1008 int is_q = type - TCG_TYPE_V64;
1009 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
1013 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1014 TCGReg r, TCGReg base, intptr_t offset)
1016 TCGReg temp = TCG_REG_TMP0;
1018 if (offset < -0xffffff || offset > 0xffffff) {
1019 tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
1020 tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
1023 AArch64Insn add_insn = I3401_ADDI;
1026 add_insn = I3401_SUBI;
1029 if (offset & 0xfff000) {
1030 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1033 if (offset & 0xfff) {
1034 tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1038 tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1042 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1043 tcg_target_long value)
1045 tcg_target_long svalue = value;
1046 tcg_target_long ivalue = ~value;
1047 tcg_target_long t0, t1, t2;
1054 tcg_debug_assert(rd < 32);
1057 g_assert_not_reached();
1060 /* For 32-bit values, discard potential garbage in value. For 64-bit
1061 values within [2**31, 2**32-1], we can create smaller sequences by
1062 interpreting this as a negative 32-bit number, while ensuring that
1063 the high 32 bits are cleared by setting SF=0. */
1064 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1065 svalue = (int32_t)value;
1066 value = (uint32_t)value;
1067 ivalue = (uint32_t)ivalue;
1068 type = TCG_TYPE_I32;
1071 /* Speed things up by handling the common case of small positive
1072 and negative values specially. */
1073 if ((value & ~0xffffull) == 0) {
1074 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1076 } else if ((ivalue & ~0xffffull) == 0) {
1077 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1081 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
1082 use the sign-extended value. That lets us match rotated values such
1083 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1084 if (is_limm(svalue)) {
1085 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1089 /* Look for host pointer values within 4G of the PC. This happens
1090 often when loading pointers to QEMU's own data structures. */
1091 if (type == TCG_TYPE_I64) {
1092 intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1093 tcg_target_long disp = value - src_rx;
1094 if (disp == sextract64(disp, 0, 21)) {
1095 tcg_out_insn(s, 3406, ADR, rd, disp);
1098 disp = (value >> 12) - (src_rx >> 12);
1099 if (disp == sextract64(disp, 0, 21)) {
1100 tcg_out_insn(s, 3406, ADRP, rd, disp);
1101 if (value & 0xfff) {
1102 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1108 /* Would it take fewer insns to begin with MOVN? */
1109 if (ctpop64(value) >= 32) {
1116 s0 = ctz64(t0) & (63 & -16);
1117 t1 = t0 & ~(0xffffull << s0);
1118 s1 = ctz64(t1) & (63 & -16);
1119 t2 = t1 & ~(0xffffull << s1);
1121 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1123 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1128 /* For more than 2 insns, dump it into the constant pool. */
1129 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1130 tcg_out_insn(s, 3305, LDR, 0, rd);
1133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1138 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1139 tcg_target_long imm)
1141 /* This function is only used for passing structs by reference. */
1142 g_assert_not_reached();
1145 /* Define something more legible for general use. */
1146 #define tcg_out_ldst_r tcg_out_insn_3310
1148 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1149 TCGReg rn, intptr_t offset, int lgsize)
1151 /* If the offset is naturally aligned and in range, then we can
1152 use the scaled uimm12 encoding */
1153 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1154 uintptr_t scaled_uimm = offset >> lgsize;
1155 if (scaled_uimm <= 0xfff) {
1156 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1161 /* Small signed offsets can use the unscaled encoding. */
1162 if (offset >= -256 && offset < 256) {
1163 tcg_out_insn_3312(s, insn, rd, rn, offset);
1167 /* Worst-case scenario, move offset to temp register, use reg offset. */
1168 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
1169 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
/*
 * Register-to-register move.  Register numbers < 32 are general regs,
 * >= 32 are vector regs; the four combinations select MOV, UMOV (vec
 * to core), INS (core to vec), or vector ORR Vd, Vn, Vn (vec to vec).
 * NOTE(review): the surrounding switch on 'type' is elided here.
 */
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
        } else if (arg < 32) {
            /* INS element 0; 4 << type gives the element-size imm5 bit. */
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
        /* 64-bit vector: ORR Vd.8b, Vn.8b, Vn.8b acts as a move. */
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        /* 128-bit vector: ORR Vd.16b, Vn.16b, Vn.16b. */
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        g_assert_not_reached();
/*
 * Load a value of the given TCG type from [base + ofs], selecting a
 * core-register or vector-register load by the register number.
 * NOTE(review): the switch on 'type' (and lgsz assignment) is elided.
 */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        g_assert_not_reached();
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
/*
 * Store a value of the given TCG type to [base + ofs]; mirror image
 * of tcg_out_ld above.
 */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        g_assert_not_reached();
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
/*
 * Try to store constant 'val' directly.  Only zero can be stored
 * without a materializing move, via the XZR/WZR register.
 */
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
/* Bitfield move (BFM): insert bits of rn into rd, immr=a, imms=b. */
static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
/* Unsigned bitfield move (UBFM), immr=a, imms=b; basis for LSL/LSR/UXT*. */
static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
/* Signed bitfield move (SBFM), immr=a, imms=b; basis for ASR/SXT*. */
static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
/* Extract (EXTR): rd = low bits of rn:rm shifted right by a. */
static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
/*
 * Logical shift left by immediate m, via the LSL alias of UBFM.
 * NOTE(review): 'max' (= bits - 1) is declared on an elided line.
 */
static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
    int bits = ext ? 64 : 32;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
/* Logical shift right by immediate m, via the LSR alias of UBFM. */
static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
/* Arithmetic shift right by immediate m, via the ASR alias of SBFM. */
static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
/* Rotate right by immediate m, via the ROR alias of EXTR rn:rn. */
static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
/* Rotate left by immediate m == rotate right by (width - m). */
static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
/*
 * Deposit: insert 'width' low bits of rn into rd at bit 'lsb',
 * using the BFI alias of BFM (immr = -lsb mod size, imms = width-1).
 */
static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
/*
 * Compare register a against b (immediate when const_b, else register),
 * setting the flags.  A negative immediate is handled as CMN with -b.
 * NOTE(review): the const_b / sign tests themselves are on elided lines.
 */
static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
        /* Using CMP or CMN aliases. */
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
/* Unconditional direct branch; target must be within +/-128MB (26-bit). */
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
/*
 * Call a target: BL when the 26-bit pc-relative range suffices,
 * otherwise materialize the address in TMP0 and use BLR.
 */
static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
/* Standard call hook; the helper ABI info is not needed on aarch64. */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
    tcg_out_call_int(s, target);
/*
 * Branch to a TCG label: emit B with a relocation if the label is not
 * yet resolved, else a direct branch to its known address.
 */
static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
        tcg_out_goto(s, l->u.value_ptr);
/*
 * Conditional branch to label l on (a <cond> b).  Comparisons against
 * constant zero for EQ/NE use CBZ/CBNZ and skip the flag-setting CMP;
 * otherwise CMP then B.cond with a 19-bit pc-relative offset.
 */
static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        tcg_out_cmp(s, ext, a, b, b_const);

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        /* Preserve any prior imm19 bits already in the insn word. */
        offset = tcg_in32(s) >> 5;
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));

        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
/* Byte-reverse within elements of size 1 << s_bits (REV16/REV32/REV). */
static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
/* Sign-extend the low 8 << s_bits bits of rn into rd. */
static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
/* Sign-extend byte. */
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
    tcg_out_sxt(s, type, MO_8, rd, rn);
/* Sign-extend halfword. */
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
    tcg_out_sxt(s, type, MO_16, rd, rn);
/* Sign-extend word to 64 bits (SXTW). */
static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
/* i32 -> i64 sign extension hook. */
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_ext32s(s, rd, rn);
/* Zero-extend the low 8 << s_bits bits of rn into rd (32-bit form). */
static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
/* Zero-extend byte. */
static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_uxt(s, MO_8, rd, rn);
/* Zero-extend halfword. */
static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_uxt(s, MO_16, rd, rn);
/* Zero-extend word: a 32-bit MOV clears the high 32 bits. */
static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
/* i32 -> i64 zero extension hook. */
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_ext32u(s, rd, rn);
/* Truncate i64 to i32: a 32-bit register move suffices. */
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
/*
 * Add a signed immediate: ADDI for non-negative values, SUBI of the
 * negation otherwise (the uimm12 field cannot encode negatives).
 */
static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
/*
 * Double-word add/sub with carry: {rh,rl} = {ah,al} +/- {bh,bl}.
 * Low half uses ADDS/SUBS (flag-setting), high half ADC/SBC.  If rl
 * would clobber an input of the high half, compute into a temp and
 * move to the real destination at the end.
 */
static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
    TCGReg orig_rl = rl;

    if (rl == ah || (!const_bh && rl == bh)) {

        /* Negative immediate: flip the operation and negate bl. */
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
            insn = sub ? I3401_SUBSI : I3401_ADDSI;

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero. */
            tcg_out_movi(s, ext, al, 0);
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);

    /* Note that the only two constants we support are 0 and -1, and
       that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
    if ((bh != 0) ^ sub) {
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
/*
 * Emit a memory barrier.  Map the TCG_MO_* ordering bits onto the
 * weakest sufficient DMB: store-store -> DMB ISHST, any load ordering
 * -> DMB ISHLD, everything else -> full DMB ISH.
 */
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
/*
 * Count leading/trailing zeros of a0 into d, with fallback value b
 * (register or constant) when a0 == 0.  CTZ is CLZ of the bit-reversed
 * input.  When b equals the operand width, bare CLZ already gives the
 * right answer; otherwise compare-with-zero and CSEL in the fallback.
 * NOTE(review): several branches of the const_b handling are elided.
 */
static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);

    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);

            } else if (b == 0) {
                tcg_out_movi(s, ext, d, b);

        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
/*
 * Whether guest memory ops may carry a bswap in the MemOp.
 * NOTE(review): body elided — presumably returns false (aarch64 emits
 * explicit REV after the access); confirm against the full source.
 */
bool tcg_target_has_memory_bswap(MemOp memop)
/* Scratch-register description shared by the ld/st slow-path helpers. */
static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
/*
 * Emit the out-of-line slow path for a guest load: patch the forward
 * conditional branch to land here, marshal arguments, call the sized
 * load helper, move the result back, and jump to the fast-path resume.
 */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
/* Out-of-line slow path for a guest store; mirrors the load version. */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
/* We expect to use a 7-bit scaled negative offset from ENV
   (LDP imm7 is scaled by 8: 7-bit signed field -> -512 minimum). */
#define MIN_TLB_MASK_TABLE_OFS  -512
/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp s_bits = opc & MO_SIZE;

    /* LSE2 gives single-copy atomicity for accesses within 16 bytes. */
    h->aa = atom_and_align_for_opc(s, opc,
                                   have_lse2 ? MO_ATOM_WITHIN16
    a_mask = (1 << h->aa.align) - 1;

    if (tcg_use_softmmu) {
        unsigned s_mask = (1u << s_bits) - 1;
        unsigned mem_index = get_mmuidx(oi);
        uint64_t compare_mask;

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->addrlo_reg = addr_reg;

        /* Mask arithmetic may exceed 32 bits with large dynamic TLBs. */
        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                     ? TCG_TYPE_I64 : TCG_TYPE_I32);

        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
                     tlb_mask_table_ofs(s, mem_index), 1, 0);

        /* Extract the TLB index from the address into X0. */
        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
                     s->page_bits - CPU_TLB_ENTRY_BITS);

        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);

        /* Load the tlb comparator into TMP0, and the fast path addend. */
        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
                   is_ld ? offsetof(CPUTLBEntry, addr_read)
                   : offsetof(CPUTLBEntry, addr_write));
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
                   offsetof(CPUTLBEntry, addend));

        /*
         * For aligned accesses, we check the first byte and include
         * the alignment bits within the address.  For unaligned access,
         * we check that we don't cross pages using the address of the
         * last byte of the access.
         */
        if (a_mask >= s_mask) {
            addr_adj = addr_reg;
            addr_adj = TCG_REG_TMP2;
            tcg_out_insn(s, 3401, ADDI, addr_type,
                         addr_adj, addr_reg, s_mask - a_mask);
        compare_mask = (uint64_t)s->page_mask | a_mask;

        /* Store the page mask part of the address into TMP2. */
        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
                         addr_adj, compare_mask);

        /* Perform the address comparison. */
        tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);

        /* If not equal, we jump to the slow path. */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

        /* Fast path: host address = TLB addend + guest address. */
        h->base = TCG_REG_TMP1;
        h->index = addr_reg;
        h->index_ext = addr_type;

        /* User-mode: only an alignment check may force the slow path. */
        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->addrlo_reg = addr_reg;

        /* tst addr, #mask */
        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

        /* b.ne slow_path */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

        if (guest_base || addr_type == TCG_TYPE_I32) {
            h->base = TCG_REG_GUEST_BASE;
            h->index = addr_reg;
            h->index_ext = addr_type;
            /* No guest_base: address the guest memory directly. */
            h->index = TCG_REG_XZR;
            h->index_ext = TCG_TYPE_I64;
/*
 * Fast-path guest load: pick the register-offset load matching the
 * access size and signedness; 'ext' selects 32- vs 64-bit destination
 * for the sign-extending forms.
 */
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, HostAddress h)
    switch (memop & MO_SSIZE) {
        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, h.base, h.index_ext, h.index);
        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, h.base, h.index_ext, h.index);
        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
        g_assert_not_reached();
/* Fast-path guest store: size-selected register-offset store. */
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, HostAddress h)
    switch (memop & MO_SIZE) {
        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
        g_assert_not_reached();
/*
 * Guest load entry point: build host address (TLB/alignment checks),
 * emit the fast-path load, and record slow-path fixup data if needed.
 */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
    TCGLabelQemuLdst *ldst;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);

        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        /* Resume point after the slow-path helper returns. */
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
/* Guest store entry point; mirrors tcg_out_qemu_ld. */
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
    TCGLabelQemuLdst *ldst;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);

        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
/*
 * 128-bit guest load/store.  Uses LDP/STP when a pair is sufficiently
 * atomic (alignment proven, or LSE2), otherwise an LDXP/STXP
 * exclusive-pair loop; when alignment is unknown both sequences are
 * emitted with a runtime alignment test selecting between them.
 */
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
    TCGLabelQemuLdst *ldst;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LDP/STP have no indexing. */
    if (h.index == TCG_REG_XZR) {
        base = TCG_REG_TMP2;
        if (h.index_ext == TCG_TYPE_I32) {
            /* add base, base, index, uxtw */
            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
                         h.base, h.index, MO_32, 0);
            /* add base, base, index */
            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);

    use_pair = h.aa.atom < MO_128 || have_lse2;

        tcg_insn_unit *branch = NULL;
        TCGReg ll, lh, sl, sh;

        /*
         * If we have already checked for 16-byte alignment, that's all
         * we need.  Otherwise we have determined that misaligned atomicity
         * may be handled with two 8-byte loads.
         */
        if (h.aa.align < MO_128) {
            /*
             * TODO: align should be MO_64, so we only need test bit 3,
             * which means we could use TBNZ instead of ANDS+B_C.
             */
            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
            branch = s->code_ptr;
            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

        /*
         * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
         *    ldxp lo, hi, [base]
         *    stxp t0, lo, hi, [base]
         * Require no overlap between data{lo,hi} and base.
         */
        if (datalo == base || datahi == base) {
            tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
            base = TCG_REG_TMP2;

        /*
         * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
         * 1: ldxp t0, t1, [base]
         *    stxp t0, lo, hi, [base]
         */
        tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
        /* Retry (CBNZ back 2 insns) until the exclusive store succeeds. */
        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);

            /* "b .+8", branching across the one insn of use_pair. */
            tcg_out_insn(s, 3206, B, 2);
            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));

            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);

        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
/* Address of the epilogue's return sequence, set by the prologue. */
static const tcg_insn_unit *tb_ret_addr;
/*
 * Exit the translation block, returning a0 in X0.  A return value of
 * zero reuses the goto_ptr epilogue (which already zeroes X0); the
 * branch is direct if in 26-bit range, else via BR through TMP0/X16,
 * which keeps the landing pad a BTI-J target.
 */
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
    const tcg_insn_unit *target;

    /* Reuse the zeroing that exists for goto_ptr. */
        target = tcg_code_gen_epilogue;
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        target = tb_ret_addr;

    offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
        /*
         * Only x16/x17 generate BTI type Jump (2),
         * other registers generate BTI type Jump|Call (3).
         */
        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
/*
 * Emit the chained-TB jump slot: a single insn that tb_target_set_jmp_target
 * later patches to either a direct B or a literal LDR into TMP0, followed
 * by BR TMP0.  The fallthrough (reset) point needs a BTI landing pad.
 */
static void tcg_out_goto_tb(TCGContext *s, int which)
    /*
     * Direct branch, or indirect address load, will be patched
     * by tb_target_set_jmp_target.  Assert indirect load offset
     * in range early, regardless of direct branch distance.
     */
    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));

    set_jmp_insn_offset(s, which);
    tcg_out32(s, I3206_B);
    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
    set_jmp_reset_offset(s, which);
    tcg_out_bti(s, BTI_J);
/*
 * Patch the goto_tb jump slot of TB n to branch to its new target:
 * a direct B when within +/-128MB, otherwise a pc-relative literal
 * LDR of the target address into TMP0 (the following BR TMP0 was
 * emitted statically).  The store is atomic and caches are flushed.
 */
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
    uintptr_t d_addr = tb->jmp_target_addr[n];
    ptrdiff_t d_offset = d_addr - jmp_rx;

    /* Either directly branch, or indirect branch load. */
    if (d_offset == sextract64(d_offset, 0, 28)) {
        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
        ptrdiff_t i_offset = i_addr - jmp_rx;

        /* Note that we asserted this in range in tcg_out_goto_tb. */
        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
/*
 * Main opcode dispatcher: translate one TCG op into host instructions.
 * 'ext' selects the 32- vs 64-bit form of most instructions and is
 * derived from whether the opcode is a 64-bit op.  Constant operands
 * (const_args[i] set) choose immediate-form encodings where possible.
 * NOTE(review): the switch(opc) header and per-case breaks are on
 * elided lines in this view.
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data. */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments. */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0. */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);

        tcg_out_goto_label(s, arg_label(a0));

    /* Host loads: select width/signedness-matched encoding. */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);

    /* Host stores: REG0 allows storing constant zero via XZR. */
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);

    /* Arithmetic: immediate form when c2, register form otherwise. */
    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
            tcg_out_addsubi(s, ext, a0, a1, a2);
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);

    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
            tcg_out_addsubi(s, ext, a0, a1, -a2);
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);

    /* Logical ops: bitmask-immediate form when c2. */
    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);

    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);

    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);

    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);

    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);

    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);

    /* Remainder = a1 - (a1 / a2) * a2, via DIV then MSUB. */
    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);

    /* Shifts/rotates: immediate helper when c2, variable insn otherwise. */
    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
            tcg_out_shl(s, ext, a0, a1, a2);
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
            tcg_out_shr(s, ext, a0, a1, a2);
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
            tcg_out_sar(s, ext, a0, a1, a2);
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
            tcg_out_rotr(s, ext, a0, a1, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
            tcg_out_rotl(s, ext, a0, a1, a2);
            /* No ROL insn: rotate right by negated count. */
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));

    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));

    case INDEX_op_negsetcond_i32:
    case INDEX_op_negsetcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);

    /* Guest memory ops. */
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);

    /* Byte swaps; a2 flags request sign/zero extension of the result. */
    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
    case INDEX_op_bswap32_i64:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_ext16s(s, ext, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /* Output must be zero-extended, but input isn't. */
            tcg_out_ext16u(s, a0, a0);

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
        g_assert_not_reached();
2422 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2423 unsigned vecl, unsigned vece,
2424 const TCGArg args[TCG_MAX_OP_ARGS],
2425 const int const_args[TCG_MAX_OP_ARGS])
2427 static const AArch64Insn cmp_vec_insn[16] = {
2428 [TCG_COND_EQ] = I3616_CMEQ,
2429 [TCG_COND_GT] = I3616_CMGT,
2430 [TCG_COND_GE] = I3616_CMGE,
2431 [TCG_COND_GTU] = I3616_CMHI,
2432 [TCG_COND_GEU] = I3616_CMHS,
2434 static const AArch64Insn cmp_scalar_insn[16] = {
2435 [TCG_COND_EQ] = I3611_CMEQ,
2436 [TCG_COND_GT] = I3611_CMGT,
2437 [TCG_COND_GE] = I3611_CMGE,
2438 [TCG_COND_GTU] = I3611_CMHI,
2439 [TCG_COND_GEU] = I3611_CMHS,
2441 static const AArch64Insn cmp0_vec_insn[16] = {
2442 [TCG_COND_EQ] = I3617_CMEQ0,
2443 [TCG_COND_GT] = I3617_CMGT0,
2444 [TCG_COND_GE] = I3617_CMGE0,
2445 [TCG_COND_LT] = I3617_CMLT0,
2446 [TCG_COND_LE] = I3617_CMLE0,
2448 static const AArch64Insn cmp0_scalar_insn[16] = {
2449 [TCG_COND_EQ] = I3612_CMEQ0,
2450 [TCG_COND_GT] = I3612_CMGT0,
2451 [TCG_COND_GE] = I3612_CMGE0,
2452 [TCG_COND_LT] = I3612_CMLT0,
2453 [TCG_COND_LE] = I3612_CMLE0,
2456 TCGType type = vecl + TCG_TYPE_V64;
2457 unsigned is_q = vecl;
2458 bool is_scalar = !is_q && vece == MO_64;
2459 TCGArg a0, a1, a2, a3;
2467 case INDEX_op_ld_vec:
2468 tcg_out_ld(s, type, a0, a1, a2);
2470 case INDEX_op_st_vec:
2471 tcg_out_st(s, type, a0, a1, a2);
2473 case INDEX_op_dupm_vec:
2474 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2476 case INDEX_op_add_vec:
2478 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2480 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2483 case INDEX_op_sub_vec:
2485 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2487 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2490 case INDEX_op_mul_vec:
2491 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2493 case INDEX_op_neg_vec:
2495 tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2497 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2500 case INDEX_op_abs_vec:
2502 tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2504 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2507 case INDEX_op_and_vec:
2508 if (const_args[2]) {
2509 is_shimm1632(~a2, &cmode, &imm8);
2511 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2514 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2517 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2519 case INDEX_op_or_vec:
2520 if (const_args[2]) {
2521 is_shimm1632(a2, &cmode, &imm8);
2523 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2526 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2529 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2531 case INDEX_op_andc_vec:
2532 if (const_args[2]) {
2533 is_shimm1632(a2, &cmode, &imm8);
2535 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2538 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2541 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2543 case INDEX_op_orc_vec:
2544 if (const_args[2]) {
2545 is_shimm1632(~a2, &cmode, &imm8);
2547 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2550 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2553 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2555 case INDEX_op_xor_vec:
2556 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2558 case INDEX_op_ssadd_vec:
2560 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2562 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2565 case INDEX_op_sssub_vec:
2567 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2569 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2572 case INDEX_op_usadd_vec:
2574 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2576 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2579 case INDEX_op_ussub_vec:
2581 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2583 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2586 case INDEX_op_smax_vec:
2587 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2589 case INDEX_op_smin_vec:
2590 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2592 case INDEX_op_umax_vec:
2593 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2595 case INDEX_op_umin_vec:
2596 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2598 case INDEX_op_not_vec:
2599 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2601 case INDEX_op_shli_vec:
2603 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2605 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2608 case INDEX_op_shri_vec:
2610 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2612 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2615 case INDEX_op_sari_vec:
2617 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2619 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2622 case INDEX_op_aa64_sli_vec:
2624 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2626 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2629 case INDEX_op_shlv_vec:
2631 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2633 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2636 case INDEX_op_aa64_sshl_vec:
2638 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2640 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2643 case INDEX_op_cmp_vec:
2645 TCGCond cond = args[3];
2648 if (cond == TCG_COND_NE) {
2649 if (const_args[2]) {
2651 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2653 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2657 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2659 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2661 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2664 if (const_args[2]) {
2666 insn = cmp0_scalar_insn[cond];
2668 tcg_out_insn_3612(s, insn, vece, a0, a1);
2672 insn = cmp0_vec_insn[cond];
2674 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2678 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2682 insn = cmp_scalar_insn[cond];
2685 t = a1, a1 = a2, a2 = t;
2686 cond = tcg_swap_cond(cond);
2687 insn = cmp_scalar_insn[cond];
2688 tcg_debug_assert(insn != 0);
2690 tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2692 insn = cmp_vec_insn[cond];
2695 t = a1, a1 = a2, a2 = t;
2696 cond = tcg_swap_cond(cond);
2697 insn = cmp_vec_insn[cond];
2698 tcg_debug_assert(insn != 0);
2700 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2706 case INDEX_op_bitsel_vec:
2709 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2710 } else if (a0 == a2) {
2711 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2714 tcg_out_mov(s, type, a0, a1);
2716 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2720 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2721 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2723 g_assert_not_reached();
2727 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2730 case INDEX_op_add_vec:
2731 case INDEX_op_sub_vec:
2732 case INDEX_op_and_vec:
2733 case INDEX_op_or_vec:
2734 case INDEX_op_xor_vec:
2735 case INDEX_op_andc_vec:
2736 case INDEX_op_orc_vec:
2737 case INDEX_op_neg_vec:
2738 case INDEX_op_abs_vec:
2739 case INDEX_op_not_vec:
2740 case INDEX_op_cmp_vec:
2741 case INDEX_op_shli_vec:
2742 case INDEX_op_shri_vec:
2743 case INDEX_op_sari_vec:
2744 case INDEX_op_ssadd_vec:
2745 case INDEX_op_sssub_vec:
2746 case INDEX_op_usadd_vec:
2747 case INDEX_op_ussub_vec:
2748 case INDEX_op_shlv_vec:
2749 case INDEX_op_bitsel_vec:
2751 case INDEX_op_rotli_vec:
2752 case INDEX_op_shrv_vec:
2753 case INDEX_op_sarv_vec:
2754 case INDEX_op_rotlv_vec:
2755 case INDEX_op_rotrv_vec:
2757 case INDEX_op_mul_vec:
2758 case INDEX_op_smax_vec:
2759 case INDEX_op_smin_vec:
2760 case INDEX_op_umax_vec:
2761 case INDEX_op_umin_vec:
2762 return vece < MO_64;
2769 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2773 TCGv_vec v0, v1, v2, t1, t2, c1;
2777 v0 = temp_tcgv_vec(arg_temp(a0));
2778 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2779 a2 = va_arg(va, TCGArg);
2783 case INDEX_op_rotli_vec:
2784 t1 = tcg_temp_new_vec(type);
2785 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2786 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2787 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2788 tcg_temp_free_vec(t1);
2791 case INDEX_op_shrv_vec:
2792 case INDEX_op_sarv_vec:
2793 /* Right shifts are negative left shifts for AArch64. */
2794 v2 = temp_tcgv_vec(arg_temp(a2));
2795 t1 = tcg_temp_new_vec(type);
2796 tcg_gen_neg_vec(vece, t1, v2);
2797 opc = (opc == INDEX_op_shrv_vec
2798 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2799 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2800 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2801 tcg_temp_free_vec(t1);
2804 case INDEX_op_rotlv_vec:
2805 v2 = temp_tcgv_vec(arg_temp(a2));
2806 t1 = tcg_temp_new_vec(type);
2807 c1 = tcg_constant_vec(type, vece, 8 << vece);
2808 tcg_gen_sub_vec(vece, t1, v2, c1);
2809 /* Right shifts are negative left shifts for AArch64. */
2810 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2811 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2812 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2813 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2814 tcg_gen_or_vec(vece, v0, v0, t1);
2815 tcg_temp_free_vec(t1);
2818 case INDEX_op_rotrv_vec:
2819 v2 = temp_tcgv_vec(arg_temp(a2));
2820 t1 = tcg_temp_new_vec(type);
2821 t2 = tcg_temp_new_vec(type);
2822 c1 = tcg_constant_vec(type, vece, 8 << vece);
2823 tcg_gen_neg_vec(vece, t1, v2);
2824 tcg_gen_sub_vec(vece, t2, c1, v2);
2825 /* Right shifts are negative left shifts for AArch64. */
2826 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2827 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2828 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2829 tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2830 tcg_gen_or_vec(vece, v0, t1, t2);
2831 tcg_temp_free_vec(t1);
2832 tcg_temp_free_vec(t2);
2836 g_assert_not_reached();
2840 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2843 case INDEX_op_goto_ptr:
2846 case INDEX_op_ld8u_i32:
2847 case INDEX_op_ld8s_i32:
2848 case INDEX_op_ld16u_i32:
2849 case INDEX_op_ld16s_i32:
2850 case INDEX_op_ld_i32:
2851 case INDEX_op_ld8u_i64:
2852 case INDEX_op_ld8s_i64:
2853 case INDEX_op_ld16u_i64:
2854 case INDEX_op_ld16s_i64:
2855 case INDEX_op_ld32u_i64:
2856 case INDEX_op_ld32s_i64:
2857 case INDEX_op_ld_i64:
2858 case INDEX_op_neg_i32:
2859 case INDEX_op_neg_i64:
2860 case INDEX_op_not_i32:
2861 case INDEX_op_not_i64:
2862 case INDEX_op_bswap16_i32:
2863 case INDEX_op_bswap32_i32:
2864 case INDEX_op_bswap16_i64:
2865 case INDEX_op_bswap32_i64:
2866 case INDEX_op_bswap64_i64:
2867 case INDEX_op_ext8s_i32:
2868 case INDEX_op_ext16s_i32:
2869 case INDEX_op_ext8u_i32:
2870 case INDEX_op_ext16u_i32:
2871 case INDEX_op_ext8s_i64:
2872 case INDEX_op_ext16s_i64:
2873 case INDEX_op_ext32s_i64:
2874 case INDEX_op_ext8u_i64:
2875 case INDEX_op_ext16u_i64:
2876 case INDEX_op_ext32u_i64:
2877 case INDEX_op_ext_i32_i64:
2878 case INDEX_op_extu_i32_i64:
2879 case INDEX_op_extract_i32:
2880 case INDEX_op_extract_i64:
2881 case INDEX_op_sextract_i32:
2882 case INDEX_op_sextract_i64:
2883 return C_O1_I1(r, r);
2885 case INDEX_op_st8_i32:
2886 case INDEX_op_st16_i32:
2887 case INDEX_op_st_i32:
2888 case INDEX_op_st8_i64:
2889 case INDEX_op_st16_i64:
2890 case INDEX_op_st32_i64:
2891 case INDEX_op_st_i64:
2892 return C_O0_I2(rZ, r);
2894 case INDEX_op_add_i32:
2895 case INDEX_op_add_i64:
2896 case INDEX_op_sub_i32:
2897 case INDEX_op_sub_i64:
2898 case INDEX_op_setcond_i32:
2899 case INDEX_op_setcond_i64:
2900 case INDEX_op_negsetcond_i32:
2901 case INDEX_op_negsetcond_i64:
2902 return C_O1_I2(r, r, rA);
2904 case INDEX_op_mul_i32:
2905 case INDEX_op_mul_i64:
2906 case INDEX_op_div_i32:
2907 case INDEX_op_div_i64:
2908 case INDEX_op_divu_i32:
2909 case INDEX_op_divu_i64:
2910 case INDEX_op_rem_i32:
2911 case INDEX_op_rem_i64:
2912 case INDEX_op_remu_i32:
2913 case INDEX_op_remu_i64:
2914 case INDEX_op_muluh_i64:
2915 case INDEX_op_mulsh_i64:
2916 return C_O1_I2(r, r, r);
2918 case INDEX_op_and_i32:
2919 case INDEX_op_and_i64:
2920 case INDEX_op_or_i32:
2921 case INDEX_op_or_i64:
2922 case INDEX_op_xor_i32:
2923 case INDEX_op_xor_i64:
2924 case INDEX_op_andc_i32:
2925 case INDEX_op_andc_i64:
2926 case INDEX_op_orc_i32:
2927 case INDEX_op_orc_i64:
2928 case INDEX_op_eqv_i32:
2929 case INDEX_op_eqv_i64:
2930 return C_O1_I2(r, r, rL);
2932 case INDEX_op_shl_i32:
2933 case INDEX_op_shr_i32:
2934 case INDEX_op_sar_i32:
2935 case INDEX_op_rotl_i32:
2936 case INDEX_op_rotr_i32:
2937 case INDEX_op_shl_i64:
2938 case INDEX_op_shr_i64:
2939 case INDEX_op_sar_i64:
2940 case INDEX_op_rotl_i64:
2941 case INDEX_op_rotr_i64:
2942 return C_O1_I2(r, r, ri);
2944 case INDEX_op_clz_i32:
2945 case INDEX_op_ctz_i32:
2946 case INDEX_op_clz_i64:
2947 case INDEX_op_ctz_i64:
2948 return C_O1_I2(r, r, rAL);
2950 case INDEX_op_brcond_i32:
2951 case INDEX_op_brcond_i64:
2952 return C_O0_I2(r, rA);
2954 case INDEX_op_movcond_i32:
2955 case INDEX_op_movcond_i64:
2956 return C_O1_I4(r, r, rA, rZ, rZ);
2958 case INDEX_op_qemu_ld_a32_i32:
2959 case INDEX_op_qemu_ld_a64_i32:
2960 case INDEX_op_qemu_ld_a32_i64:
2961 case INDEX_op_qemu_ld_a64_i64:
2962 return C_O1_I1(r, r);
2963 case INDEX_op_qemu_ld_a32_i128:
2964 case INDEX_op_qemu_ld_a64_i128:
2965 return C_O2_I1(r, r, r);
2966 case INDEX_op_qemu_st_a32_i32:
2967 case INDEX_op_qemu_st_a64_i32:
2968 case INDEX_op_qemu_st_a32_i64:
2969 case INDEX_op_qemu_st_a64_i64:
2970 return C_O0_I2(rZ, r);
2971 case INDEX_op_qemu_st_a32_i128:
2972 case INDEX_op_qemu_st_a64_i128:
2973 return C_O0_I3(rZ, rZ, r);
2975 case INDEX_op_deposit_i32:
2976 case INDEX_op_deposit_i64:
2977 return C_O1_I2(r, 0, rZ);
2979 case INDEX_op_extract2_i32:
2980 case INDEX_op_extract2_i64:
2981 return C_O1_I2(r, rZ, rZ);
2983 case INDEX_op_add2_i32:
2984 case INDEX_op_add2_i64:
2985 case INDEX_op_sub2_i32:
2986 case INDEX_op_sub2_i64:
2987 return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2989 case INDEX_op_add_vec:
2990 case INDEX_op_sub_vec:
2991 case INDEX_op_mul_vec:
2992 case INDEX_op_xor_vec:
2993 case INDEX_op_ssadd_vec:
2994 case INDEX_op_sssub_vec:
2995 case INDEX_op_usadd_vec:
2996 case INDEX_op_ussub_vec:
2997 case INDEX_op_smax_vec:
2998 case INDEX_op_smin_vec:
2999 case INDEX_op_umax_vec:
3000 case INDEX_op_umin_vec:
3001 case INDEX_op_shlv_vec:
3002 case INDEX_op_shrv_vec:
3003 case INDEX_op_sarv_vec:
3004 case INDEX_op_aa64_sshl_vec:
3005 return C_O1_I2(w, w, w);
3006 case INDEX_op_not_vec:
3007 case INDEX_op_neg_vec:
3008 case INDEX_op_abs_vec:
3009 case INDEX_op_shli_vec:
3010 case INDEX_op_shri_vec:
3011 case INDEX_op_sari_vec:
3012 return C_O1_I1(w, w);
3013 case INDEX_op_ld_vec:
3014 case INDEX_op_dupm_vec:
3015 return C_O1_I1(w, r);
3016 case INDEX_op_st_vec:
3017 return C_O0_I2(w, r);
3018 case INDEX_op_dup_vec:
3019 return C_O1_I1(w, wr);
3020 case INDEX_op_or_vec:
3021 case INDEX_op_andc_vec:
3022 return C_O1_I2(w, w, wO);
3023 case INDEX_op_and_vec:
3024 case INDEX_op_orc_vec:
3025 return C_O1_I2(w, w, wN);
3026 case INDEX_op_cmp_vec:
3027 return C_O1_I2(w, w, wZ);
3028 case INDEX_op_bitsel_vec:
3029 return C_O1_I3(w, w, w, w);
3030 case INDEX_op_aa64_sli_vec:
3031 return C_O1_I2(w, 0, w);
3034 g_assert_not_reached();
3038 static void tcg_target_init(TCGContext *s)
3040 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3041 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3042 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3043 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3045 tcg_target_call_clobber_regs = -1ull;
3046 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3047 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3048 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3049 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3050 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3051 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3052 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3053 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3054 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3055 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3056 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3057 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3058 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3059 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3060 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3061 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3062 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3063 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3064 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3066 s->reserved_regs = 0;
3067 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3068 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3069 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3070 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3071 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3072 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3073 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3076 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
3077 #define PUSH_SIZE ((30 - 19 + 1) * 8)
3079 #define FRAME_SIZE \
3081 + TCG_STATIC_CALL_ARGS_SIZE \
3082 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3083 + TCG_TARGET_STACK_ALIGN - 1) \
3084 & ~(TCG_TARGET_STACK_ALIGN - 1))
3086 /* We're expecting a 2 byte uleb128 encoded value. */
3087 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3089 /* We're expecting to use a single ADDI insn. */
3090 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3092 static void tcg_target_qemu_prologue(TCGContext *s)
3096 tcg_out_bti(s, BTI_C);
3098 /* Push (FP, LR) and allocate space for all saved registers. */
3099 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3100 TCG_REG_SP, -PUSH_SIZE, 1, 1);
3102 /* Set up frame pointer for canonical unwinding. */
3103 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3105 /* Store callee-preserved regs x19..x28. */
3106 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3107 int ofs = (r - TCG_REG_X19 + 2) * 8;
3108 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3111 /* Make stack space for TCG locals. */
3112 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3113 FRAME_SIZE - PUSH_SIZE);
3115 /* Inform TCG about how to find TCG locals with register, offset, size. */
3116 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3117 CPU_TEMP_BUF_NLONGS * sizeof(long));
3119 if (!tcg_use_softmmu) {
3121 * Note that XZR cannot be encoded in the address base register slot,
3122 * as that actually encodes SP. Depending on the guest, we may need
3123 * to zero-extend the guest address via the address index register slot,
3124 * therefore we need to load even a zero guest base into a register.
3126 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3127 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3130 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3131 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3134 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3135 * and fall through to the rest of the epilogue.
3137 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3138 tcg_out_bti(s, BTI_J);
3139 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3142 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3143 tcg_out_bti(s, BTI_J);
3145 /* Remove TCG locals stack space. */
3146 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3147 FRAME_SIZE - PUSH_SIZE);
3149 /* Restore registers x19..x28. */
3150 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3151 int ofs = (r - TCG_REG_X19 + 2) * 8;
3152 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3155 /* Pop (FP, LR), restore SP to previous frame. */
3156 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3157 TCG_REG_SP, PUSH_SIZE, 0, 1);
3158 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3161 static void tcg_out_tb_start(TCGContext *s)
3163 tcg_out_bti(s, BTI_J);
3166 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3169 for (i = 0; i < count; ++i) {
3176 uint8_t fde_def_cfa[4];
3177 uint8_t fde_reg_ofs[24];
3180 #define ELF_HOST_MACHINE EM_AARCH64
3182 static const DebugFrame debug_frame = {
3183 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3186 .h.cie.code_align = 1,
3187 .h.cie.data_align = 0x78, /* sleb128 -8 */
3188 .h.cie.return_column = TCG_REG_LR,
3190 /* Total FDE size does not include the "len" member. */
3191 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3194 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
3195 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
3199 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
3200 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
3201 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
3202 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
3203 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
3204 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
3205 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
3206 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
3207 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
3208 0x80 + 19, 10, /* DW_CFA_offset, x1p, -80 */
3209 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
3210 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
3214 void tcg_register_jit(const void *buf, size_t buf_size)
3216 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));