tcg/aarch64: Remove reloc_pc26_atomic
[qemu/ar7.git] / tcg / aarch64 / tcg-target.inc.c
1 /*
2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "tcg-pool.inc.c"
14 #include "qemu/bitops.h"
16 /* We're going to re-use TCGType in setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33 #endif /* CONFIG_DEBUG_TCG */
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
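/* Scratch registers for the code generators below: X30 (the link register,
   noted above as "reserved as temporary") and V31 for vector values. */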
72 #ifndef CONFIG_SOFTMMU
73 /* Note that XZR cannot be encoded in the address base register slot,
74 as that actually encodes SP. So if we need to zero-extend the guest
75 address, via the address index register slot, we need to load even
76 a zero guest base into a register. */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
79 #endif
81 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
83 ptrdiff_t offset = target - code_ptr;
84 tcg_debug_assert(offset == sextract64(offset, 0, 26));
85 /* read instruction, mask away previous PC_REL26 parameter contents,
86 set the proper offset, then write back the instruction. */
87 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
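/* As above, but for the 19-bit offset field used by conditional branches
   and load-literal, which occupies bits [23:5] of the instruction. */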
90 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
92 ptrdiff_t offset = target - code_ptr;
93 tcg_debug_assert(offset == sextract64(offset, 0, 19));
94 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
97 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
98 intptr_t value, intptr_t addend)
100 tcg_debug_assert(addend == 0);
101 switch (type) {
102 case R_AARCH64_JUMP26:
103 case R_AARCH64_CALL26:
104 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
105 break;
106 case R_AARCH64_CONDBR19:
107 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
108 break;
109 default:
110 tcg_abort();
114 #define TCG_CT_CONST_AIMM 0x100
115 #define TCG_CT_CONST_LIMM 0x200
116 #define TCG_CT_CONST_ZERO 0x400
117 #define TCG_CT_CONST_MONE 0x800
119 /* parse target specific constraints */
120 static const char *target_parse_constraint(TCGArgConstraint *ct,
121 const char *ct_str, TCGType type)
123 switch (*ct_str++) {
124 case 'r': /* general registers */
125 ct->ct |= TCG_CT_REG;
126 ct->u.regs |= 0xffffffffu;
127 break;
128 case 'w': /* advsimd registers */
129 ct->ct |= TCG_CT_REG;
130 ct->u.regs |= 0xffffffff00000000ull;
131 break;
132 case 'l': /* qemu_ld / qemu_st address, data_reg */
133 ct->ct |= TCG_CT_REG;
134 ct->u.regs = 0xffffffffu;
135 #ifdef CONFIG_SOFTMMU
136 /* x0 and x1 will be overwritten when reading the tlb entry,
137 and x2 and x3 are needed for helper args, so better to avoid using them. */
138 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
139 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
140 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
141 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
142 #endif
143 break;
144 case 'A': /* Valid for arithmetic immediate (positive or negative). */
145 ct->ct |= TCG_CT_CONST_AIMM;
146 break;
147 case 'L': /* Valid for logical immediate. */
148 ct->ct |= TCG_CT_CONST_LIMM;
149 break;
150 case 'M': /* minus one */
151 ct->ct |= TCG_CT_CONST_MONE;
152 break;
153 case 'Z': /* zero */
154 ct->ct |= TCG_CT_CONST_ZERO;
155 break;
156 default:
157 return NULL;
159 return ct_str;
162 /* Match a constant valid for addition (12-bit, optionally shifted). */
163 static inline bool is_aimm(uint64_t val)
165 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
168 /* Match a constant valid for logical operations. */
169 static inline bool is_limm(uint64_t val)
171 /* Taking a simplified view of the logical immediates for now, ignoring
172 the replication that can happen across the field. Match bit patterns
173 of the forms
174 0....01....1
175 0..01..10..0
176 and their inverses. */
178 /* Make things easier below, by testing the form with msb clear. */
179 if ((int64_t)val < 0) {
180 val = ~val;
182 if (val == 0) {
183 return false;
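    /* Adding the lowest set bit to a single contiguous run of ones carries
       out to a lone bit (or to zero), so the power-of-two test below accepts
       exactly the patterns described in the comment above. */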
185 val += val & -val;
186 return (val & (val - 1)) == 0;
189 /* Match a constant that is valid for vectors. */
190 static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
192 int i;
194 *op = 0;
195 /* Match replication across 8 bits. */
196 if (v64 == dup_const(MO_8, v64)) {
197 *cmode = 0xe;
198 *imm8 = v64 & 0xff;
199 return true;
201 /* Match replication across 16 bits. */
202 if (v64 == dup_const(MO_16, v64)) {
203 uint16_t v16 = v64;
205 if (v16 == (v16 & 0xff)) {
206 *cmode = 0x8;
207 *imm8 = v16 & 0xff;
208 return true;
209 } else if (v16 == (v16 & 0xff00)) {
210 *cmode = 0xa;
211 *imm8 = v16 >> 8;
212 return true;
215 /* Match replication across 32 bits. */
216 if (v64 == dup_const(MO_32, v64)) {
217 uint32_t v32 = v64;
219 if (v32 == (v32 & 0xff)) {
220 *cmode = 0x0;
221 *imm8 = v32 & 0xff;
222 return true;
223 } else if (v32 == (v32 & 0xff00)) {
224 *cmode = 0x2;
225 *imm8 = (v32 >> 8) & 0xff;
226 return true;
227 } else if (v32 == (v32 & 0xff0000)) {
228 *cmode = 0x4;
229 *imm8 = (v32 >> 16) & 0xff;
230 return true;
231 } else if (v32 == (v32 & 0xff000000)) {
232 *cmode = 0x6;
233 *imm8 = v32 >> 24;
234 return true;
235 } else if ((v32 & 0xffff00ff) == 0xff) {
236 *cmode = 0xc;
237 *imm8 = (v32 >> 8) & 0xff;
238 return true;
239 } else if ((v32 & 0xff00ffff) == 0xffff) {
240 *cmode = 0xd;
241 *imm8 = (v32 >> 16) & 0xff;
242 return true;
244 /* Match forms of a float32. */
245 if (extract32(v32, 0, 19) == 0
246 && (extract32(v32, 25, 6) == 0x20
247 || extract32(v32, 25, 6) == 0x1f)) {
248 *cmode = 0xf;
249 *imm8 = (extract32(v32, 31, 1) << 7)
250 | (extract32(v32, 25, 1) << 6)
251 | extract32(v32, 19, 6);
252 return true;
255 /* Match forms of a float64. */
256 if (extract64(v64, 0, 48) == 0
257 && (extract64(v64, 54, 9) == 0x100
258 || extract64(v64, 54, 9) == 0x0ff)) {
259 *cmode = 0xf;
260 *op = 1;
261 *imm8 = (extract64(v64, 63, 1) << 7)
262 | (extract64(v64, 54, 1) << 6)
263 | extract64(v64, 48, 6);
264 return true;
266 /* Match bytes of 0x00 and 0xff. */
267 for (i = 0; i < 64; i += 8) {
268 uint64_t byte = extract64(v64, i, 8);
269 if (byte != 0 && byte != 0xff) {
270 break;
273 if (i == 64) {
274 *cmode = 0xe;
275 *op = 1;
276 *imm8 = (extract64(v64, 0, 1) << 0)
277 | (extract64(v64, 8, 1) << 1)
278 | (extract64(v64, 16, 1) << 2)
279 | (extract64(v64, 24, 1) << 3)
280 | (extract64(v64, 32, 1) << 4)
281 | (extract64(v64, 40, 1) << 5)
282 | (extract64(v64, 48, 1) << 6)
283 | (extract64(v64, 56, 1) << 7);
284 return true;
286 return false;
289 static int tcg_target_const_match(tcg_target_long val, TCGType type,
290 const TCGArgConstraint *arg_ct)
292 int ct = arg_ct->ct;
294 if (ct & TCG_CT_CONST) {
295 return 1;
297 if (type == TCG_TYPE_I32) {
298 val = (int32_t)val;
300 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
301 return 1;
303 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
304 return 1;
306 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
307 return 1;
309 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
310 return 1;
313 return 0;
316 enum aarch64_cond_code {
317 COND_EQ = 0x0,
318 COND_NE = 0x1,
319 COND_CS = 0x2, /* Unsigned greater or equal */
320 COND_HS = COND_CS, /* ALIAS greater or equal */
321 COND_CC = 0x3, /* Unsigned less than */
322 COND_LO = COND_CC, /* ALIAS Lower */
323 COND_MI = 0x4, /* Negative */
324 COND_PL = 0x5, /* Zero or greater */
325 COND_VS = 0x6, /* Overflow */
326 COND_VC = 0x7, /* No overflow */
327 COND_HI = 0x8, /* Unsigned greater than */
328 COND_LS = 0x9, /* Unsigned less or equal */
329 COND_GE = 0xa,
330 COND_LT = 0xb,
331 COND_GT = 0xc,
332 COND_LE = 0xd,
333 COND_AL = 0xe,
334 COND_NV = 0xf, /* behaves like COND_AL here */
337 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
338 [TCG_COND_EQ] = COND_EQ,
339 [TCG_COND_NE] = COND_NE,
340 [TCG_COND_LT] = COND_LT,
341 [TCG_COND_GE] = COND_GE,
342 [TCG_COND_LE] = COND_LE,
343 [TCG_COND_GT] = COND_GT,
344 /* unsigned */
345 [TCG_COND_LTU] = COND_LO,
346 [TCG_COND_GTU] = COND_HI,
347 [TCG_COND_GEU] = COND_HS,
348 [TCG_COND_LEU] = COND_LS,
351 typedef enum {
352 LDST_ST = 0, /* store */
353 LDST_LD = 1, /* load */
354 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
355 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
356 } AArch64LdstType;
358 /* We encode the format of the insn into the beginning of the name, so that
359 we can have the preprocessor help "typecheck" the insn vs the output
360 function. Arm didn't provide us with nice names for the formats, so we
361 use the section number of the architecture reference manual in which the
362 instruction group is described. */
363 typedef enum {
364 /* Compare and branch (immediate). */
365 I3201_CBZ = 0x34000000,
366 I3201_CBNZ = 0x35000000,
368 /* Conditional branch (immediate). */
369 I3202_B_C = 0x54000000,
371 /* Unconditional branch (immediate). */
372 I3206_B = 0x14000000,
373 I3206_BL = 0x94000000,
375 /* Unconditional branch (register). */
376 I3207_BR = 0xd61f0000,
377 I3207_BLR = 0xd63f0000,
378 I3207_RET = 0xd65f0000,
380 /* Load literal for loading the address at pc-relative offset */
381 I3305_LDR = 0x58000000,
382 I3305_LDR_v64 = 0x5c000000,
383 I3305_LDR_v128 = 0x9c000000,
385 /* Load/store register. Described here as 3.3.12, but the helper
386 that emits them can transform to 3.3.10 or 3.3.13. */
387 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
388 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
389 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
390 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
392 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
393 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
394 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
395 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
397 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
398 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
400 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
401 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
402 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
404 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
405 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
407 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
408 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
410 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
411 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
413 I3312_TO_I3310 = 0x00200800,
414 I3312_TO_I3313 = 0x01000000,
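    /* ORing these into an I3312 opcode converts it to the register-offset
       form (3.3.10, by setting bits 21 and 11) or to the scaled unsigned
       immediate form (3.3.13, by setting bit 24); see tcg_out_insn_3310
       and tcg_out_insn_3313 below. */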
416 /* Load/store register pair instructions. */
417 I3314_LDP = 0x28400000,
418 I3314_STP = 0x28000000,
420 /* Add/subtract immediate instructions. */
421 I3401_ADDI = 0x11000000,
422 I3401_ADDSI = 0x31000000,
423 I3401_SUBI = 0x51000000,
424 I3401_SUBSI = 0x71000000,
426 /* Bitfield instructions. */
427 I3402_BFM = 0x33000000,
428 I3402_SBFM = 0x13000000,
429 I3402_UBFM = 0x53000000,
431 /* Extract instruction. */
432 I3403_EXTR = 0x13800000,
434 /* Logical immediate instructions. */
435 I3404_ANDI = 0x12000000,
436 I3404_ORRI = 0x32000000,
437 I3404_EORI = 0x52000000,
439 /* Move wide immediate instructions. */
440 I3405_MOVN = 0x12800000,
441 I3405_MOVZ = 0x52800000,
442 I3405_MOVK = 0x72800000,
444 /* PC relative addressing instructions. */
445 I3406_ADR = 0x10000000,
446 I3406_ADRP = 0x90000000,
448 /* Add/subtract shifted register instructions (without a shift). */
449 I3502_ADD = 0x0b000000,
450 I3502_ADDS = 0x2b000000,
451 I3502_SUB = 0x4b000000,
452 I3502_SUBS = 0x6b000000,
454 /* Add/subtract shifted register instructions (with a shift). */
455 I3502S_ADD_LSL = I3502_ADD,
457 /* Add/subtract with carry instructions. */
458 I3503_ADC = 0x1a000000,
459 I3503_SBC = 0x5a000000,
461 /* Conditional select instructions. */
462 I3506_CSEL = 0x1a800000,
463 I3506_CSINC = 0x1a800400,
464 I3506_CSINV = 0x5a800000,
465 I3506_CSNEG = 0x5a800400,
467 /* Data-processing (1 source) instructions. */
468 I3507_CLZ = 0x5ac01000,
469 I3507_RBIT = 0x5ac00000,
470 I3507_REV16 = 0x5ac00400,
471 I3507_REV32 = 0x5ac00800,
472 I3507_REV64 = 0x5ac00c00,
474 /* Data-processing (2 source) instructions. */
475 I3508_LSLV = 0x1ac02000,
476 I3508_LSRV = 0x1ac02400,
477 I3508_ASRV = 0x1ac02800,
478 I3508_RORV = 0x1ac02c00,
479 I3508_SMULH = 0x9b407c00,
480 I3508_UMULH = 0x9bc07c00,
481 I3508_UDIV = 0x1ac00800,
482 I3508_SDIV = 0x1ac00c00,
484 /* Data-processing (3 source) instructions. */
485 I3509_MADD = 0x1b000000,
486 I3509_MSUB = 0x1b008000,
488 /* Logical shifted register instructions (without a shift). */
489 I3510_AND = 0x0a000000,
490 I3510_BIC = 0x0a200000,
491 I3510_ORR = 0x2a000000,
492 I3510_ORN = 0x2a200000,
493 I3510_EOR = 0x4a000000,
494 I3510_EON = 0x4a200000,
495 I3510_ANDS = 0x6a000000,
497 /* AdvSIMD copy */
498 I3605_DUP = 0x0e000400,
499 I3605_INS = 0x4e001c00,
500 I3605_UMOV = 0x0e003c00,
502 /* AdvSIMD modified immediate */
503 I3606_MOVI = 0x0f000400,
505 /* AdvSIMD shift by immediate */
506 I3614_SSHR = 0x0f000400,
507 I3614_SSRA = 0x0f001400,
508 I3614_SHL = 0x0f005400,
509 I3614_USHR = 0x2f000400,
510 I3614_USRA = 0x2f001400,
512 /* AdvSIMD three same. */
513 I3616_ADD = 0x0e208400,
514 I3616_AND = 0x0e201c00,
515 I3616_BIC = 0x0e601c00,
516 I3616_EOR = 0x2e201c00,
517 I3616_MUL = 0x0e209c00,
518 I3616_ORR = 0x0ea01c00,
519 I3616_ORN = 0x0ee01c00,
520 I3616_SUB = 0x2e208400,
521 I3616_CMGT = 0x0e203400,
522 I3616_CMGE = 0x0e203c00,
523 I3616_CMTST = 0x0e208c00,
524 I3616_CMHI = 0x2e203400,
525 I3616_CMHS = 0x2e203c00,
526 I3616_CMEQ = 0x2e208c00,
528 /* AdvSIMD two-reg misc. */
529 I3617_CMGT0 = 0x0e208800,
530 I3617_CMEQ0 = 0x0e209800,
531 I3617_CMLT0 = 0x0e20a800,
532 I3617_CMGE0 = 0x2e208800,
533 I3617_CMLE0 = 0x2e20a800,
534 I3617_NOT = 0x2e205800,
535 I3617_NEG = 0x2e20b800,
537 /* System instructions. */
538 NOP = 0xd503201f,
539 DMB_ISH = 0xd50338bf,
540 DMB_LD = 0x00000100,
541 DMB_ST = 0x00000200,
542 } AArch64Insn;
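/* Read back the instruction word currently at the output pointer; used when
   re-emitting a branch whose existing offset bits must be preserved. */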
544 static inline uint32_t tcg_in32(TCGContext *s)
546 uint32_t v = *(uint32_t *)s->code_ptr;
547 return v;
550 /* Emit an opcode with "type-checking" of the format. */
551 #define tcg_out_insn(S, FMT, OP, ...) \
552 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
554 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
556 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
559 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
560 TCGReg rt, int imm19)
562 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
565 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
566 TCGCond c, int imm19)
568 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
571 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
573 tcg_out32(s, insn | (imm26 & 0x03ffffff));
576 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
578 tcg_out32(s, insn | rn << 5);
581 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
582 TCGReg r1, TCGReg r2, TCGReg rn,
583 tcg_target_long ofs, bool pre, bool w)
585 insn |= 1u << 31; /* ext */
586 insn |= pre << 24;
587 insn |= w << 23;
589 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
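    /* The 7-bit immediate is the offset in units of 8 bytes, placed at
       insn bits [21:15]. */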
590 insn |= (ofs & (0x7f << 3)) << (15 - 3);
592 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
595 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
596 TCGReg rd, TCGReg rn, uint64_t aimm)
598 if (aimm > 0xfff) {
599 tcg_debug_assert((aimm & 0xfff) == 0);
600 aimm >>= 12;
601 tcg_debug_assert(aimm <= 0xfff);
602 aimm |= 1 << 12; /* apply LSL 12 */
604 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
607 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
608 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
609 that feed the DecodeBitMasks pseudo function. */
610 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
611 TCGReg rd, TCGReg rn, int n, int immr, int imms)
613 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
614 | rn << 5 | rd);
617 #define tcg_out_insn_3404 tcg_out_insn_3402
619 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
620 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
622 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
623 | rn << 5 | rd);
626 /* This function is used for the Move (wide immediate) instruction group.
627 Note that SHIFT is a full shift count, not the 2 bit HW field. */
628 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
629 TCGReg rd, uint16_t half, unsigned shift)
631 tcg_debug_assert((shift & ~0x30) == 0);
632 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
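/* PC-relative ADR/ADRP: the low two bits of the displacement form immlo
   (insn bits [30:29]) and the remaining 19 bits form immhi (bits [23:5]). */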
635 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
636 TCGReg rd, int64_t disp)
638 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
641 /* This function is for 3.5.2 (Add/subtract shifted register), for
642 the rare occasion when we actually want to supply a shift amount. */
643 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
644 TCGType ext, TCGReg rd, TCGReg rn,
645 TCGReg rm, int imm6)
647 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
650 /* This function is for 3.5.2 (Add/subtract shifted register),
651 and 3.5.10 (Logical shifted register), for the vast majority of cases
652 when we don't want to apply a shift. Thus it can also be used for
653 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
654 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
655 TCGReg rd, TCGReg rn, TCGReg rm)
657 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
660 #define tcg_out_insn_3503 tcg_out_insn_3502
661 #define tcg_out_insn_3508 tcg_out_insn_3502
662 #define tcg_out_insn_3510 tcg_out_insn_3502
664 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
665 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
667 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
668 | tcg_cond_to_aarch64[c] << 12);
671 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
672 TCGReg rd, TCGReg rn)
674 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
677 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
678 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
680 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
683 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
684 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
686 /* Note that bit 11 set means general register input. Therefore
687 we can handle both register sets with one function. */
688 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
689 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
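/* AdvSIMD modified immediate: the 8-bit immediate is split, with bits [7:5]
   (a:b:c) going to insn bits [18:16] and bits [4:0] (d..h) to bits [9:5]. */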
692 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
693 TCGReg rd, bool op, int cmode, uint8_t imm8)
695 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
696 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
699 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
700 TCGReg rd, TCGReg rn, unsigned immhb)
702 tcg_out32(s, insn | q << 30 | immhb << 16
703 | (rn & 0x1f) << 5 | (rd & 0x1f));
706 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
707 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
709 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
710 | (rn & 0x1f) << 5 | (rd & 0x1f));
713 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
714 unsigned size, TCGReg rd, TCGReg rn)
716 tcg_out32(s, insn | q << 30 | (size << 22)
717 | (rn & 0x1f) << 5 | (rd & 0x1f));
720 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
721 TCGReg rd, TCGReg base, TCGType ext,
722 TCGReg regoff)
724 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
725 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
726 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
729 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
730 TCGReg rd, TCGReg rn, intptr_t offset)
732 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
735 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
736 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
738 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
739 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
740 | rn << 5 | (rd & 0x1f));
743 /* Register to register move using ORR (shifted register with no shift). */
744 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
746 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
749 /* Register to register move using ADDI (move to/from SP). */
750 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
752 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
755 /* This function is used for the Logical (immediate) instruction group.
756 The value of LIMM must satisfy IS_LIMM. See the comment above about
757 only supporting simplified logical immediates. */
758 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
759 TCGReg rd, TCGReg rn, uint64_t limm)
761 unsigned h, l, r, c;
763 tcg_debug_assert(is_limm(limm));
765 h = clz64(limm);
766 l = ctz64(limm);
767 if (l == 0) {
768 r = 0; /* form 0....01....1 */
769 c = ctz64(~limm) - 1;
770 if (h == 0) {
771 r = clz64(~limm); /* form 1..10..01..1 */
772 c += r;
774 } else {
775 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
776 c = r - h - 1;
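        /* For example, limm == 0x0ff0 gives l = 4, h = 52, so r = 60 and
           c = 7: an 8-bit run of ones rotated right by 60 positions. */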
778 if (ext == TCG_TYPE_I32) {
779 r &= 31;
780 c &= 31;
783 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
786 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
787 TCGReg rd, uint64_t v64)
789 int op, cmode, imm8;
791 if (is_fimm(v64, &op, &cmode, &imm8)) {
792 tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
793 } else if (type == TCG_TYPE_V128) {
794 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
795 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
796 } else {
797 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
798 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
802 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
803 tcg_target_long value)
805 tcg_target_long svalue = value;
806 tcg_target_long ivalue = ~value;
807 tcg_target_long t0, t1, t2;
808 int s0, s1;
809 AArch64Insn opc;
811 switch (type) {
812 case TCG_TYPE_I32:
813 case TCG_TYPE_I64:
814 tcg_debug_assert(rd < 32);
815 break;
817 case TCG_TYPE_V64:
818 case TCG_TYPE_V128:
819 tcg_debug_assert(rd >= 32);
820 tcg_out_dupi_vec(s, type, rd, value);
821 return;
823 default:
824 g_assert_not_reached();
827 /* For 32-bit values, discard potential garbage in value. For 64-bit
828 values within [2**31, 2**32-1], we can create smaller sequences by
829 interpreting this as a negative 32-bit number, while ensuring that
830 the high 32 bits are cleared by setting SF=0. */
831 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
832 svalue = (int32_t)value;
833 value = (uint32_t)value;
834 ivalue = (uint32_t)ivalue;
835 type = TCG_TYPE_I32;
838 /* Speed things up by handling the common case of small positive
839 and negative values specially. */
840 if ((value & ~0xffffull) == 0) {
841 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
842 return;
843 } else if ((ivalue & ~0xffffull) == 0) {
844 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
845 return;
848 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
849 use the sign-extended value. That lets us match rotated values such
850 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
851 if (is_limm(svalue)) {
852 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
853 return;
856 /* Look for host pointer values within 4G of the PC. This happens
857 often when loading pointers to QEMU's own data structures. */
858 if (type == TCG_TYPE_I64) {
859 tcg_target_long disp = value - (intptr_t)s->code_ptr;
860 if (disp == sextract64(disp, 0, 21)) {
861 tcg_out_insn(s, 3406, ADR, rd, disp);
862 return;
864 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
865 if (disp == sextract64(disp, 0, 21)) {
866 tcg_out_insn(s, 3406, ADRP, rd, disp);
867 if (value & 0xfff) {
868 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
870 return;
874 /* Would it take fewer insns to begin with MOVN? */
875 if (ctpop64(value) >= 32) {
876 t0 = ivalue;
877 opc = I3405_MOVN;
878 } else {
879 t0 = value;
880 opc = I3405_MOVZ;
882 s0 = ctz64(t0) & (63 & -16);
883 t1 = t0 & ~(0xffffUL << s0);
884 s1 = ctz64(t1) & (63 & -16);
885 t2 = t1 & ~(0xffffUL << s1);
886 if (t2 == 0) {
887 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
888 if (t1 != 0) {
889 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
891 return;
894 /* For more than 2 insns, dump it into the constant pool. */
895 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
896 tcg_out_insn(s, 3305, LDR, 0, rd);
899 /* Define something more legible for general use. */
900 #define tcg_out_ldst_r tcg_out_insn_3310
902 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
903 TCGReg rn, intptr_t offset, int lgsize)
905 /* If the offset is naturally aligned and in range, then we can
906 use the scaled uimm12 encoding */
907 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
908 uintptr_t scaled_uimm = offset >> lgsize;
909 if (scaled_uimm <= 0xfff) {
910 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
911 return;
915 /* Small signed offsets can use the unscaled encoding. */
916 if (offset >= -256 && offset < 256) {
917 tcg_out_insn_3312(s, insn, rd, rn, offset);
918 return;
921 /* Worst-case scenario, move offset to temp register, use reg offset. */
922 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
923 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
926 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
928 if (ret == arg) {
929 return;
931 switch (type) {
932 case TCG_TYPE_I32:
933 case TCG_TYPE_I64:
934 if (ret < 32 && arg < 32) {
935 tcg_out_movr(s, type, ret, arg);
936 break;
937 } else if (ret < 32) {
938 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
939 break;
940 } else if (arg < 32) {
941 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
942 break;
944 /* FALLTHRU */
946 case TCG_TYPE_V64:
947 tcg_debug_assert(ret >= 32 && arg >= 32);
948 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
949 break;
950 case TCG_TYPE_V128:
951 tcg_debug_assert(ret >= 32 && arg >= 32);
952 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
953 break;
955 default:
956 g_assert_not_reached();
960 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
961 TCGReg base, intptr_t ofs)
963 AArch64Insn insn;
964 int lgsz;
966 switch (type) {
967 case TCG_TYPE_I32:
968 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
969 lgsz = 2;
970 break;
971 case TCG_TYPE_I64:
972 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
973 lgsz = 3;
974 break;
975 case TCG_TYPE_V64:
976 insn = I3312_LDRVD;
977 lgsz = 3;
978 break;
979 case TCG_TYPE_V128:
980 insn = I3312_LDRVQ;
981 lgsz = 4;
982 break;
983 default:
984 g_assert_not_reached();
986 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
989 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
990 TCGReg base, intptr_t ofs)
992 AArch64Insn insn;
993 int lgsz;
995 switch (type) {
996 case TCG_TYPE_I32:
997 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
998 lgsz = 2;
999 break;
1000 case TCG_TYPE_I64:
1001 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1002 lgsz = 3;
1003 break;
1004 case TCG_TYPE_V64:
1005 insn = I3312_STRVD;
1006 lgsz = 3;
1007 break;
1008 case TCG_TYPE_V128:
1009 insn = I3312_STRVQ;
1010 lgsz = 4;
1011 break;
1012 default:
1013 g_assert_not_reached();
1015 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1018 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1019 TCGReg base, intptr_t ofs)
1021 if (type <= TCG_TYPE_I64 && val == 0) {
1022 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1023 return true;
1025 return false;
1028 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1029 TCGReg rn, unsigned int a, unsigned int b)
1031 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1034 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1035 TCGReg rn, unsigned int a, unsigned int b)
1037 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1040 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1041 TCGReg rn, unsigned int a, unsigned int b)
1043 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1046 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1047 TCGReg rn, TCGReg rm, unsigned int a)
1049 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
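/* The shift and rotate helpers below use the standard ALIAS encodings:
   LSL/LSR/ASR via UBFM/SBFM with computed immr/imms, and ROR via EXTR
   with both source operands equal. */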
1052 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1053 TCGReg rd, TCGReg rn, unsigned int m)
1055 int bits = ext ? 64 : 32;
1056 int max = bits - 1;
1057 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1060 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1061 TCGReg rd, TCGReg rn, unsigned int m)
1063 int max = ext ? 63 : 31;
1064 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1067 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1068 TCGReg rd, TCGReg rn, unsigned int m)
1070 int max = ext ? 63 : 31;
1071 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1074 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1075 TCGReg rd, TCGReg rn, unsigned int m)
1077 int max = ext ? 63 : 31;
1078 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1081 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1082 TCGReg rd, TCGReg rn, unsigned int m)
1084 int bits = ext ? 64 : 32;
1085 int max = bits - 1;
1086 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1089 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1090 TCGReg rn, unsigned lsb, unsigned width)
1092 unsigned size = ext ? 64 : 32;
1093 unsigned a = (size - lsb) & (size - 1);
1094 unsigned b = width - 1;
1095 tcg_out_bfm(s, ext, rd, rn, a, b);
1098 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1099 tcg_target_long b, bool const_b)
1101 if (const_b) {
1102 /* Using CMP or CMN aliases. */
1103 if (b >= 0) {
1104 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1105 } else {
1106 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1108 } else {
1109 /* Using CMP alias SUBS wzr, Wn, Wm */
1110 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1114 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1116 ptrdiff_t offset = target - s->code_ptr;
1117 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1118 tcg_out_insn(s, 3206, B, offset);
1121 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1123 ptrdiff_t offset = target - s->code_ptr;
1124 if (offset == sextract64(offset, 0, 26)) {
1125 tcg_out_insn(s, 3206, BL, offset);
1126 } else {
1127 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1128 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1132 static inline void tcg_out_goto_noaddr(TCGContext *s)
1134 /* We pay attention here to not modify the branch target by reading from
1135 the buffer. This ensures that caches and memory are kept coherent during
1136 retranslation. Mask away possible garbage in the high bits for the
1137 first translation, while keeping the offset bits for retranslation. */
1138 uint32_t old = tcg_in32(s);
1139 tcg_out_insn(s, 3206, B, old);
1142 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
1144 /* See comments in tcg_out_goto_noaddr. */
1145 uint32_t old = tcg_in32(s) >> 5;
1146 tcg_out_insn(s, 3202, B_C, c, old);
1149 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1151 tcg_out_insn(s, 3207, BLR, reg);
1154 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1156 ptrdiff_t offset = target - s->code_ptr;
1157 if (offset == sextract64(offset, 0, 26)) {
1158 tcg_out_insn(s, 3206, BL, offset);
1159 } else {
1160 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1161 tcg_out_callr(s, TCG_REG_TMP);
1165 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1166 uintptr_t addr)
1168 tcg_insn_unit i1, i2;
1169 TCGType rt = TCG_TYPE_I64;
1170 TCGReg rd = TCG_REG_TMP;
1171 uint64_t pair;
1173 ptrdiff_t offset = addr - jmp_addr;
1175 if (offset == sextract64(offset, 0, 26)) {
1176 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1177 i2 = NOP;
1178 } else {
1179 offset = (addr >> 12) - (jmp_addr >> 12);
1181 /* patch ADRP */
1182 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1183 /* patch ADDI */
1184 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1186 pair = (uint64_t)i2 << 32 | i1;
1187 atomic_set((uint64_t *)jmp_addr, pair);
1188 flush_icache_range(jmp_addr, jmp_addr + 8);
1191 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1193 if (!l->has_value) {
1194 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1195 tcg_out_goto_noaddr(s);
1196 } else {
1197 tcg_out_goto(s, l->u.value_ptr);
1201 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1202 TCGArg b, bool b_const, TCGLabel *l)
1204 intptr_t offset;
1205 bool need_cmp;
1207 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1208 need_cmp = false;
1209 } else {
1210 need_cmp = true;
1211 tcg_out_cmp(s, ext, a, b, b_const);
1214 if (!l->has_value) {
1215 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1216 offset = tcg_in32(s) >> 5;
1217 } else {
1218 offset = l->u.value_ptr - s->code_ptr;
1219 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1222 if (need_cmp) {
1223 tcg_out_insn(s, 3202, B_C, c, offset);
1224 } else if (c == TCG_COND_EQ) {
1225 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1226 } else {
1227 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1231 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1233 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1236 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1238 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1241 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1243 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1246 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
1247 TCGReg rd, TCGReg rn)
1249 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1250 int bits = (8 << s_bits) - 1;
1251 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1254 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
1255 TCGReg rd, TCGReg rn)
1257 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1258 int bits = (8 << s_bits) - 1;
1259 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1262 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1263 TCGReg rn, int64_t aimm)
1265 if (aimm >= 0) {
1266 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1267 } else {
1268 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1272 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1273 TCGReg rh, TCGReg al, TCGReg ah,
1274 tcg_target_long bl, tcg_target_long bh,
1275 bool const_bl, bool const_bh, bool sub)
1277 TCGReg orig_rl = rl;
1278 AArch64Insn insn;
1280 if (rl == ah || (!const_bh && rl == bh)) {
1281 rl = TCG_REG_TMP;
1284 if (const_bl) {
1285 insn = I3401_ADDSI;
1286 if ((bl < 0) ^ sub) {
1287 insn = I3401_SUBSI;
1288 bl = -bl;
1290 if (unlikely(al == TCG_REG_XZR)) {
1291 /* ??? We want to allow al to be zero for the benefit of
1292 negation via subtraction. However, that leaves open the
1293 possibility of adding 0+const in the low part, and the
1294 immediate add instructions encode XSP not XZR. Don't try
1295 anything more elaborate here than loading another zero. */
1296 al = TCG_REG_TMP;
1297 tcg_out_movi(s, ext, al, 0);
1299 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1300 } else {
1301 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1304 insn = I3503_ADC;
1305 if (const_bh) {
1306 /* Note that the only two constants we support are 0 and -1, and
1307 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1308 if ((bh != 0) ^ sub) {
1309 insn = I3503_SBC;
1311 bh = TCG_REG_XZR;
1312 } else if (sub) {
1313 insn = I3503_SBC;
1315 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1317 tcg_out_mov(s, ext, orig_rl, rl);
1320 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
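    /* DMB_ISH above has the load/store option bits of CRm clear; OR-ing in
       DMB_LD and/or DMB_ST selects DMB ISHLD, DMB ISHST, or a full DMB ISH,
       the weakest barrier that still orders the requested accesses. */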
1322 static const uint32_t sync[] = {
1323 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1324 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1325 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1326 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1327 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1329 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
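/* Emit clz/ctz.  A ctz is computed as clz(rbit(x)); when the input may be
   zero, the result is selected against the caller-supplied default B with a
   compare and conditional select.  A constant B equal to the operand width
   matches the CLZ instruction's own behaviour and needs no select. */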
1332 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1333 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1335 TCGReg a1 = a0;
1336 if (is_ctz) {
1337 a1 = TCG_REG_TMP;
1338 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1340 if (const_b && b == (ext ? 64 : 32)) {
1341 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1342 } else {
1343 AArch64Insn sel = I3506_CSEL;
1345 tcg_out_cmp(s, ext, a0, 0, 1);
1346 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1348 if (const_b) {
1349 if (b == -1) {
1350 b = TCG_REG_XZR;
1351 sel = I3506_CSINV;
1352 } else if (b == 0) {
1353 b = TCG_REG_XZR;
1354 } else {
1355 tcg_out_movi(s, ext, d, b);
1356 b = d;
1359 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1363 #ifdef CONFIG_SOFTMMU
1364 #include "tcg-ldst.inc.c"
1366 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1367 * TCGMemOpIdx oi, uintptr_t ra)
1369 static void * const qemu_ld_helpers[16] = {
1370 [MO_UB] = helper_ret_ldub_mmu,
1371 [MO_LEUW] = helper_le_lduw_mmu,
1372 [MO_LEUL] = helper_le_ldul_mmu,
1373 [MO_LEQ] = helper_le_ldq_mmu,
1374 [MO_BEUW] = helper_be_lduw_mmu,
1375 [MO_BEUL] = helper_be_ldul_mmu,
1376 [MO_BEQ] = helper_be_ldq_mmu,
1379 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1380 * uintxx_t val, TCGMemOpIdx oi,
1381 * uintptr_t ra)
1383 static void * const qemu_st_helpers[16] = {
1384 [MO_UB] = helper_ret_stb_mmu,
1385 [MO_LEUW] = helper_le_stw_mmu,
1386 [MO_LEUL] = helper_le_stl_mmu,
1387 [MO_LEQ] = helper_le_stq_mmu,
1388 [MO_BEUW] = helper_be_stw_mmu,
1389 [MO_BEUL] = helper_be_stl_mmu,
1390 [MO_BEQ] = helper_be_stq_mmu,
1393 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1395 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1396 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1397 tcg_out_insn(s, 3406, ADR, rd, offset);
1400 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1402 TCGMemOpIdx oi = lb->oi;
1403 TCGMemOp opc = get_memop(oi);
1404 TCGMemOp size = opc & MO_SIZE;
1406 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1408 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1409 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1410 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1411 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1412 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1413 if (opc & MO_SIGN) {
1414 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1415 } else {
1416 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1419 tcg_out_goto(s, lb->raddr);
1422 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1424 TCGMemOpIdx oi = lb->oi;
1425 TCGMemOp opc = get_memop(oi);
1426 TCGMemOp size = opc & MO_SIZE;
1428 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1430 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1431 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1432 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1433 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1434 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1435 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1436 tcg_out_goto(s, lb->raddr);
1439 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1440 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1441 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1443 TCGLabelQemuLdst *label = new_ldst_label(s);
1445 label->is_ld = is_ld;
1446 label->oi = oi;
1447 label->type = ext;
1448 label->datalo_reg = data_reg;
1449 label->addrlo_reg = addr_reg;
1450 label->raddr = raddr;
1451 label->label_ptr[0] = label_ptr;
1454 /* Load and compare a TLB entry, emitting the conditional jump to the
1455 slow path for the failure case, which will be patched later when finalizing
1456 the slow path. Generated code returns the host addend in X1,
1457 clobbers X0,X2,X3,TMP. */
1458 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1459 tcg_insn_unit **label_ptr, int mem_index,
1460 bool is_read)
1462 int tlb_offset = is_read ?
1463 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1464 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1465 unsigned a_bits = get_alignment_bits(opc);
1466 unsigned s_bits = opc & MO_SIZE;
1467 unsigned a_mask = (1u << a_bits) - 1;
1468 unsigned s_mask = (1u << s_bits) - 1;
1469 TCGReg base = TCG_AREG0, x3;
1470 uint64_t tlb_mask;
1472 /* For aligned accesses, we check the first byte and include the alignment
1473 bits within the address. For unaligned access, we check that we don't
1474 cross pages using the address of the last byte of the access. */
1475 if (a_bits >= s_bits) {
1476 x3 = addr_reg;
1477 } else {
1478 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1479 TCG_REG_X3, addr_reg, s_mask - a_mask);
1480 x3 = TCG_REG_X3;
1482 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1484 /* Extract the TLB index from the address into X0.
1485 X0<CPU_TLB_BITS:0> =
1486 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1487 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1488 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1490 /* Store the page mask part of the address into X3. */
1491 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1492 TCG_REG_X3, x3, tlb_mask);
1494 /* Add any "high bits" from the tlb offset to the env address into X2,
1495 to take advantage of the LSL12 form of the ADDI instruction.
1496 X2 = env + (tlb_offset & 0xfff000) */
1497 if (tlb_offset & 0xfff000) {
1498 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1499 tlb_offset & 0xfff000);
1500 base = TCG_REG_X2;
1503 /* Merge the tlb index contribution into X2.
1504 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1505 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1506 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1508 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1509 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1510 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1511 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff,
1512 TARGET_LONG_BITS == 32 ? 2 : 3);
1514 /* Load the tlb addend. Do that early to avoid stalling.
1515 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1516 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1517 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1518 (is_read ? offsetof(CPUTLBEntry, addr_read)
1519 : offsetof(CPUTLBEntry, addr_write)), 3);
1521 /* Perform the address comparison. */
1522 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1524 /* If not equal, we jump to the slow path. */
1525 *label_ptr = s->code_ptr;
1526 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1529 #endif /* CONFIG_SOFTMMU */
1531 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1532 TCGReg data_r, TCGReg addr_r,
1533 TCGType otype, TCGReg off_r)
1535 const TCGMemOp bswap = memop & MO_BSWAP;
1537 switch (memop & MO_SSIZE) {
1538 case MO_UB:
1539 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1540 break;
1541 case MO_SB:
1542 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1543 data_r, addr_r, otype, off_r);
1544 break;
1545 case MO_UW:
1546 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1547 if (bswap) {
1548 tcg_out_rev16(s, data_r, data_r);
1550 break;
1551 case MO_SW:
1552 if (bswap) {
1553 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1554 tcg_out_rev16(s, data_r, data_r);
1555 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1556 } else {
1557 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1558 data_r, addr_r, otype, off_r);
1560 break;
1561 case MO_UL:
1562 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1563 if (bswap) {
1564 tcg_out_rev32(s, data_r, data_r);
1566 break;
1567 case MO_SL:
1568 if (bswap) {
1569 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1570 tcg_out_rev32(s, data_r, data_r);
1571 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1572 } else {
1573 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1575 break;
1576 case MO_Q:
1577 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1578 if (bswap) {
1579 tcg_out_rev64(s, data_r, data_r);
1581 break;
1582 default:
1583 tcg_abort();
1587 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1588 TCGReg data_r, TCGReg addr_r,
1589 TCGType otype, TCGReg off_r)
1591 const TCGMemOp bswap = memop & MO_BSWAP;
1593 switch (memop & MO_SIZE) {
1594 case MO_8:
1595 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1596 break;
1597 case MO_16:
1598 if (bswap && data_r != TCG_REG_XZR) {
1599 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1600 data_r = TCG_REG_TMP;
1602 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1603 break;
1604 case MO_32:
1605 if (bswap && data_r != TCG_REG_XZR) {
1606 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1607 data_r = TCG_REG_TMP;
1609 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1610 break;
1611 case MO_64:
1612 if (bswap && data_r != TCG_REG_XZR) {
1613 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1614 data_r = TCG_REG_TMP;
1616 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1617 break;
1618 default:
1619 tcg_abort();
1623 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1624 TCGMemOpIdx oi, TCGType ext)
1626 TCGMemOp memop = get_memop(oi);
1627 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1628 #ifdef CONFIG_SOFTMMU
1629 unsigned mem_index = get_mmuidx(oi);
1630 tcg_insn_unit *label_ptr;
1632 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1633 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1634 TCG_REG_X1, otype, addr_reg);
1635 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1636 s->code_ptr, label_ptr);
1637 #else /* !CONFIG_SOFTMMU */
1638 if (USE_GUEST_BASE) {
1639 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1640 TCG_REG_GUEST_BASE, otype, addr_reg);
1641 } else {
1642 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1643 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1645 #endif /* CONFIG_SOFTMMU */
1648 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1649 TCGMemOpIdx oi)
1651 TCGMemOp memop = get_memop(oi);
1652 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1653 #ifdef CONFIG_SOFTMMU
1654 unsigned mem_index = get_mmuidx(oi);
1655 tcg_insn_unit *label_ptr;
1657 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1658 tcg_out_qemu_st_direct(s, memop, data_reg,
1659 TCG_REG_X1, otype, addr_reg);
1660 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1661 data_reg, addr_reg, s->code_ptr, label_ptr);
1662 #else /* !CONFIG_SOFTMMU */
1663 if (USE_GUEST_BASE) {
1664 tcg_out_qemu_st_direct(s, memop, data_reg,
1665 TCG_REG_GUEST_BASE, otype, addr_reg);
1666 } else {
1667 tcg_out_qemu_st_direct(s, memop, data_reg,
1668 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1670 #endif /* CONFIG_SOFTMMU */
1673 static tcg_insn_unit *tb_ret_addr;
1675 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1676 const TCGArg args[TCG_MAX_OP_ARGS],
1677 const int const_args[TCG_MAX_OP_ARGS])
1679 /* 99% of the time, we can signal the use of extension registers
1680 by looking to see if the opcode handles 64-bit data. */
1681 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1683 /* Hoist the loads of the most common arguments. */
1684 TCGArg a0 = args[0];
1685 TCGArg a1 = args[1];
1686 TCGArg a2 = args[2];
1687 int c2 = const_args[2];
1689 /* Some operands are defined with "rZ" constraint, a register or
1690 the zero register. These need not actually test args[I] == 0. */
1691 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1693 switch (opc) {
1694 case INDEX_op_exit_tb:
1695 /* Reuse the zeroing that exists for goto_ptr. */
1696 if (a0 == 0) {
1697 tcg_out_goto_long(s, s->code_gen_epilogue);
1698 } else {
1699 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1700 tcg_out_goto_long(s, tb_ret_addr);
1702 break;
1704 case INDEX_op_goto_tb:
1705 if (s->tb_jmp_insn_offset != NULL) {
1706 /* TCG_TARGET_HAS_direct_jump */
1707 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1708 write can be used to patch the target address. */
1709 if ((uintptr_t)s->code_ptr & 7) {
1710 tcg_out32(s, NOP);
1712 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1713 /* actual branch destination will be patched by
1714 tb_target_set_jmp_target later. */
1715 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1716 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1717 } else {
1718 /* !TCG_TARGET_HAS_direct_jump */
1719 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1720 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1721 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1723 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1724 set_jmp_reset_offset(s, a0);
1725 break;
1727 case INDEX_op_goto_ptr:
1728 tcg_out_insn(s, 3207, BR, a0);
1729 break;
1731 case INDEX_op_br:
1732 tcg_out_goto_label(s, arg_label(a0));
1733 break;
1735 case INDEX_op_ld8u_i32:
1736 case INDEX_op_ld8u_i64:
1737 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1738 break;
1739 case INDEX_op_ld8s_i32:
1740 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1741 break;
1742 case INDEX_op_ld8s_i64:
1743 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1744 break;
1745 case INDEX_op_ld16u_i32:
1746 case INDEX_op_ld16u_i64:
1747 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1748 break;
1749 case INDEX_op_ld16s_i32:
1750 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1751 break;
1752 case INDEX_op_ld16s_i64:
1753 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1754 break;
1755 case INDEX_op_ld_i32:
1756 case INDEX_op_ld32u_i64:
1757 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1758 break;
1759 case INDEX_op_ld32s_i64:
1760 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1761 break;
1762 case INDEX_op_ld_i64:
1763 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1764 break;
1766 case INDEX_op_st8_i32:
1767 case INDEX_op_st8_i64:
1768 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1769 break;
1770 case INDEX_op_st16_i32:
1771 case INDEX_op_st16_i64:
1772 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1773 break;
1774 case INDEX_op_st_i32:
1775 case INDEX_op_st32_i64:
1776 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1777 break;
1778 case INDEX_op_st_i64:
1779 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1780 break;
1782 case INDEX_op_add_i32:
1783 a2 = (int32_t)a2;
1784 /* FALLTHRU */
1785 case INDEX_op_add_i64:
1786 if (c2) {
1787 tcg_out_addsubi(s, ext, a0, a1, a2);
1788 } else {
1789 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1791 break;
1793 case INDEX_op_sub_i32:
1794 a2 = (int32_t)a2;
1795 /* FALLTHRU */
1796 case INDEX_op_sub_i64:
1797 if (c2) {
1798 tcg_out_addsubi(s, ext, a0, a1, -a2);
1799 } else {
1800 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1802 break;
1804 case INDEX_op_neg_i64:
1805 case INDEX_op_neg_i32:
1806 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1807 break;
1809 case INDEX_op_and_i32:
1810 a2 = (int32_t)a2;
1811 /* FALLTHRU */
1812 case INDEX_op_and_i64:
1813 if (c2) {
1814 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1815 } else {
1816 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1818 break;
1820 case INDEX_op_andc_i32:
1821 a2 = (int32_t)a2;
1822 /* FALLTHRU */
1823 case INDEX_op_andc_i64:
1824 if (c2) {
1825 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1826 } else {
1827 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1829 break;
1831 case INDEX_op_or_i32:
1832 a2 = (int32_t)a2;
1833 /* FALLTHRU */
1834 case INDEX_op_or_i64:
1835 if (c2) {
1836 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1837 } else {
1838 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1840 break;
1842 case INDEX_op_orc_i32:
1843 a2 = (int32_t)a2;
1844 /* FALLTHRU */
1845 case INDEX_op_orc_i64:
1846 if (c2) {
1847 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1848 } else {
1849 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1851 break;
1853 case INDEX_op_xor_i32:
1854 a2 = (int32_t)a2;
1855 /* FALLTHRU */
1856 case INDEX_op_xor_i64:
1857 if (c2) {
1858 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1859 } else {
1860 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1862 break;
1864 case INDEX_op_eqv_i32:
1865 a2 = (int32_t)a2;
1866 /* FALLTHRU */
1867 case INDEX_op_eqv_i64:
1868 if (c2) {
1869 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1870 } else {
1871 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1873 break;
1875 case INDEX_op_not_i64:
1876 case INDEX_op_not_i32:
1877 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1878 break;
1880 case INDEX_op_mul_i64:
1881 case INDEX_op_mul_i32:
1882 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1883 break;
1885 case INDEX_op_div_i64:
1886 case INDEX_op_div_i32:
1887 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1888 break;
1889 case INDEX_op_divu_i64:
1890 case INDEX_op_divu_i32:
1891 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1892 break;
1894 case INDEX_op_rem_i64:
1895 case INDEX_op_rem_i32:
1896 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1897 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1898 break;
1899 case INDEX_op_remu_i64:
1900 case INDEX_op_remu_i32:
1901 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1902 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1903 break;
1905 case INDEX_op_shl_i64:
1906 case INDEX_op_shl_i32:
1907 if (c2) {
1908 tcg_out_shl(s, ext, a0, a1, a2);
1909 } else {
1910 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1912 break;
1914 case INDEX_op_shr_i64:
1915 case INDEX_op_shr_i32:
1916 if (c2) {
1917 tcg_out_shr(s, ext, a0, a1, a2);
1918 } else {
1919 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1921 break;
1923 case INDEX_op_sar_i64:
1924 case INDEX_op_sar_i32:
1925 if (c2) {
1926 tcg_out_sar(s, ext, a0, a1, a2);
1927 } else {
1928 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1929 }
1930 break;
1932 case INDEX_op_rotr_i64:
1933 case INDEX_op_rotr_i32:
1934 if (c2) {
1935 tcg_out_rotr(s, ext, a0, a1, a2);
1936 } else {
1937 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1938 }
1939 break;
1941 case INDEX_op_rotl_i64:
1942 case INDEX_op_rotl_i32:
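/* There is no rotate-left instruction.  Constant amounts are handled by
   tcg_out_rotl; a variable amount rotates right by the negated count. */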
1943 if (c2) {
1944 tcg_out_rotl(s, ext, a0, a1, a2);
1945 } else {
1946 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1947 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1948 }
1949 break;
1951 case INDEX_op_clz_i64:
1952 case INDEX_op_clz_i32:
1953 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1954 break;
1955 case INDEX_op_ctz_i64:
1956 case INDEX_op_ctz_i32:
1957 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1958 break;
1960 case INDEX_op_brcond_i32:
1961 a1 = (int32_t)a1;
1962 /* FALLTHRU */
1963 case INDEX_op_brcond_i64:
1964 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1965 break;
1967 case INDEX_op_setcond_i32:
1968 a2 = (int32_t)a2;
1969 /* FALLTHRU */
1970 case INDEX_op_setcond_i64:
1971 tcg_out_cmp(s, ext, a1, a2, c2);
1972 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1973 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1974 TCG_REG_XZR, tcg_invert_cond(args[3]));
1975 break;
1977 case INDEX_op_movcond_i32:
1978 a2 = (int32_t)a2;
1979 /* FALLTHRU */
1980 case INDEX_op_movcond_i64:
1981 tcg_out_cmp(s, ext, a1, a2, c2);
1982 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1983 break;
1985 case INDEX_op_qemu_ld_i32:
1986 case INDEX_op_qemu_ld_i64:
1987 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1988 break;
1989 case INDEX_op_qemu_st_i32:
1990 case INDEX_op_qemu_st_i64:
1991 tcg_out_qemu_st(s, REG0(0), a1, a2);
1992 break;
1994 case INDEX_op_bswap64_i64:
1995 tcg_out_rev64(s, a0, a1);
1996 break;
1997 case INDEX_op_bswap32_i64:
1998 case INDEX_op_bswap32_i32:
1999 tcg_out_rev32(s, a0, a1);
2000 break;
2001 case INDEX_op_bswap16_i64:
2002 case INDEX_op_bswap16_i32:
2003 tcg_out_rev16(s, a0, a1);
2004 break;
2006 case INDEX_op_ext8s_i64:
2007 case INDEX_op_ext8s_i32:
2008 tcg_out_sxt(s, ext, MO_8, a0, a1);
2009 break;
2010 case INDEX_op_ext16s_i64:
2011 case INDEX_op_ext16s_i32:
2012 tcg_out_sxt(s, ext, MO_16, a0, a1);
2013 break;
2014 case INDEX_op_ext_i32_i64:
2015 case INDEX_op_ext32s_i64:
2016 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2017 break;
2018 case INDEX_op_ext8u_i64:
2019 case INDEX_op_ext8u_i32:
2020 tcg_out_uxt(s, MO_8, a0, a1);
2021 break;
2022 case INDEX_op_ext16u_i64:
2023 case INDEX_op_ext16u_i32:
2024 tcg_out_uxt(s, MO_16, a0, a1);
2025 break;
2026 case INDEX_op_extu_i32_i64:
2027 case INDEX_op_ext32u_i64:
2028 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2029 break;
2031 case INDEX_op_deposit_i64:
2032 case INDEX_op_deposit_i32:
2033 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2034 break;
2036 case INDEX_op_extract_i64:
2037 case INDEX_op_extract_i32:
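/* extract/sextract of <ofs, len> map onto UBFM/SBFM with
   immr = ofs and imms = ofs + len - 1. */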
2038 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2039 break;
2041 case INDEX_op_sextract_i64:
2042 case INDEX_op_sextract_i32:
2043 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2044 break;
2046 case INDEX_op_add2_i32:
2047 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2048 (int32_t)args[4], args[5], const_args[4],
2049 const_args[5], false);
2050 break;
2051 case INDEX_op_add2_i64:
2052 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2053 args[5], const_args[4], const_args[5], false);
2054 break;
2055 case INDEX_op_sub2_i32:
2056 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2057 (int32_t)args[4], args[5], const_args[4],
2058 const_args[5], true);
2059 break;
2060 case INDEX_op_sub2_i64:
2061 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2062 args[5], const_args[4], const_args[5], true);
2063 break;
2065 case INDEX_op_muluh_i64:
2066 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2067 break;
2068 case INDEX_op_mulsh_i64:
2069 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2070 break;
2072 case INDEX_op_mb:
2073 tcg_out_mb(s, a0);
2074 break;
2076 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2077 case INDEX_op_mov_i64:
2078 case INDEX_op_mov_vec:
2079 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2080 case INDEX_op_movi_i64:
2081 case INDEX_op_dupi_vec:
2082 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2083 default:
2084 g_assert_not_reached();
2085 }
2087 #undef REG0
2088 }
2090 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2091 unsigned vecl, unsigned vece,
2092 const TCGArg *args, const int *const_args)
2093 {
2094 static const AArch64Insn cmp_insn[16] = {
2095 [TCG_COND_EQ] = I3616_CMEQ,
2096 [TCG_COND_GT] = I3616_CMGT,
2097 [TCG_COND_GE] = I3616_CMGE,
2098 [TCG_COND_GTU] = I3616_CMHI,
2099 [TCG_COND_GEU] = I3616_CMHS,
2100 };
2101 static const AArch64Insn cmp0_insn[16] = {
2102 [TCG_COND_EQ] = I3617_CMEQ0,
2103 [TCG_COND_GT] = I3617_CMGT0,
2104 [TCG_COND_GE] = I3617_CMGE0,
2105 [TCG_COND_LT] = I3617_CMLT0,
2106 [TCG_COND_LE] = I3617_CMLE0,
2107 };
2109 TCGType type = vecl + TCG_TYPE_V64;
2110 unsigned is_q = vecl;
2111 TCGArg a0, a1, a2;
2113 a0 = args[0];
2114 a1 = args[1];
2115 a2 = args[2];
2117 switch (opc) {
2118 case INDEX_op_ld_vec:
2119 tcg_out_ld(s, type, a0, a1, a2);
2120 break;
2121 case INDEX_op_st_vec:
2122 tcg_out_st(s, type, a0, a1, a2);
2123 break;
2124 case INDEX_op_add_vec:
2125 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2126 break;
2127 case INDEX_op_sub_vec:
2128 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2129 break;
2130 case INDEX_op_mul_vec:
2131 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2132 break;
2133 case INDEX_op_neg_vec:
2134 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2135 break;
2136 case INDEX_op_and_vec:
2137 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2138 break;
2139 case INDEX_op_or_vec:
2140 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2141 break;
2142 case INDEX_op_xor_vec:
2143 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2144 break;
2145 case INDEX_op_andc_vec:
2146 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2147 break;
2148 case INDEX_op_orc_vec:
2149 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2150 break;
2151 case INDEX_op_not_vec:
2152 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2153 break;
2154 case INDEX_op_dup_vec:
2155 tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0);
2156 break;
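/* For immediate vector shifts, SHL encodes the count as
   (element_bits + shift) in immh:immb, while USHR/SSHR encode it as
   (2 * element_bits - shift); 8 << vece is the element width in bits. */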
2157 case INDEX_op_shli_vec:
2158 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2159 break;
2160 case INDEX_op_shri_vec:
2161 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2162 break;
2163 case INDEX_op_sari_vec:
2164 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2165 break;
2166 case INDEX_op_cmp_vec:
2167 {
2168 TCGCond cond = args[3];
2169 AArch64Insn insn;
2171 if (cond == TCG_COND_NE) {
2172 if (const_args[2]) {
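/* A constant operand is necessarily zero (wZ constraint), so a1 != 0
   per lane is CMTST a1, a1; otherwise compute CMEQ and invert. */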
2173 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2174 } else {
2175 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2176 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2177 }
2178 } else {
2179 if (const_args[2]) {
2180 insn = cmp0_insn[cond];
2181 if (insn) {
2182 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2183 break;
2184 }
2185 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2186 a2 = TCG_VEC_TMP;
2187 }
2188 insn = cmp_insn[cond];
2189 if (insn == 0) {
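/* LT/LE and their unsigned forms have no entry in cmp_insn; swap the
   operands and use the swapped condition (a < b becomes b > a). */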
2190 TCGArg t;
2191 t = a1, a1 = a2, a2 = t;
2192 cond = tcg_swap_cond(cond);
2193 insn = cmp_insn[cond];
2194 tcg_debug_assert(insn != 0);
2195 }
2196 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2197 }
2198 }
2199 break;
2200 default:
2201 g_assert_not_reached();
2202 }
2203 }
2205 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2206 {
2207 switch (opc) {
2208 case INDEX_op_add_vec:
2209 case INDEX_op_sub_vec:
2210 case INDEX_op_and_vec:
2211 case INDEX_op_or_vec:
2212 case INDEX_op_xor_vec:
2213 case INDEX_op_andc_vec:
2214 case INDEX_op_orc_vec:
2215 case INDEX_op_neg_vec:
2216 case INDEX_op_not_vec:
2217 case INDEX_op_cmp_vec:
2218 case INDEX_op_shli_vec:
2219 case INDEX_op_shri_vec:
2220 case INDEX_op_sari_vec:
2221 return 1;
2222 case INDEX_op_mul_vec:
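/* AdvSIMD MUL has no 64-bit (.2D) element form. */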
2223 return vece < MO_64;
2225 default:
2226 return 0;
2227 }
2228 }
2230 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2231 TCGArg a0, ...)
2232 {
2233 }
2235 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2236 {
2237 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2238 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2239 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2240 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2241 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2242 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2243 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2244 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2245 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2246 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2247 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2248 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2249 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2250 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2251 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2252 static const TCGTargetOpDef r_r_rAL
2253 = { .args_ct_str = { "r", "r", "rAL" } };
2254 static const TCGTargetOpDef dep
2255 = { .args_ct_str = { "r", "0", "rZ" } };
2256 static const TCGTargetOpDef movc
2257 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2258 static const TCGTargetOpDef add2
2259 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
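/* Constraint letters, as parsed earlier in this file: 'r' general regs,
   'w' vector regs, 'l' regs usable for qemu_ld/st addressing,
   'A' arithmetic (add/sub) immediate, 'L' logical immediate,
   'Z' the constant zero, '0' tied to output operand 0. */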
2261 switch (op) {
2262 case INDEX_op_goto_ptr:
2263 return &r;
2265 case INDEX_op_ld8u_i32:
2266 case INDEX_op_ld8s_i32:
2267 case INDEX_op_ld16u_i32:
2268 case INDEX_op_ld16s_i32:
2269 case INDEX_op_ld_i32:
2270 case INDEX_op_ld8u_i64:
2271 case INDEX_op_ld8s_i64:
2272 case INDEX_op_ld16u_i64:
2273 case INDEX_op_ld16s_i64:
2274 case INDEX_op_ld32u_i64:
2275 case INDEX_op_ld32s_i64:
2276 case INDEX_op_ld_i64:
2277 case INDEX_op_neg_i32:
2278 case INDEX_op_neg_i64:
2279 case INDEX_op_not_i32:
2280 case INDEX_op_not_i64:
2281 case INDEX_op_bswap16_i32:
2282 case INDEX_op_bswap32_i32:
2283 case INDEX_op_bswap16_i64:
2284 case INDEX_op_bswap32_i64:
2285 case INDEX_op_bswap64_i64:
2286 case INDEX_op_ext8s_i32:
2287 case INDEX_op_ext16s_i32:
2288 case INDEX_op_ext8u_i32:
2289 case INDEX_op_ext16u_i32:
2290 case INDEX_op_ext8s_i64:
2291 case INDEX_op_ext16s_i64:
2292 case INDEX_op_ext32s_i64:
2293 case INDEX_op_ext8u_i64:
2294 case INDEX_op_ext16u_i64:
2295 case INDEX_op_ext32u_i64:
2296 case INDEX_op_ext_i32_i64:
2297 case INDEX_op_extu_i32_i64:
2298 case INDEX_op_extract_i32:
2299 case INDEX_op_extract_i64:
2300 case INDEX_op_sextract_i32:
2301 case INDEX_op_sextract_i64:
2302 return &r_r;
2304 case INDEX_op_st8_i32:
2305 case INDEX_op_st16_i32:
2306 case INDEX_op_st_i32:
2307 case INDEX_op_st8_i64:
2308 case INDEX_op_st16_i64:
2309 case INDEX_op_st32_i64:
2310 case INDEX_op_st_i64:
2311 return &rZ_r;
2313 case INDEX_op_add_i32:
2314 case INDEX_op_add_i64:
2315 case INDEX_op_sub_i32:
2316 case INDEX_op_sub_i64:
2317 case INDEX_op_setcond_i32:
2318 case INDEX_op_setcond_i64:
2319 return &r_r_rA;
2321 case INDEX_op_mul_i32:
2322 case INDEX_op_mul_i64:
2323 case INDEX_op_div_i32:
2324 case INDEX_op_div_i64:
2325 case INDEX_op_divu_i32:
2326 case INDEX_op_divu_i64:
2327 case INDEX_op_rem_i32:
2328 case INDEX_op_rem_i64:
2329 case INDEX_op_remu_i32:
2330 case INDEX_op_remu_i64:
2331 case INDEX_op_muluh_i64:
2332 case INDEX_op_mulsh_i64:
2333 return &r_r_r;
2335 case INDEX_op_and_i32:
2336 case INDEX_op_and_i64:
2337 case INDEX_op_or_i32:
2338 case INDEX_op_or_i64:
2339 case INDEX_op_xor_i32:
2340 case INDEX_op_xor_i64:
2341 case INDEX_op_andc_i32:
2342 case INDEX_op_andc_i64:
2343 case INDEX_op_orc_i32:
2344 case INDEX_op_orc_i64:
2345 case INDEX_op_eqv_i32:
2346 case INDEX_op_eqv_i64:
2347 return &r_r_rL;
2349 case INDEX_op_shl_i32:
2350 case INDEX_op_shr_i32:
2351 case INDEX_op_sar_i32:
2352 case INDEX_op_rotl_i32:
2353 case INDEX_op_rotr_i32:
2354 case INDEX_op_shl_i64:
2355 case INDEX_op_shr_i64:
2356 case INDEX_op_sar_i64:
2357 case INDEX_op_rotl_i64:
2358 case INDEX_op_rotr_i64:
2359 return &r_r_ri;
2361 case INDEX_op_clz_i32:
2362 case INDEX_op_ctz_i32:
2363 case INDEX_op_clz_i64:
2364 case INDEX_op_ctz_i64:
2365 return &r_r_rAL;
2367 case INDEX_op_brcond_i32:
2368 case INDEX_op_brcond_i64:
2369 return &r_rA;
2371 case INDEX_op_movcond_i32:
2372 case INDEX_op_movcond_i64:
2373 return &movc;
2375 case INDEX_op_qemu_ld_i32:
2376 case INDEX_op_qemu_ld_i64:
2377 return &r_l;
2378 case INDEX_op_qemu_st_i32:
2379 case INDEX_op_qemu_st_i64:
2380 return &lZ_l;
2382 case INDEX_op_deposit_i32:
2383 case INDEX_op_deposit_i64:
2384 return &dep;
2386 case INDEX_op_add2_i32:
2387 case INDEX_op_add2_i64:
2388 case INDEX_op_sub2_i32:
2389 case INDEX_op_sub2_i64:
2390 return &add2;
2392 case INDEX_op_add_vec:
2393 case INDEX_op_sub_vec:
2394 case INDEX_op_mul_vec:
2395 case INDEX_op_and_vec:
2396 case INDEX_op_or_vec:
2397 case INDEX_op_xor_vec:
2398 case INDEX_op_andc_vec:
2399 case INDEX_op_orc_vec:
2400 return &w_w_w;
2401 case INDEX_op_not_vec:
2402 case INDEX_op_neg_vec:
2403 case INDEX_op_shli_vec:
2404 case INDEX_op_shri_vec:
2405 case INDEX_op_sari_vec:
2406 return &w_w;
2407 case INDEX_op_ld_vec:
2408 case INDEX_op_st_vec:
2409 return &w_r;
2410 case INDEX_op_dup_vec:
2411 return &w_wr;
2412 case INDEX_op_cmp_vec:
2413 return &w_w_wZ;
2415 default:
2416 return NULL;
2417 }
2418 }
2420 static void tcg_target_init(TCGContext *s)
2421 {
2422 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2423 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2424 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2425 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
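/* Vector registers occupy bits 32..63 of the regset; only they can
   hold V64/V128 values. */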
2427 tcg_target_call_clobber_regs = -1ull;
2428 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2429 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2430 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2431 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2432 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2433 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2434 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2435 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2436 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2437 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2438 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2439 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2440 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2441 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2442 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2443 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2444 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2445 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2446 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2448 s->reserved_regs = 0;
2449 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2450 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2451 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2452 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2453 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2454 }
2456 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2457 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2459 #define FRAME_SIZE \
2460 ((PUSH_SIZE \
2461 + TCG_STATIC_CALL_ARGS_SIZE \
2462 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2463 + TCG_TARGET_STACK_ALIGN - 1) \
2464 & ~(TCG_TARGET_STACK_ALIGN - 1))
2466 /* We're expecting a 2 byte uleb128 encoded value. */
2467 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2469 /* We're expecting to use a single ADDI insn. */
2470 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2472 static void tcg_target_qemu_prologue(TCGContext *s)
2473 {
2474 TCGReg r;
2476 /* Push (FP, LR) and allocate space for all saved registers. */
2477 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2478 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2480 /* Set up frame pointer for canonical unwinding. */
2481 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2483 /* Store callee-preserved regs x19..x28. */
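/* Offsets start at 16: the (FP, LR) pair stored above occupies the
   first slot of the frame, hence the '+ 2' below. */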
2484 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2485 int ofs = (r - TCG_REG_X19 + 2) * 8;
2486 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2487 }
2489 /* Make stack space for TCG locals. */
2490 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2491 FRAME_SIZE - PUSH_SIZE);
2493 /* Inform TCG about how to find TCG locals with register, offset, size. */
2494 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2495 CPU_TEMP_BUF_NLONGS * sizeof(long));
2497 #if !defined(CONFIG_SOFTMMU)
2498 if (USE_GUEST_BASE) {
2499 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2500 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2501 }
2502 #endif
2504 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2505 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2507 /*
2508 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2509 * and fall through to the rest of the epilogue.
2510 */
2511 s->code_gen_epilogue = s->code_ptr;
2512 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2514 /* TB epilogue */
2515 tb_ret_addr = s->code_ptr;
2517 /* Remove TCG locals stack space. */
2518 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2519 FRAME_SIZE - PUSH_SIZE);
2521 /* Restore registers x19..x28. */
2522 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2523 int ofs = (r - TCG_REG_X19 + 2) * 8;
2524 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2525 }
2527 /* Pop (FP, LR), restore SP to previous frame. */
2528 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2529 TCG_REG_SP, PUSH_SIZE, 0, 1);
2530 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2531 }
2533 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2534 {
2535 int i;
2536 for (i = 0; i < count; ++i) {
2537 p[i] = NOP;
2538 }
2539 }
2541 typedef struct {
2542 DebugFrameHeader h;
2543 uint8_t fde_def_cfa[4];
2544 uint8_t fde_reg_ofs[24];
2545 } DebugFrame;
2547 #define ELF_HOST_MACHINE EM_AARCH64
2549 static const DebugFrame debug_frame = {
2550 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2551 .h.cie.id = -1,
2552 .h.cie.version = 1,
2553 .h.cie.code_align = 1,
2554 .h.cie.data_align = 0x78, /* sleb128 -8 */
2555 .h.cie.return_column = TCG_REG_LR,
2557 /* Total FDE size does not include the "len" member. */
2558 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2560 .fde_def_cfa = {
2561 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2562 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2563 (FRAME_SIZE >> 7)
2564 },
2565 .fde_reg_ofs = {
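/* Each entry is DW_CFA_offset: (0x80 | regno) followed by the offset
   from the CFA in data-alignment units of -8 bytes. */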
2566 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2567 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2568 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2569 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2570 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2571 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2572 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2573 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2574 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2575 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2576 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2577 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2578 }
2579 };
2581 void tcg_register_jit(void *buf, size_t buf_size)
2582 {
2583 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2584 }