tcg/arm: Try pc-relative addresses for movi
[qemu/ar7.git] / tcg / arm / tcg-target.inc.c
blob42370e57368c30b6f0bbdfa6b5624c9e95d83df9
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Andrzej Zaborowski
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "elf.h"
26 #include "tcg-be-ldst.h"
28 int arm_arch = __ARM_ARCH;
30 #ifndef use_idiv_instructions
31 bool use_idiv_instructions;
32 #endif
34 /* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. */
35 #ifdef CONFIG_SOFTMMU
36 # define USING_SOFTMMU 1
37 #else
38 # define USING_SOFTMMU 0
39 #endif
#ifdef CONFIG_DEBUG_TCG
/* Register names in the order r0..r14 followed by pc; only compiled
   when CONFIG_DEBUG_TCG is defined. */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",  "%r1",  "%r2",  "%r3",
    "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11",
    "%r12", "%r13", "%r14", "%pc",
};
#endif
62 static const int tcg_target_reg_alloc_order[] = {
63 TCG_REG_R4,
64 TCG_REG_R5,
65 TCG_REG_R6,
66 TCG_REG_R7,
67 TCG_REG_R8,
68 TCG_REG_R9,
69 TCG_REG_R10,
70 TCG_REG_R11,
71 TCG_REG_R13,
72 TCG_REG_R0,
73 TCG_REG_R1,
74 TCG_REG_R2,
75 TCG_REG_R3,
76 TCG_REG_R12,
77 TCG_REG_R14,
80 static const int tcg_target_call_iarg_regs[4] = {
81 TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
83 static const int tcg_target_call_oarg_regs[2] = {
84 TCG_REG_R0, TCG_REG_R1
87 #define TCG_REG_TMP TCG_REG_R12
89 static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
91 ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
92 *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
95 static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
97 ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
98 tcg_insn_unit insn = atomic_read(code_ptr);
99 tcg_debug_assert(offset == sextract32(offset, 0, 24));
100 atomic_set(code_ptr, deposit32(insn, 0, 24, offset));
103 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
104 intptr_t value, intptr_t addend)
106 tcg_debug_assert(type == R_ARM_PC24);
107 tcg_debug_assert(addend == 0);
108 reloc_pc24(code_ptr, (tcg_insn_unit *)value);
/* Target-specific constant constraint flags (one bit each). */
#define TCG_CT_CONST_ARM  (1 << 8)   /* 'I' */
#define TCG_CT_CONST_INV  (1 << 9)   /* 'K' */
#define TCG_CT_CONST_NEG  (1 << 10)  /* 'N' */
#define TCG_CT_CONST_ZERO (1 << 11)  /* 'Z' */
116 /* parse target specific constraints */
117 static const char *target_parse_constraint(TCGArgConstraint *ct,
118 const char *ct_str, TCGType type)
120 switch (*ct_str++) {
121 case 'I':
122 ct->ct |= TCG_CT_CONST_ARM;
123 break;
124 case 'K':
125 ct->ct |= TCG_CT_CONST_INV;
126 break;
127 case 'N': /* The gcc constraint letter is L, already used here. */
128 ct->ct |= TCG_CT_CONST_NEG;
129 break;
130 case 'Z':
131 ct->ct |= TCG_CT_CONST_ZERO;
132 break;
134 case 'r':
135 ct->ct |= TCG_CT_REG;
136 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
137 break;
139 /* qemu_ld address */
140 case 'l':
141 ct->ct |= TCG_CT_REG;
142 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
143 #ifdef CONFIG_SOFTMMU
144 /* r0-r2,lr will be overwritten when reading the tlb entry,
145 so don't use these. */
146 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
147 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
148 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
149 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
150 #endif
151 break;
153 /* qemu_st address & data */
154 case 's':
155 ct->ct |= TCG_CT_REG;
156 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
157 /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
158 and r0-r1 doing the byte swapping, so don't use these. */
159 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
160 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
161 #if defined(CONFIG_SOFTMMU)
162 /* Avoid clashes with registers being used for helper args */
163 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
164 #if TARGET_LONG_BITS == 64
165 /* Avoid clashes with registers being used for helper args */
166 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
167 #endif
168 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
169 #endif
170 break;
172 default:
173 return NULL;
175 return ct_str;
/* Rotate the 32-bit value val left by n bits.
 *
 * Both shift counts are reduced modulo 32, so n == 0 (which callers
 * pass whenever encode_imm() returns a rotation of 0) is well defined:
 * a plain "val >> (32 - n)" would shift by the full width of the type,
 * which is undefined behaviour in C.  For 1 <= n <= 31 the result is
 * unchanged.
 */
static inline uint32_t rotl(uint32_t val, int n)
{
    return (val << (n & 31)) | (val >> ((32 - n) & 31));
}
/* ARM immediates for ALU instructions are made of an unsigned 8-bit
   right-rotated by an even amount between 0 and 30.

   Returns the left-rotation that brings imm into the low 8 bits (so
   that rotl(imm, result) fits in 8 bits), or -1 when imm cannot be
   encoded. */
static inline int encode_imm(uint32_t imm)
{
    int even_tz;
    int rot;

    /* Already an 8-bit value: no rotation needed. */
    if (imm <= 0xff) {
        return 0;
    }

    /* Shift out an even number of trailing zero bits. */
    even_tz = ctz32(imm) & ~1;
    if ((imm >> even_tz) <= 0xff) {
        return 32 - even_tz;
    }

    /* The value may wrap around the top of the word. */
    for (rot = 2; rot <= 6; rot += 2) {
        if (rotl(imm, rot) <= 0xff) {
            return rot;
        }
    }

    /* imm can't be encoded */
    return -1;
}
/* Returns non-zero when imm is encodable according to encode_imm(). */
static inline int check_fit_imm(uint32_t imm)
{
    int rot = encode_imm(imm);

    return rot >= 0;
}
212 /* Test if a constant matches the constraint.
213 * TODO: define constraints for:
215 * ldr/str offset: between -0xfff and 0xfff
216 * ldrh/strh offset: between -0xff and 0xff
217 * mov operand2: values represented with x << (2 * y), x < 0x100
218 * add, sub, eor...: ditto
220 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
221 const TCGArgConstraint *arg_ct)
223 int ct;
224 ct = arg_ct->ct;
225 if (ct & TCG_CT_CONST) {
226 return 1;
227 } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
228 return 1;
229 } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
230 return 1;
231 } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
232 return 1;
233 } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
234 return 1;
235 } else {
236 return 0;
/* S bit: the data-processing instruction updates the CPSR flags. */
#define TO_CPSR (1 << 20)

/* Instruction templates.  The ARITH_* values are the opcode field
   (bits 24:21) of data-processing instructions; TST/CMP/CMN always set
   the flags.  The INSN_* values are complete opcode skeletons into
   which condition, registers and offsets are ORed by the emitters. */
typedef enum {
    ARITH_AND = 0x0 << 21,
    ARITH_EOR = 0x1 << 21,
    ARITH_SUB = 0x2 << 21,
    ARITH_RSB = 0x3 << 21,
    ARITH_ADD = 0x4 << 21,
    ARITH_ADC = 0x5 << 21,
    ARITH_SBC = 0x6 << 21,
    ARITH_RSC = 0x7 << 21,
    ARITH_TST = 0x8 << 21 | TO_CPSR,
    ARITH_CMP = 0xa << 21 | TO_CPSR,
    ARITH_CMN = 0xb << 21 | TO_CPSR,
    ARITH_ORR = 0xc << 21,
    ARITH_MOV = 0xd << 21,
    ARITH_BIC = 0xe << 21,
    ARITH_MVN = 0xf << 21,

    INSN_CLZ       = 0x016f0f10,
    INSN_RBIT      = 0x06ff0f30,

    INSN_LDR_IMM   = 0x04100000,
    INSN_LDR_REG   = 0x06100000,
    INSN_STR_IMM   = 0x04000000,
    INSN_STR_REG   = 0x06000000,

    INSN_LDRH_IMM  = 0x005000b0,
    INSN_LDRH_REG  = 0x001000b0,
    INSN_LDRSH_IMM = 0x005000f0,
    INSN_LDRSH_REG = 0x001000f0,
    INSN_STRH_IMM  = 0x004000b0,
    INSN_STRH_REG  = 0x000000b0,

    INSN_LDRB_IMM  = 0x04500000,
    INSN_LDRB_REG  = 0x06500000,
    INSN_LDRSB_IMM = 0x005000d0,
    INSN_LDRSB_REG = 0x001000d0,
    INSN_STRB_IMM  = 0x04400000,
    INSN_STRB_REG  = 0x06400000,

    INSN_LDRD_IMM  = 0x004000d0,
    INSN_LDRD_REG  = 0x000000d0,
    INSN_STRD_IMM  = 0x004000f0,
    INSN_STRD_REG  = 0x000000f0,

    /* Memory barriers.  These are emitted verbatim via tcg_out32(), so
       they must be the instruction word itself, not its byte-reversed
       image (the previous values 0x5bf07ff5 / 0xba0f07ee were the
       latter and do not decode as barriers).  */
    INSN_DMB_ISH   = 0xf57ff05b,  /* dmb ish */
    INSN_DMB_MCR   = 0xee070fba,  /* mcr p15, 0, r0, c7, c10, 5 */

} ARMInsn;
/* Shifter-operand field builders.  The shift type sits in bits 6:5
   (0 = LSL, 1 = LSR, 2 = ASR, 3 = ROR).  The IMM forms place the shift
   amount at bit 7; the REG forms set bit 4 and place the shift-count
   register at bit 8. */
#define SHIFT_IMM_LSL(im)       (((im) << 7) | (0 << 5))
#define SHIFT_IMM_LSR(im)       (((im) << 7) | (1 << 5))
#define SHIFT_IMM_ASR(im)       (((im) << 7) | (2 << 5))
#define SHIFT_IMM_ROR(im)       (((im) << 7) | (3 << 5))
#define SHIFT_REG_LSL(rs)       (((rs) << 8) | (0 << 5) | (1 << 4))
#define SHIFT_REG_LSR(rs)       (((rs) << 8) | (1 << 5) | (1 << 4))
#define SHIFT_REG_ASR(rs)       (((rs) << 8) | (2 << 5) | (1 << 4))
#define SHIFT_REG_ROR(rs)       (((rs) << 8) | (3 << 5) | (1 << 4))
/* ARM condition codes, as placed in bits 31:28 of an instruction. */
enum arm_cond_code_e {
    COND_EQ = 0x0,      /* Equal */
    COND_NE = 0x1,      /* Not equal */
    COND_CS = 0x2,      /* Carry set: unsigned >= */
    COND_CC = 0x3,      /* Carry clear: unsigned < */
    COND_MI = 0x4,      /* Negative */
    COND_PL = 0x5,      /* Zero or greater */
    COND_VS = 0x6,      /* Overflow */
    COND_VC = 0x7,      /* No overflow */
    COND_HI = 0x8,      /* Unsigned > */
    COND_LS = 0x9,      /* Unsigned <= */
    COND_GE = 0xa,      /* Signed >= */
    COND_LT = 0xb,      /* Signed < */
    COND_GT = 0xc,      /* Signed > */
    COND_LE = 0xd,      /* Signed <= */
    COND_AL = 0xe,      /* Always */
};
318 static const uint8_t tcg_cond_to_arm_cond[] = {
319 [TCG_COND_EQ] = COND_EQ,
320 [TCG_COND_NE] = COND_NE,
321 [TCG_COND_LT] = COND_LT,
322 [TCG_COND_GE] = COND_GE,
323 [TCG_COND_LE] = COND_LE,
324 [TCG_COND_GT] = COND_GT,
325 /* unsigned */
326 [TCG_COND_LTU] = COND_CC,
327 [TCG_COND_GEU] = COND_CS,
328 [TCG_COND_LEU] = COND_LS,
329 [TCG_COND_GTU] = COND_HI,
332 static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
334 tcg_out32(s, (cond << 28) | 0x0a000000 |
335 (((offset - 8) >> 2) & 0x00ffffff));
338 static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
340 /* We pay attention here to not modify the branch target by masking
341 the corresponding bytes. This ensure that caches and memory are
342 kept coherent during retranslation. */
343 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
346 static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
348 /* We pay attention here to not modify the branch target by masking
349 the corresponding bytes. This ensure that caches and memory are
350 kept coherent during retranslation. */
351 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
354 static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
356 tcg_out32(s, (cond << 28) | 0x0b000000 |
357 (((offset - 8) >> 2) & 0x00ffffff));
360 static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
362 tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
365 static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
367 tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
368 (((offset - 8) >> 2) & 0x00ffffff));
371 static inline void tcg_out_dat_reg(TCGContext *s,
372 int cond, int opc, int rd, int rn, int rm, int shift)
374 tcg_out32(s, (cond << 28) | (0 << 25) | opc |
375 (rn << 16) | (rd << 12) | shift | rm);
378 static inline void tcg_out_nop(TCGContext *s)
380 if (use_armv7_instructions) {
381 /* Architected nop introduced in v6k. */
382 /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this
383 also Just So Happened to do nothing on pre-v6k so that we
384 don't need to conditionalize it? */
385 tcg_out32(s, 0xe320f000);
386 } else {
387 /* Prior to that the assembler uses mov r0, r0. */
388 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
392 static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
394 /* Simple reg-reg move, optimising out the 'do nothing' case */
395 if (rd != rm) {
396 tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
400 static inline void tcg_out_bx(TCGContext *s, int cond, TCGReg rn)
402 /* Unless the C portion of QEMU is compiled as thumb, we don't
403 actually need true BX semantics; merely a branch to an address
404 held in a register. */
405 if (use_armv5t_instructions) {
406 tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
407 } else {
408 tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
412 static inline void tcg_out_dat_imm(TCGContext *s,
413 int cond, int opc, int rd, int rn, int im)
415 tcg_out32(s, (cond << 28) | (1 << 25) | opc |
416 (rn << 16) | (rd << 12) | im);
419 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
421 int rot, opc, rn, diff;
423 /* Check a single MOV/MVN before anything else. */
424 rot = encode_imm(arg);
425 if (rot >= 0) {
426 tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
427 rotl(arg, rot) | (rot << 7));
428 return;
430 rot = encode_imm(~arg);
431 if (rot >= 0) {
432 tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
433 rotl(~arg, rot) | (rot << 7));
434 return;
437 /* Check for a pc-relative address. This will usually be the TB,
438 or within the TB, which is immediately before the code block. */
439 diff = arg - ((intptr_t)s->code_ptr + 8);
440 if (diff >= 0) {
441 rot = encode_imm(diff);
442 if (rot >= 0) {
443 tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC,
444 rotl(diff, rot) | (rot << 7));
445 return;
447 } else {
448 rot = encode_imm(-diff);
449 if (rot >= 0) {
450 tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC,
451 rotl(-diff, rot) | (rot << 7));
452 return;
456 /* Use movw + movt. */
457 if (use_armv7_instructions) {
458 /* movw */
459 tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
460 | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
461 if (arg & 0xffff0000) {
462 /* movt */
463 tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
464 | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
466 return;
469 /* TODO: This is very suboptimal, we can easily have a constant
470 pool somewhere after all the instructions. */
471 opc = ARITH_MOV;
472 rn = 0;
473 /* If we have lots of leading 1's, we can shorten the sequence by
474 beginning with mvn and then clearing higher bits with eor. */
475 if (clz32(~arg) > clz32(arg)) {
476 opc = ARITH_MVN, arg = ~arg;
478 do {
479 int i = ctz32(arg) & ~1;
480 rot = ((32 - i) << 7) & 0xf00;
481 tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
482 arg &= ~(0xff << i);
484 opc = ARITH_EOR;
485 rn = rd;
486 } while (arg);
489 static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
490 TCGArg lhs, TCGArg rhs, int rhs_is_const)
492 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
493 * rhs must satisfy the "rI" constraint.
495 if (rhs_is_const) {
496 int rot = encode_imm(rhs);
497 tcg_debug_assert(rot >= 0);
498 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
499 } else {
500 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
504 static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
505 TCGReg dst, TCGReg lhs, TCGArg rhs,
506 bool rhs_is_const)
508 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
509 * rhs must satisfy the "rIK" constraint.
511 if (rhs_is_const) {
512 int rot = encode_imm(rhs);
513 if (rot < 0) {
514 rhs = ~rhs;
515 rot = encode_imm(rhs);
516 tcg_debug_assert(rot >= 0);
517 opc = opinv;
519 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
520 } else {
521 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
525 static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
526 TCGArg dst, TCGArg lhs, TCGArg rhs,
527 bool rhs_is_const)
529 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
530 * rhs must satisfy the "rIN" constraint.
532 if (rhs_is_const) {
533 int rot = encode_imm(rhs);
534 if (rot < 0) {
535 rhs = -rhs;
536 rot = encode_imm(rhs);
537 tcg_debug_assert(rot >= 0);
538 opc = opneg;
540 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
541 } else {
542 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
546 static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
547 TCGReg rn, TCGReg rm)
549 /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */
550 if (!use_armv6_instructions && rd == rn) {
551 if (rd == rm) {
552 /* rd == rn == rm; copy an input to tmp first. */
553 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
554 rm = rn = TCG_REG_TMP;
555 } else {
556 rn = rm;
557 rm = rd;
560 /* mul */
561 tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
564 static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
565 TCGReg rd1, TCGReg rn, TCGReg rm)
567 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
568 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
569 if (rd0 == rm || rd1 == rm) {
570 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
571 rn = TCG_REG_TMP;
572 } else {
573 TCGReg t = rn;
574 rn = rm;
575 rm = t;
578 /* umull */
579 tcg_out32(s, (cond << 28) | 0x00800090 |
580 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
583 static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
584 TCGReg rd1, TCGReg rn, TCGReg rm)
586 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
587 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
588 if (rd0 == rm || rd1 == rm) {
589 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
590 rn = TCG_REG_TMP;
591 } else {
592 TCGReg t = rn;
593 rn = rm;
594 rm = t;
597 /* smull */
598 tcg_out32(s, (cond << 28) | 0x00c00090 |
599 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
602 static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
604 tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
607 static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
609 tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
612 static inline void tcg_out_ext8s(TCGContext *s, int cond,
613 int rd, int rn)
615 if (use_armv6_instructions) {
616 /* sxtb */
617 tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
618 } else {
619 tcg_out_dat_reg(s, cond, ARITH_MOV,
620 rd, 0, rn, SHIFT_IMM_LSL(24));
621 tcg_out_dat_reg(s, cond, ARITH_MOV,
622 rd, 0, rd, SHIFT_IMM_ASR(24));
626 static inline void tcg_out_ext8u(TCGContext *s, int cond,
627 int rd, int rn)
629 tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
632 static inline void tcg_out_ext16s(TCGContext *s, int cond,
633 int rd, int rn)
635 if (use_armv6_instructions) {
636 /* sxth */
637 tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
638 } else {
639 tcg_out_dat_reg(s, cond, ARITH_MOV,
640 rd, 0, rn, SHIFT_IMM_LSL(16));
641 tcg_out_dat_reg(s, cond, ARITH_MOV,
642 rd, 0, rd, SHIFT_IMM_ASR(16));
646 static inline void tcg_out_ext16u(TCGContext *s, int cond,
647 int rd, int rn)
649 if (use_armv6_instructions) {
650 /* uxth */
651 tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
652 } else {
653 tcg_out_dat_reg(s, cond, ARITH_MOV,
654 rd, 0, rn, SHIFT_IMM_LSL(16));
655 tcg_out_dat_reg(s, cond, ARITH_MOV,
656 rd, 0, rd, SHIFT_IMM_LSR(16));
660 static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
662 if (use_armv6_instructions) {
663 /* revsh */
664 tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
665 } else {
666 tcg_out_dat_reg(s, cond, ARITH_MOV,
667 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
668 tcg_out_dat_reg(s, cond, ARITH_MOV,
669 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
670 tcg_out_dat_reg(s, cond, ARITH_ORR,
671 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
675 static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
677 if (use_armv6_instructions) {
678 /* rev16 */
679 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
680 } else {
681 tcg_out_dat_reg(s, cond, ARITH_MOV,
682 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
683 tcg_out_dat_reg(s, cond, ARITH_MOV,
684 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
685 tcg_out_dat_reg(s, cond, ARITH_ORR,
686 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
690 /* swap the two low bytes assuming that the two high input bytes and the
691 two high output bit can hold any value. */
692 static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
694 if (use_armv6_instructions) {
695 /* rev16 */
696 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
697 } else {
698 tcg_out_dat_reg(s, cond, ARITH_MOV,
699 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
700 tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
701 tcg_out_dat_reg(s, cond, ARITH_ORR,
702 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
706 static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
708 if (use_armv6_instructions) {
709 /* rev */
710 tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
711 } else {
712 tcg_out_dat_reg(s, cond, ARITH_EOR,
713 TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
714 tcg_out_dat_imm(s, cond, ARITH_BIC,
715 TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
716 tcg_out_dat_reg(s, cond, ARITH_MOV,
717 rd, 0, rn, SHIFT_IMM_ROR(8));
718 tcg_out_dat_reg(s, cond, ARITH_EOR,
719 rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
723 static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
724 TCGArg a1, int ofs, int len, bool const_a1)
726 if (const_a1) {
727 /* bfi becomes bfc with rn == 15. */
728 a1 = 15;
730 /* bfi/bfc */
731 tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
732 | (ofs << 7) | ((ofs + len - 1) << 16));
735 static inline void tcg_out_extract(TCGContext *s, int cond, TCGReg rd,
736 TCGArg a1, int ofs, int len)
738 /* ubfx */
739 tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | a1
740 | (ofs << 7) | ((len - 1) << 16));
743 static inline void tcg_out_sextract(TCGContext *s, int cond, TCGReg rd,
744 TCGArg a1, int ofs, int len)
746 /* sbfx */
747 tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | a1
748 | (ofs << 7) | ((len - 1) << 16));
751 /* Note that this routine is used for both LDR and LDRH formats, so we do
752 not wish to include an immediate shift at this point. */
753 static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
754 TCGReg rn, TCGReg rm, bool u, bool p, bool w)
756 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
757 | (w << 21) | (rn << 16) | (rt << 12) | rm);
760 static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
761 TCGReg rn, int imm8, bool p, bool w)
763 bool u = 1;
764 if (imm8 < 0) {
765 imm8 = -imm8;
766 u = 0;
768 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
769 (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
772 static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
773 TCGReg rn, int imm12, bool p, bool w)
775 bool u = 1;
776 if (imm12 < 0) {
777 imm12 = -imm12;
778 u = 0;
780 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
781 (rn << 16) | (rt << 12) | imm12);
784 static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
785 TCGReg rn, int imm12)
787 tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
790 static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
791 TCGReg rn, int imm12)
793 tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
796 static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
797 TCGReg rn, TCGReg rm)
799 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
802 static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
803 TCGReg rn, TCGReg rm)
805 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
808 static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
809 TCGReg rn, int imm8)
811 tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
814 static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
815 TCGReg rn, TCGReg rm)
817 tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
820 static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
821 TCGReg rn, int imm8)
823 tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
826 static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
827 TCGReg rn, TCGReg rm)
829 tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
832 /* Register pre-increment with base writeback. */
833 static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
834 TCGReg rn, TCGReg rm)
836 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
839 static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
840 TCGReg rn, TCGReg rm)
842 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
845 static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
846 TCGReg rn, int imm8)
848 tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
851 static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
852 TCGReg rn, int imm8)
854 tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
857 static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
858 TCGReg rn, TCGReg rm)
860 tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
863 static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
864 TCGReg rn, TCGReg rm)
866 tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
869 static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
870 TCGReg rn, int imm8)
872 tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
875 static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
876 TCGReg rn, TCGReg rm)
878 tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
881 static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
882 TCGReg rn, int imm12)
884 tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
887 static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
888 TCGReg rn, int imm12)
890 tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
893 static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
894 TCGReg rn, TCGReg rm)
896 tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
899 static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
900 TCGReg rn, TCGReg rm)
902 tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
905 static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
906 TCGReg rn, int imm8)
908 tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
911 static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
912 TCGReg rn, TCGReg rm)
914 tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
917 static inline void tcg_out_ld32u(TCGContext *s, int cond,
918 int rd, int rn, int32_t offset)
920 if (offset > 0xfff || offset < -0xfff) {
921 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
922 tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
923 } else
924 tcg_out_ld32_12(s, cond, rd, rn, offset);
927 static inline void tcg_out_st32(TCGContext *s, int cond,
928 int rd, int rn, int32_t offset)
930 if (offset > 0xfff || offset < -0xfff) {
931 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
932 tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
933 } else
934 tcg_out_st32_12(s, cond, rd, rn, offset);
937 static inline void tcg_out_ld16u(TCGContext *s, int cond,
938 int rd, int rn, int32_t offset)
940 if (offset > 0xff || offset < -0xff) {
941 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
942 tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
943 } else
944 tcg_out_ld16u_8(s, cond, rd, rn, offset);
947 static inline void tcg_out_ld16s(TCGContext *s, int cond,
948 int rd, int rn, int32_t offset)
950 if (offset > 0xff || offset < -0xff) {
951 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
952 tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
953 } else
954 tcg_out_ld16s_8(s, cond, rd, rn, offset);
957 static inline void tcg_out_st16(TCGContext *s, int cond,
958 int rd, int rn, int32_t offset)
960 if (offset > 0xff || offset < -0xff) {
961 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
962 tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
963 } else
964 tcg_out_st16_8(s, cond, rd, rn, offset);
967 static inline void tcg_out_ld8u(TCGContext *s, int cond,
968 int rd, int rn, int32_t offset)
970 if (offset > 0xfff || offset < -0xfff) {
971 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
972 tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
973 } else
974 tcg_out_ld8_12(s, cond, rd, rn, offset);
977 static inline void tcg_out_ld8s(TCGContext *s, int cond,
978 int rd, int rn, int32_t offset)
980 if (offset > 0xff || offset < -0xff) {
981 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
982 tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
983 } else
984 tcg_out_ld8s_8(s, cond, rd, rn, offset);
987 static inline void tcg_out_st8(TCGContext *s, int cond,
988 int rd, int rn, int32_t offset)
990 if (offset > 0xfff || offset < -0xfff) {
991 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
992 tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
993 } else
994 tcg_out_st8_12(s, cond, rd, rn, offset);
997 /* The _goto case is normally between TBs within the same code buffer, and
998 * with the code buffer limited to 16MB we wouldn't need the long case.
999 * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
1001 static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
1003 intptr_t addri = (intptr_t)addr;
1004 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1006 if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
1007 tcg_out_b(s, cond, disp);
1008 return;
1011 assert(use_armv5t_instructions || (addri & 1) == 0);
1012 tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
1013 tcg_out_bx(s, cond, TCG_REG_TMP);
1016 /* The call case is mostly used for helpers - so it's not unreasonable
1017 * for them to be beyond branch range */
1018 static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
1020 intptr_t addri = (intptr_t)addr;
1021 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1023 if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
1024 if (addri & 1) {
1025 /* Use BLX if the target is in Thumb mode */
1026 if (!use_armv5t_instructions) {
1027 tcg_abort();
1029 tcg_out_blx_imm(s, disp);
1030 } else {
1031 tcg_out_bl(s, COND_AL, disp);
1033 } else if (use_armv7_instructions) {
1034 tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
1035 tcg_out_blx(s, COND_AL, TCG_REG_TMP);
1036 } else {
1037 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
1038 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
1039 tcg_out32(s, addri);
1043 static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
1045 if (l->has_value) {
1046 tcg_out_goto(s, cond, l->u.value_ptr);
1047 } else {
1048 tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
1049 tcg_out_b_noaddr(s, cond);
1053 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1055 if (use_armv7_instructions) {
1056 tcg_out32(s, INSN_DMB_ISH);
1057 } else if (use_armv6_instructions) {
1058 tcg_out32(s, INSN_DMB_MCR);
1062 #ifdef CONFIG_SOFTMMU
1063 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1064 * int mmu_idx, uintptr_t ra)
1066 static void * const qemu_ld_helpers[16] = {
1067 [MO_UB] = helper_ret_ldub_mmu,
1068 [MO_SB] = helper_ret_ldsb_mmu,
1070 [MO_LEUW] = helper_le_lduw_mmu,
1071 [MO_LEUL] = helper_le_ldul_mmu,
1072 [MO_LEQ] = helper_le_ldq_mmu,
1073 [MO_LESW] = helper_le_ldsw_mmu,
1074 [MO_LESL] = helper_le_ldul_mmu,
1076 [MO_BEUW] = helper_be_lduw_mmu,
1077 [MO_BEUL] = helper_be_ldul_mmu,
1078 [MO_BEQ] = helper_be_ldq_mmu,
1079 [MO_BESW] = helper_be_ldsw_mmu,
1080 [MO_BESL] = helper_be_ldul_mmu,
1083 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1084 * uintxx_t val, int mmu_idx, uintptr_t ra)
1086 static void * const qemu_st_helpers[16] = {
1087 [MO_UB] = helper_ret_stb_mmu,
1088 [MO_LEUW] = helper_le_stw_mmu,
1089 [MO_LEUL] = helper_le_stl_mmu,
1090 [MO_LEQ] = helper_le_stq_mmu,
1091 [MO_BEUW] = helper_be_stw_mmu,
1092 [MO_BEUL] = helper_be_stl_mmu,
1093 [MO_BEQ] = helper_be_stq_mmu,
/* Helper routines for marshalling helper function arguments into
 * the correct registers and stack.
 * argreg is where we want to put this argument, arg is the argument itself.
 * Return value is the updated argreg ready for the next call.
 * Note that argreg 0..3 is real registers, 4+ on stack.
 *
 * We provide routines for arguments which are: immediate, 32 bit
 * value in register, 16 and 8 bit values in register (which must be zero
 * extended before use) and 64 bit value in a lo:hi register pair.
 *
 * MOV_ARG places the argument directly in argreg; EXT_ARG is an
 * expression that prepares a stack argument and may reassign arg to
 * the register that is then stored.
 */
#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
{                                                                          \
    if (argreg >= 4) {                                                     \
        int ofs = (argreg - 4) * 4;                                        \
        EXT_ARG;                                                           \
        tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);            \
        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
    } else {                                                               \
        MOV_ARG(s, COND_AL, argreg, arg);                                  \
    }                                                                      \
    return argreg + 1;                                                     \
}
1120 DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
1121 (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1122 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
1123 (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1124 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
1125 (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1126 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
1128 static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
1129 TCGReg arglo, TCGReg arghi)
1131 /* 64 bit arguments must go in even/odd register pairs
1132 * and in 8-aligned stack slots.
1134 if (argreg & 1) {
1135 argreg++;
1137 if (use_armv6_instructions && argreg >= 4
1138 && (arglo & 1) == 0 && arghi == arglo + 1) {
1139 tcg_out_strd_8(s, COND_AL, arglo,
1140 TCG_REG_CALL_STACK, (argreg - 4) * 4);
1141 return argreg + 2;
1142 } else {
1143 argreg = tcg_out_arg_reg32(s, argreg, arglo);
1144 argreg = tcg_out_arg_reg32(s, argreg, arghi);
1145 return argreg;
1149 #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
1151 /* We're expecting to use an 8-bit immediate and to mask. */
1152 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
1154 /* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
1155 Using the offset of the second entry in the last tlb table ensures
1156 that we can index all of the elements of the first entry. */
1157 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
1158 > 0xffff);
1160 /* Load and compare a TLB entry, leaving the flags set. Returns the register
1161 containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
1163 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1164 TCGMemOp opc, int mem_index, bool is_load)
1166 TCGReg base = TCG_AREG0;
1167 int cmp_off =
1168 (is_load
1169 ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1170 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
1171 int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
1172 unsigned s_bits = opc & MO_SIZE;
1173 unsigned a_bits = get_alignment_bits(opc);
1175 /* Should generate something like the following:
1176 * shr tmp, addrlo, #TARGET_PAGE_BITS (1)
1177 * add r2, env, #high
1178 * and r0, tmp, #(CPU_TLB_SIZE - 1) (2)
1179 * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3)
1180 * ldr r0, [r2, #cmp] (4)
1181 * tst addrlo, #s_mask
1182 * ldr r2, [r2, #add] (5)
1183 * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS
1185 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
1186 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
1188 /* We checked that the offset is contained within 16 bits above. */
1189 if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
1190 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1191 (24 << 7) | (cmp_off >> 8));
1192 base = TCG_REG_R2;
1193 add_off -= cmp_off & 0xff00;
1194 cmp_off &= 0xff;
1197 tcg_out_dat_imm(s, COND_AL, ARITH_AND,
1198 TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
1199 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1200 TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
1202 /* Load the tlb comparator. Use ldrd if needed and available,
1203 but due to how the pointer needs setting up, ldm isn't useful.
1204 Base arm5 doesn't have ldrd, but armv5te does. */
1205 if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
1206 tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1207 } else {
1208 tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1209 if (TARGET_LONG_BITS == 64) {
1210 tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
1214 /* Check alignment. We don't support inline unaligned acceses,
1215 but we can easily support overalignment checks. */
1216 if (a_bits < s_bits) {
1217 a_bits = s_bits;
1219 if (a_bits) {
1220 tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
1223 /* Load the tlb addend. */
1224 tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
1226 tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
1227 TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
1229 if (TARGET_LONG_BITS == 64) {
1230 tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
1231 TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
1234 return TCG_REG_R2;
1237 /* Record the context of a call to the out of line helper code for the slow
1238 path for a load or store, so that we can later generate the correct
1239 helper code. */
1240 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1241 TCGReg datalo, TCGReg datahi, TCGReg addrlo,
1242 TCGReg addrhi, tcg_insn_unit *raddr,
1243 tcg_insn_unit *label_ptr)
1245 TCGLabelQemuLdst *label = new_ldst_label(s);
1247 label->is_ld = is_ld;
1248 label->oi = oi;
1249 label->datalo_reg = datalo;
1250 label->datahi_reg = datahi;
1251 label->addrlo_reg = addrlo;
1252 label->addrhi_reg = addrhi;
1253 label->raddr = raddr;
1254 label->label_ptr[0] = label_ptr;
1257 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1259 TCGReg argreg, datalo, datahi;
1260 TCGMemOpIdx oi = lb->oi;
1261 TCGMemOp opc = get_memop(oi);
1262 void *func;
1264 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1266 argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
1267 if (TARGET_LONG_BITS == 64) {
1268 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1269 } else {
1270 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1272 argreg = tcg_out_arg_imm32(s, argreg, oi);
1273 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1275 /* For armv6 we can use the canonical unsigned helpers and minimize
1276 icache usage. For pre-armv6, use the signed helpers since we do
1277 not have a single insn sign-extend. */
1278 if (use_armv6_instructions) {
1279 func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
1280 } else {
1281 func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
1282 if (opc & MO_SIGN) {
1283 opc = MO_UL;
1286 tcg_out_call(s, func);
1288 datalo = lb->datalo_reg;
1289 datahi = lb->datahi_reg;
1290 switch (opc & MO_SSIZE) {
1291 case MO_SB:
1292 tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
1293 break;
1294 case MO_SW:
1295 tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
1296 break;
1297 default:
1298 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1299 break;
1300 case MO_Q:
1301 if (datalo != TCG_REG_R1) {
1302 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1303 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1304 } else if (datahi != TCG_REG_R0) {
1305 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1306 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1307 } else {
1308 tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
1309 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1310 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
1312 break;
1315 tcg_out_goto(s, COND_AL, lb->raddr);
1318 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1320 TCGReg argreg, datalo, datahi;
1321 TCGMemOpIdx oi = lb->oi;
1322 TCGMemOp opc = get_memop(oi);
1324 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1326 argreg = TCG_REG_R0;
1327 argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
1328 if (TARGET_LONG_BITS == 64) {
1329 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1330 } else {
1331 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1334 datalo = lb->datalo_reg;
1335 datahi = lb->datahi_reg;
1336 switch (opc & MO_SIZE) {
1337 case MO_8:
1338 argreg = tcg_out_arg_reg8(s, argreg, datalo);
1339 break;
1340 case MO_16:
1341 argreg = tcg_out_arg_reg16(s, argreg, datalo);
1342 break;
1343 case MO_32:
1344 default:
1345 argreg = tcg_out_arg_reg32(s, argreg, datalo);
1346 break;
1347 case MO_64:
1348 argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
1349 break;
1352 argreg = tcg_out_arg_imm32(s, argreg, oi);
1353 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1355 /* Tail-call to the helper, which will return to the fast path. */
1356 tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1358 #endif /* SOFTMMU */
1360 static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
1361 TCGReg datalo, TCGReg datahi,
1362 TCGReg addrlo, TCGReg addend)
1364 TCGMemOp bswap = opc & MO_BSWAP;
1366 switch (opc & MO_SSIZE) {
1367 case MO_UB:
1368 tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
1369 break;
1370 case MO_SB:
1371 tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
1372 break;
1373 case MO_UW:
1374 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1375 if (bswap) {
1376 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1378 break;
1379 case MO_SW:
1380 if (bswap) {
1381 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1382 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1383 } else {
1384 tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
1386 break;
1387 case MO_UL:
1388 default:
1389 tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
1390 if (bswap) {
1391 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1393 break;
1394 case MO_Q:
1396 TCGReg dl = (bswap ? datahi : datalo);
1397 TCGReg dh = (bswap ? datalo : datahi);
1399 /* Avoid ldrd for user-only emulation, to handle unaligned. */
1400 if (USING_SOFTMMU && use_armv6_instructions
1401 && (dl & 1) == 0 && dh == dl + 1) {
1402 tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
1403 } else if (dl != addend) {
1404 tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
1405 tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
1406 } else {
1407 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
1408 addend, addrlo, SHIFT_IMM_LSL(0));
1409 tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
1410 tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
1412 if (bswap) {
1413 tcg_out_bswap32(s, COND_AL, dl, dl);
1414 tcg_out_bswap32(s, COND_AL, dh, dh);
1417 break;
1421 static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
1422 TCGReg datalo, TCGReg datahi,
1423 TCGReg addrlo)
1425 TCGMemOp bswap = opc & MO_BSWAP;
1427 switch (opc & MO_SSIZE) {
1428 case MO_UB:
1429 tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
1430 break;
1431 case MO_SB:
1432 tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
1433 break;
1434 case MO_UW:
1435 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1436 if (bswap) {
1437 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1439 break;
1440 case MO_SW:
1441 if (bswap) {
1442 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1443 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1444 } else {
1445 tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
1447 break;
1448 case MO_UL:
1449 default:
1450 tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
1451 if (bswap) {
1452 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1454 break;
1455 case MO_Q:
1457 TCGReg dl = (bswap ? datahi : datalo);
1458 TCGReg dh = (bswap ? datalo : datahi);
1460 /* Avoid ldrd for user-only emulation, to handle unaligned. */
1461 if (USING_SOFTMMU && use_armv6_instructions
1462 && (dl & 1) == 0 && dh == dl + 1) {
1463 tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
1464 } else if (dl == addrlo) {
1465 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1466 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1467 } else {
1468 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1469 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1471 if (bswap) {
1472 tcg_out_bswap32(s, COND_AL, dl, dl);
1473 tcg_out_bswap32(s, COND_AL, dh, dh);
1476 break;
1480 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1482 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1483 TCGMemOpIdx oi;
1484 TCGMemOp opc;
1485 #ifdef CONFIG_SOFTMMU
1486 int mem_index;
1487 TCGReg addend;
1488 tcg_insn_unit *label_ptr;
1489 #endif
1491 datalo = *args++;
1492 datahi = (is64 ? *args++ : 0);
1493 addrlo = *args++;
1494 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1495 oi = *args++;
1496 opc = get_memop(oi);
1498 #ifdef CONFIG_SOFTMMU
1499 mem_index = get_mmuidx(oi);
1500 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
1502 /* This a conditional BL only to load a pointer within this opcode into LR
1503 for the slow path. We will not be using the value for a tail call. */
1504 label_ptr = s->code_ptr;
1505 tcg_out_bl_noaddr(s, COND_NE);
1507 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
1509 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1510 s->code_ptr, label_ptr);
1511 #else /* !CONFIG_SOFTMMU */
1512 if (guest_base) {
1513 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1514 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
1515 } else {
1516 tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
1518 #endif
1521 static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
1522 TCGReg datalo, TCGReg datahi,
1523 TCGReg addrlo, TCGReg addend)
1525 TCGMemOp bswap = opc & MO_BSWAP;
1527 switch (opc & MO_SIZE) {
1528 case MO_8:
1529 tcg_out_st8_r(s, cond, datalo, addrlo, addend);
1530 break;
1531 case MO_16:
1532 if (bswap) {
1533 tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
1534 tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
1535 } else {
1536 tcg_out_st16_r(s, cond, datalo, addrlo, addend);
1538 break;
1539 case MO_32:
1540 default:
1541 if (bswap) {
1542 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1543 tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
1544 } else {
1545 tcg_out_st32_r(s, cond, datalo, addrlo, addend);
1547 break;
1548 case MO_64:
1549 /* Avoid strd for user-only emulation, to handle unaligned. */
1550 if (bswap) {
1551 tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
1552 tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
1553 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1554 tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
1555 } else if (USING_SOFTMMU && use_armv6_instructions
1556 && (datalo & 1) == 0 && datahi == datalo + 1) {
1557 tcg_out_strd_r(s, cond, datalo, addrlo, addend);
1558 } else {
1559 tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
1560 tcg_out_st32_12(s, cond, datahi, addend, 4);
1562 break;
1566 static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
1567 TCGReg datalo, TCGReg datahi,
1568 TCGReg addrlo)
1570 TCGMemOp bswap = opc & MO_BSWAP;
1572 switch (opc & MO_SIZE) {
1573 case MO_8:
1574 tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
1575 break;
1576 case MO_16:
1577 if (bswap) {
1578 tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
1579 tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
1580 } else {
1581 tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
1583 break;
1584 case MO_32:
1585 default:
1586 if (bswap) {
1587 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1588 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1589 } else {
1590 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1592 break;
1593 case MO_64:
1594 /* Avoid strd for user-only emulation, to handle unaligned. */
1595 if (bswap) {
1596 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
1597 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1598 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1599 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
1600 } else if (USING_SOFTMMU && use_armv6_instructions
1601 && (datalo & 1) == 0 && datahi == datalo + 1) {
1602 tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
1603 } else {
1604 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1605 tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
1607 break;
1611 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1613 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1614 TCGMemOpIdx oi;
1615 TCGMemOp opc;
1616 #ifdef CONFIG_SOFTMMU
1617 int mem_index;
1618 TCGReg addend;
1619 tcg_insn_unit *label_ptr;
1620 #endif
1622 datalo = *args++;
1623 datahi = (is64 ? *args++ : 0);
1624 addrlo = *args++;
1625 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1626 oi = *args++;
1627 opc = get_memop(oi);
1629 #ifdef CONFIG_SOFTMMU
1630 mem_index = get_mmuidx(oi);
1631 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
1633 tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
1635 /* The conditional call must come last, as we're going to return here. */
1636 label_ptr = s->code_ptr;
1637 tcg_out_bl_noaddr(s, COND_NE);
1639 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1640 s->code_ptr, label_ptr);
1641 #else /* !CONFIG_SOFTMMU */
1642 if (guest_base) {
1643 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1644 tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
1645 datahi, addrlo, TCG_REG_TMP);
1646 } else {
1647 tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
1649 #endif
1652 static tcg_insn_unit *tb_ret_addr;
1654 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1655 const TCGArg *args, const int *const_args)
1657 TCGArg a0, a1, a2, a3, a4, a5;
1658 int c;
1660 switch (opc) {
1661 case INDEX_op_exit_tb:
1662 /* Reuse the zeroing that exists for goto_ptr. */
1663 a0 = args[0];
1664 if (a0 == 0) {
1665 tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
1666 } else {
1667 tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
1668 tcg_out_goto(s, COND_AL, tb_ret_addr);
1670 break;
1671 case INDEX_op_goto_tb:
1672 tcg_debug_assert(s->tb_jmp_insn_offset == 0);
1674 /* Indirect jump method */
1675 intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
1676 tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
1677 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
1679 s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
1680 break;
1681 case INDEX_op_goto_ptr:
1682 tcg_out_bx(s, COND_AL, args[0]);
1683 break;
1684 case INDEX_op_br:
1685 tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
1686 break;
1688 case INDEX_op_ld8u_i32:
1689 tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
1690 break;
1691 case INDEX_op_ld8s_i32:
1692 tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
1693 break;
1694 case INDEX_op_ld16u_i32:
1695 tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
1696 break;
1697 case INDEX_op_ld16s_i32:
1698 tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
1699 break;
1700 case INDEX_op_ld_i32:
1701 tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
1702 break;
1703 case INDEX_op_st8_i32:
1704 tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
1705 break;
1706 case INDEX_op_st16_i32:
1707 tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
1708 break;
1709 case INDEX_op_st_i32:
1710 tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
1711 break;
1713 case INDEX_op_movcond_i32:
1714 /* Constraints mean that v2 is always in the same register as dest,
1715 * so we only need to do "if condition passed, move v1 to dest".
1717 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1718 args[1], args[2], const_args[2]);
1719 tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
1720 ARITH_MVN, args[0], 0, args[3], const_args[3]);
1721 break;
1722 case INDEX_op_add_i32:
1723 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
1724 args[0], args[1], args[2], const_args[2]);
1725 break;
1726 case INDEX_op_sub_i32:
1727 if (const_args[1]) {
1728 if (const_args[2]) {
1729 tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
1730 } else {
1731 tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
1732 args[0], args[2], args[1], 1);
1734 } else {
1735 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
1736 args[0], args[1], args[2], const_args[2]);
1738 break;
1739 case INDEX_op_and_i32:
1740 tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
1741 args[0], args[1], args[2], const_args[2]);
1742 break;
1743 case INDEX_op_andc_i32:
1744 tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
1745 args[0], args[1], args[2], const_args[2]);
1746 break;
1747 case INDEX_op_or_i32:
1748 c = ARITH_ORR;
1749 goto gen_arith;
1750 case INDEX_op_xor_i32:
1751 c = ARITH_EOR;
1752 /* Fall through. */
1753 gen_arith:
1754 tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
1755 break;
1756 case INDEX_op_add2_i32:
1757 a0 = args[0], a1 = args[1], a2 = args[2];
1758 a3 = args[3], a4 = args[4], a5 = args[5];
1759 if (a0 == a3 || (a0 == a5 && !const_args[5])) {
1760 a0 = TCG_REG_TMP;
1762 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
1763 a0, a2, a4, const_args[4]);
1764 tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
1765 a1, a3, a5, const_args[5]);
1766 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1767 break;
1768 case INDEX_op_sub2_i32:
1769 a0 = args[0], a1 = args[1], a2 = args[2];
1770 a3 = args[3], a4 = args[4], a5 = args[5];
1771 if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
1772 a0 = TCG_REG_TMP;
1774 if (const_args[2]) {
1775 if (const_args[4]) {
1776 tcg_out_movi32(s, COND_AL, a0, a4);
1777 a4 = a0;
1779 tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
1780 } else {
1781 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
1782 ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
1784 if (const_args[3]) {
1785 if (const_args[5]) {
1786 tcg_out_movi32(s, COND_AL, a1, a5);
1787 a5 = a1;
1789 tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
1790 } else {
1791 tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
1792 a1, a3, a5, const_args[5]);
1794 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1795 break;
1796 case INDEX_op_neg_i32:
1797 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
1798 break;
1799 case INDEX_op_not_i32:
1800 tcg_out_dat_reg(s, COND_AL,
1801 ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
1802 break;
1803 case INDEX_op_mul_i32:
1804 tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
1805 break;
1806 case INDEX_op_mulu2_i32:
1807 tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1808 break;
1809 case INDEX_op_muls2_i32:
1810 tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1811 break;
1812 /* XXX: Perhaps args[2] & 0x1f is wrong */
1813 case INDEX_op_shl_i32:
1814 c = const_args[2] ?
1815 SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
1816 goto gen_shift32;
1817 case INDEX_op_shr_i32:
1818 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
1819 SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
1820 goto gen_shift32;
1821 case INDEX_op_sar_i32:
1822 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
1823 SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
1824 goto gen_shift32;
1825 case INDEX_op_rotr_i32:
1826 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
1827 SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
1828 /* Fall through. */
1829 gen_shift32:
1830 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
1831 break;
1833 case INDEX_op_rotl_i32:
1834 if (const_args[2]) {
1835 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1836 ((0x20 - args[2]) & 0x1f) ?
1837 SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
1838 SHIFT_IMM_LSL(0));
1839 } else {
1840 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
1841 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1842 SHIFT_REG_ROR(TCG_REG_TMP));
1844 break;
1846 case INDEX_op_ctz_i32:
1847 tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
1848 a1 = TCG_REG_TMP;
1849 goto do_clz;
1851 case INDEX_op_clz_i32:
1852 a1 = args[1];
1853 do_clz:
1854 a0 = args[0];
1855 a2 = args[2];
1856 c = const_args[2];
1857 if (c && a2 == 32) {
1858 tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
1859 break;
1861 tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0);
1862 tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0);
1863 if (c || a0 != a2) {
1864 tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c);
1866 break;
1868 case INDEX_op_brcond_i32:
1869 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1870 args[0], args[1], const_args[1]);
1871 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
1872 arg_label(args[3]));
1873 break;
1874 case INDEX_op_brcond2_i32:
1875 /* The resulting conditions are:
1876 * TCG_COND_EQ --> a0 == a2 && a1 == a3,
1877 * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3,
1878 * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3,
1879 * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
1880 * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
1881 * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3,
1883 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1884 args[1], args[3], const_args[3]);
1885 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1886 args[0], args[2], const_args[2]);
1887 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
1888 arg_label(args[5]));
1889 break;
1890 case INDEX_op_setcond_i32:
1891 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1892 args[1], args[2], const_args[2]);
1893 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
1894 ARITH_MOV, args[0], 0, 1);
1895 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
1896 ARITH_MOV, args[0], 0, 0);
1897 break;
1898 case INDEX_op_setcond2_i32:
1899 /* See brcond2_i32 comment */
1900 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1901 args[2], args[4], const_args[4]);
1902 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1903 args[1], args[3], const_args[3]);
1904 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
1905 ARITH_MOV, args[0], 0, 1);
1906 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
1907 ARITH_MOV, args[0], 0, 0);
1908 break;
1910 case INDEX_op_qemu_ld_i32:
1911 tcg_out_qemu_ld(s, args, 0);
1912 break;
1913 case INDEX_op_qemu_ld_i64:
1914 tcg_out_qemu_ld(s, args, 1);
1915 break;
1916 case INDEX_op_qemu_st_i32:
1917 tcg_out_qemu_st(s, args, 0);
1918 break;
1919 case INDEX_op_qemu_st_i64:
1920 tcg_out_qemu_st(s, args, 1);
1921 break;
1923 case INDEX_op_bswap16_i32:
1924 tcg_out_bswap16(s, COND_AL, args[0], args[1]);
1925 break;
1926 case INDEX_op_bswap32_i32:
1927 tcg_out_bswap32(s, COND_AL, args[0], args[1]);
1928 break;
1930 case INDEX_op_ext8s_i32:
1931 tcg_out_ext8s(s, COND_AL, args[0], args[1]);
1932 break;
1933 case INDEX_op_ext16s_i32:
1934 tcg_out_ext16s(s, COND_AL, args[0], args[1]);
1935 break;
1936 case INDEX_op_ext16u_i32:
1937 tcg_out_ext16u(s, COND_AL, args[0], args[1]);
1938 break;
1940 case INDEX_op_deposit_i32:
1941 tcg_out_deposit(s, COND_AL, args[0], args[2],
1942 args[3], args[4], const_args[2]);
1943 break;
1944 case INDEX_op_extract_i32:
1945 tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
1946 break;
1947 case INDEX_op_sextract_i32:
1948 tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
1949 break;
1951 case INDEX_op_div_i32:
1952 tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
1953 break;
1954 case INDEX_op_divu_i32:
1955 tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
1956 break;
1958 case INDEX_op_mb:
1959 tcg_out_mb(s, args[0]);
1960 break;
1962 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1963 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1964 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1965 default:
1966 tcg_abort();
1970 static const TCGTargetOpDef arm_op_defs[] = {
1971 { INDEX_op_exit_tb, { } },
1972 { INDEX_op_goto_tb, { } },
1973 { INDEX_op_br, { } },
1974 { INDEX_op_goto_ptr, { "r" } },
1976 { INDEX_op_ld8u_i32, { "r", "r" } },
1977 { INDEX_op_ld8s_i32, { "r", "r" } },
1978 { INDEX_op_ld16u_i32, { "r", "r" } },
1979 { INDEX_op_ld16s_i32, { "r", "r" } },
1980 { INDEX_op_ld_i32, { "r", "r" } },
1981 { INDEX_op_st8_i32, { "r", "r" } },
1982 { INDEX_op_st16_i32, { "r", "r" } },
1983 { INDEX_op_st_i32, { "r", "r" } },
1985 /* TODO: "r", "r", "ri" */
1986 { INDEX_op_add_i32, { "r", "r", "rIN" } },
1987 { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
1988 { INDEX_op_mul_i32, { "r", "r", "r" } },
1989 { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
1990 { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
1991 { INDEX_op_and_i32, { "r", "r", "rIK" } },
1992 { INDEX_op_andc_i32, { "r", "r", "rIK" } },
1993 { INDEX_op_or_i32, { "r", "r", "rI" } },
1994 { INDEX_op_xor_i32, { "r", "r", "rI" } },
1995 { INDEX_op_neg_i32, { "r", "r" } },
1996 { INDEX_op_not_i32, { "r", "r" } },
1998 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1999 { INDEX_op_shr_i32, { "r", "r", "ri" } },
2000 { INDEX_op_sar_i32, { "r", "r", "ri" } },
2001 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
2002 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
2003 { INDEX_op_clz_i32, { "r", "r", "rIK" } },
2004 { INDEX_op_ctz_i32, { "r", "r", "rIK" } },
2006 { INDEX_op_brcond_i32, { "r", "rIN" } },
2007 { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
2008 { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },
2010 { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
2011 { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
2012 { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
2013 { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
2015 #if TARGET_LONG_BITS == 32
2016 { INDEX_op_qemu_ld_i32, { "r", "l" } },
2017 { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
2018 { INDEX_op_qemu_st_i32, { "s", "s" } },
2019 { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
2020 #else
2021 { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
2022 { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
2023 { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
2024 { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
2025 #endif
2027 { INDEX_op_bswap16_i32, { "r", "r" } },
2028 { INDEX_op_bswap32_i32, { "r", "r" } },
2030 { INDEX_op_ext8s_i32, { "r", "r" } },
2031 { INDEX_op_ext16s_i32, { "r", "r" } },
2032 { INDEX_op_ext16u_i32, { "r", "r" } },
2034 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
2035 { INDEX_op_extract_i32, { "r", "r" } },
2036 { INDEX_op_sextract_i32, { "r", "r" } },
2038 { INDEX_op_div_i32, { "r", "r", "r" } },
2039 { INDEX_op_divu_i32, { "r", "r", "r" } },
2041 { INDEX_op_mb, { } },
2042 { -1 },
2045 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2047 int i, n = ARRAY_SIZE(arm_op_defs);
2049 for (i = 0; i < n; ++i) {
2050 if (arm_op_defs[i].op == op) {
2051 return &arm_op_defs[i];
2054 return NULL;
2057 static void tcg_target_init(TCGContext *s)
2059 /* Only probe for the platform and capabilities if we havn't already
2060 determined maximum values at compile time. */
2061 #ifndef use_idiv_instructions
2063 unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2064 use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
2066 #endif
2067 if (__ARM_ARCH < 7) {
2068 const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
2069 if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
2070 arm_arch = pl[1] - '0';
2074 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2075 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
2076 (1 << TCG_REG_R0) |
2077 (1 << TCG_REG_R1) |
2078 (1 << TCG_REG_R2) |
2079 (1 << TCG_REG_R3) |
2080 (1 << TCG_REG_R12) |
2081 (1 << TCG_REG_R14));
2083 tcg_regset_clear(s->reserved_regs);
2084 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2085 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2086 tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
2089 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
2090 TCGReg arg1, intptr_t arg2)
2092 tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
2095 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
2096 TCGReg arg1, intptr_t arg2)
2098 tcg_out_st32(s, COND_AL, arg, arg1, arg2);
2101 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
2102 TCGReg base, intptr_t ofs)
2104 return false;
2107 static inline void tcg_out_mov(TCGContext *s, TCGType type,
2108 TCGReg ret, TCGReg arg)
2110 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
2113 static inline void tcg_out_movi(TCGContext *s, TCGType type,
2114 TCGReg ret, tcg_target_long arg)
2116 tcg_out_movi32(s, COND_AL, ret, arg);
/* The frame size is derived through macros so that
   tcg_target_qemu_prologue and tcg_register_jit share one definition.  */

/* Bytes pushed by the prologue: r4 through r11 (11 - 4 + 1 registers)
   plus one more slot for lr.  */
#define PUSH_SIZE  (((11 - 4 + 1) + 1) * sizeof(tcg_target_long))

/* Pushed registers, the static call-argument area and the temp buffer,
   rounded up to a multiple of TCG_TARGET_STACK_ALIGN.  */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & -TCG_TARGET_STACK_ALIGN)
2131 static void tcg_target_qemu_prologue(TCGContext *s)
2133 int stack_addend;
2135 /* Calling convention requires us to save r4-r11 and lr. */
2136 /* stmdb sp!, { r4 - r11, lr } */
2137 tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
2139 /* Reserve callee argument and tcg temp space. */
2140 stack_addend = FRAME_SIZE - PUSH_SIZE;
2142 tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
2143 TCG_REG_CALL_STACK, stack_addend, 1);
2144 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2145 CPU_TEMP_BUF_NLONGS * sizeof(long));
2147 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2149 tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
2152 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2153 * and fall through to the rest of the epilogue.
2155 s->code_gen_epilogue = s->code_ptr;
2156 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
2158 /* TB epilogue */
2159 tb_ret_addr = s->code_ptr;
2160 tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
2161 TCG_REG_CALL_STACK, stack_addend, 1);
2163 /* ldmia sp!, { r4 - r11, pc } */
2164 tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
2167 typedef struct {
2168 DebugFrameHeader h;
2169 uint8_t fde_def_cfa[4];
2170 uint8_t fde_reg_ofs[18];
2171 } DebugFrame;
2173 #define ELF_HOST_MACHINE EM_ARM
2175 /* We're expecting a 2 byte uleb128 encoded value. */
2176 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2178 static const DebugFrame debug_frame = {
2179 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2180 .h.cie.id = -1,
2181 .h.cie.version = 1,
2182 .h.cie.code_align = 1,
2183 .h.cie.data_align = 0x7c, /* sleb128 -4 */
2184 .h.cie.return_column = 14,
2186 /* Total FDE size does not include the "len" member. */
2187 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2189 .fde_def_cfa = {
2190 12, 13, /* DW_CFA_def_cfa sp, ... */
2191 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2192 (FRAME_SIZE >> 7)
2194 .fde_reg_ofs = {
2195 /* The following must match the stmdb in the prologue. */
2196 0x8e, 1, /* DW_CFA_offset, lr, -4 */
2197 0x8b, 2, /* DW_CFA_offset, r11, -8 */
2198 0x8a, 3, /* DW_CFA_offset, r10, -12 */
2199 0x89, 4, /* DW_CFA_offset, r9, -16 */
2200 0x88, 5, /* DW_CFA_offset, r8, -20 */
2201 0x87, 6, /* DW_CFA_offset, r7, -24 */
2202 0x86, 7, /* DW_CFA_offset, r6, -28 */
2203 0x85, 8, /* DW_CFA_offset, r5, -32 */
2204 0x84, 9, /* DW_CFA_offset, r4, -36 */
2208 void tcg_register_jit(void *buf, size_t buf_size)
2210 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));