/* tcg/arm/tcg-target.inc.c */
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Andrzej Zaborowski
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "elf.h"
26 #include "tcg-be-ldst.h"
28 int arm_arch = __ARM_ARCH;
30 #ifndef use_idiv_instructions
31 bool use_idiv_instructions;
32 #endif
/* ??? Ought to think about changing CONFIG_SOFTMMU to always be defined. */
35 #ifdef CONFIG_SOFTMMU
36 # define USING_SOFTMMU 1
37 #else
38 # define USING_SOFTMMU 0
39 #endif
41 #ifdef CONFIG_DEBUG_TCG
42 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
43 "%r0",
44 "%r1",
45 "%r2",
46 "%r3",
47 "%r4",
48 "%r5",
49 "%r6",
50 "%r7",
51 "%r8",
52 "%r9",
53 "%r10",
54 "%r11",
55 "%r12",
56 "%r13",
57 "%r14",
58 "%pc",
60 #endif
62 static const int tcg_target_reg_alloc_order[] = {
63 TCG_REG_R4,
64 TCG_REG_R5,
65 TCG_REG_R6,
66 TCG_REG_R7,
67 TCG_REG_R8,
68 TCG_REG_R9,
69 TCG_REG_R10,
70 TCG_REG_R11,
71 TCG_REG_R13,
72 TCG_REG_R0,
73 TCG_REG_R1,
74 TCG_REG_R2,
75 TCG_REG_R3,
76 TCG_REG_R12,
77 TCG_REG_R14,
80 static const int tcg_target_call_iarg_regs[4] = {
81 TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
83 static const int tcg_target_call_oarg_regs[2] = {
84 TCG_REG_R0, TCG_REG_R1
87 #define TCG_REG_TMP TCG_REG_R12
89 static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
91 ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
92 *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
95 static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
97 ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
98 tcg_insn_unit insn = atomic_read(code_ptr);
99 tcg_debug_assert(offset == sextract32(offset, 0, 24));
100 atomic_set(code_ptr, deposit32(insn, 0, 24, offset));
103 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
104 intptr_t value, intptr_t addend)
106 tcg_debug_assert(type == R_ARM_PC24);
107 tcg_debug_assert(addend == 0);
108 reloc_pc24(code_ptr, (tcg_insn_unit *)value);
111 #define TCG_CT_CONST_ARM 0x100
112 #define TCG_CT_CONST_INV 0x200
113 #define TCG_CT_CONST_NEG 0x400
114 #define TCG_CT_CONST_ZERO 0x800
116 /* parse target specific constraints */
117 static const char *target_parse_constraint(TCGArgConstraint *ct,
118 const char *ct_str, TCGType type)
120 switch (*ct_str++) {
121 case 'I':
122 ct->ct |= TCG_CT_CONST_ARM;
123 break;
124 case 'K':
125 ct->ct |= TCG_CT_CONST_INV;
126 break;
127 case 'N': /* The gcc constraint letter is L, already used here. */
128 ct->ct |= TCG_CT_CONST_NEG;
129 break;
130 case 'Z':
131 ct->ct |= TCG_CT_CONST_ZERO;
132 break;
134 case 'r':
135 ct->ct |= TCG_CT_REG;
136 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
137 break;
139 /* qemu_ld address */
140 case 'l':
141 ct->ct |= TCG_CT_REG;
142 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
143 #ifdef CONFIG_SOFTMMU
144 /* r0-r2,lr will be overwritten when reading the tlb entry,
145 so don't use these. */
146 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
147 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
148 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
149 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
150 #endif
151 break;
153 /* qemu_st address & data */
154 case 's':
155 ct->ct |= TCG_CT_REG;
156 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only),
           and r0-r1 when doing the byte swapping, so don't use these. */
159 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
160 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
161 #if defined(CONFIG_SOFTMMU)
162 /* Avoid clashes with registers being used for helper args */
163 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
164 #if TARGET_LONG_BITS == 64
165 /* Avoid clashes with registers being used for helper args */
166 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
167 #endif
168 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
169 #endif
170 break;
172 default:
173 return NULL;
175 return ct_str;
178 static inline uint32_t rotl(uint32_t val, int n)
180 return (val << n) | (val >> (32 - n));
/* ARM immediates for ALU instructions are made of an unsigned 8-bit
   value right-rotated by an even amount between 0 and 30. */
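/* Two worked examples of that encoding, as handled by encode_imm() below:
   0xff000000 is encodable -- encode_imm() returns 8, and rotl(0xff000000, 8)
   is 0xff, so the instruction carries the 8-bit value 0xff with a
   right-rotation of 8.  0x00ff00ff cannot be encoded and encode_imm()
   returns -1. */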
185 static inline int encode_imm(uint32_t imm)
187 int shift;
189 /* simple case, only lower bits */
190 if ((imm & ~0xff) == 0)
191 return 0;
192 /* then try a simple even shift */
193 shift = ctz32(imm) & ~1;
194 if (((imm >> shift) & ~0xff) == 0)
195 return 32 - shift;
196 /* now try harder with rotations */
197 if ((rotl(imm, 2) & ~0xff) == 0)
198 return 2;
199 if ((rotl(imm, 4) & ~0xff) == 0)
200 return 4;
201 if ((rotl(imm, 6) & ~0xff) == 0)
202 return 6;
203 /* imm can't be encoded */
204 return -1;
207 static inline int check_fit_imm(uint32_t imm)
209 return encode_imm(imm) >= 0;
212 /* Test if a constant matches the constraint.
213 * TODO: define constraints for:
215 * ldr/str offset: between -0xfff and 0xfff
216 * ldrh/strh offset: between -0xff and 0xff
217 * mov operand2: values represented with x << (2 * y), x < 0x100
218 * add, sub, eor...: ditto
220 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
221 const TCGArgConstraint *arg_ct)
223 int ct;
224 ct = arg_ct->ct;
225 if (ct & TCG_CT_CONST) {
226 return 1;
227 } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
228 return 1;
229 } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
230 return 1;
231 } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
232 return 1;
233 } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
234 return 1;
235 } else {
236 return 0;
240 #define TO_CPSR (1 << 20)
242 typedef enum {
243 ARITH_AND = 0x0 << 21,
244 ARITH_EOR = 0x1 << 21,
245 ARITH_SUB = 0x2 << 21,
246 ARITH_RSB = 0x3 << 21,
247 ARITH_ADD = 0x4 << 21,
248 ARITH_ADC = 0x5 << 21,
249 ARITH_SBC = 0x6 << 21,
250 ARITH_RSC = 0x7 << 21,
251 ARITH_TST = 0x8 << 21 | TO_CPSR,
252 ARITH_CMP = 0xa << 21 | TO_CPSR,
253 ARITH_CMN = 0xb << 21 | TO_CPSR,
254 ARITH_ORR = 0xc << 21,
255 ARITH_MOV = 0xd << 21,
256 ARITH_BIC = 0xe << 21,
257 ARITH_MVN = 0xf << 21,
259 INSN_CLZ = 0x016f0f10,
260 INSN_RBIT = 0x06ff0f30,
262 INSN_LDR_IMM = 0x04100000,
263 INSN_LDR_REG = 0x06100000,
264 INSN_STR_IMM = 0x04000000,
265 INSN_STR_REG = 0x06000000,
267 INSN_LDRH_IMM = 0x005000b0,
268 INSN_LDRH_REG = 0x001000b0,
269 INSN_LDRSH_IMM = 0x005000f0,
270 INSN_LDRSH_REG = 0x001000f0,
271 INSN_STRH_IMM = 0x004000b0,
272 INSN_STRH_REG = 0x000000b0,
274 INSN_LDRB_IMM = 0x04500000,
275 INSN_LDRB_REG = 0x06500000,
276 INSN_LDRSB_IMM = 0x005000d0,
277 INSN_LDRSB_REG = 0x001000d0,
278 INSN_STRB_IMM = 0x04400000,
279 INSN_STRB_REG = 0x06400000,
281 INSN_LDRD_IMM = 0x004000d0,
282 INSN_LDRD_REG = 0x000000d0,
283 INSN_STRD_IMM = 0x004000f0,
284 INSN_STRD_REG = 0x000000f0,
    /* Full encodings of "dmb ish" and "mcr p15, 0, r0, c7, c10, 5". */
    INSN_DMB_ISH   = 0xf57ff05b,
    INSN_DMB_MCR   = 0xee070fba,
289 } ARMInsn;
291 #define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00)
292 #define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20)
293 #define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40)
294 #define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60)
295 #define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10)
296 #define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30)
297 #define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50)
298 #define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70)
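/* These macros build the shifter-operand field of a data-processing insn:
   bits [11:7] hold an immediate shift amount (or bits [11:8] the shift
   register Rs), bits [6:5] select LSL/LSR/ASR/ROR, and bit 4 chooses a
   register-specified shift over an immediate one.  SHIFT_IMM_LSL(0) is the
   plain "rm, unshifted" case used throughout this file. */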
300 enum arm_cond_code_e {
301 COND_EQ = 0x0,
302 COND_NE = 0x1,
303 COND_CS = 0x2, /* Unsigned greater or equal */
304 COND_CC = 0x3, /* Unsigned less than */
305 COND_MI = 0x4, /* Negative */
306 COND_PL = 0x5, /* Zero or greater */
307 COND_VS = 0x6, /* Overflow */
308 COND_VC = 0x7, /* No overflow */
309 COND_HI = 0x8, /* Unsigned greater than */
310 COND_LS = 0x9, /* Unsigned less or equal */
311 COND_GE = 0xa,
312 COND_LT = 0xb,
313 COND_GT = 0xc,
314 COND_LE = 0xd,
315 COND_AL = 0xe,
318 static const uint8_t tcg_cond_to_arm_cond[] = {
319 [TCG_COND_EQ] = COND_EQ,
320 [TCG_COND_NE] = COND_NE,
321 [TCG_COND_LT] = COND_LT,
322 [TCG_COND_GE] = COND_GE,
323 [TCG_COND_LE] = COND_LE,
324 [TCG_COND_GT] = COND_GT,
325 /* unsigned */
326 [TCG_COND_LTU] = COND_CC,
327 [TCG_COND_GEU] = COND_CS,
328 [TCG_COND_LEU] = COND_LS,
329 [TCG_COND_GTU] = COND_HI,
332 static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
334 tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
337 static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
339 tcg_out32(s, (cond << 28) | 0x0a000000 |
340 (((offset - 8) >> 2) & 0x00ffffff));
343 static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
    /* We pay attention here not to modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
348 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
351 static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
    /* We pay attention here not to modify the branch target by masking
       the corresponding bytes.  This ensures that caches and memory are
       kept coherent during retranslation. */
356 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
359 static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
361 tcg_out32(s, (cond << 28) | 0x0b000000 |
362 (((offset - 8) >> 2) & 0x00ffffff));
365 static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
367 tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
370 static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
372 tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
373 (((offset - 8) >> 2) & 0x00ffffff));
376 static inline void tcg_out_dat_reg(TCGContext *s,
377 int cond, int opc, int rd, int rn, int rm, int shift)
379 tcg_out32(s, (cond << 28) | (0 << 25) | opc |
380 (rn << 16) | (rd << 12) | shift | rm);
383 static inline void tcg_out_nop(TCGContext *s)
385 if (use_armv7_instructions) {
386 /* Architected nop introduced in v6k. */
387 /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this
388 also Just So Happened to do nothing on pre-v6k so that we
389 don't need to conditionalize it? */
390 tcg_out32(s, 0xe320f000);
391 } else {
392 /* Prior to that the assembler uses mov r0, r0. */
393 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
397 static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
399 /* Simple reg-reg move, optimising out the 'do nothing' case */
400 if (rd != rm) {
401 tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
405 static inline void tcg_out_dat_imm(TCGContext *s,
406 int cond, int opc, int rd, int rn, int im)
408 tcg_out32(s, (cond << 28) | (1 << 25) | opc |
409 (rn << 16) | (rd << 12) | im);
412 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
414 int rot, opc, rn;
416 /* For armv7, make sure not to use movw+movt when mov/mvn would do.
417 Speed things up by only checking when movt would be required.
418 Prior to armv7, have one go at fully rotated immediates before
419 doing the decomposition thing below. */
420 if (!use_armv7_instructions || (arg & 0xffff0000)) {
421 rot = encode_imm(arg);
422 if (rot >= 0) {
423 tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
424 rotl(arg, rot) | (rot << 7));
425 return;
427 rot = encode_imm(~arg);
428 if (rot >= 0) {
429 tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
430 rotl(~arg, rot) | (rot << 7));
431 return;
435 /* Use movw + movt. */
436 if (use_armv7_instructions) {
437 /* movw */
438 tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
439 | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
440 if (arg & 0xffff0000) {
441 /* movt */
442 tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
443 | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
445 return;
    /* TODO: This is very suboptimal; we could easily have a constant
       pool somewhere after all the instructions. */
450 opc = ARITH_MOV;
451 rn = 0;
452 /* If we have lots of leading 1's, we can shorten the sequence by
453 beginning with mvn and then clearing higher bits with eor. */
454 if (clz32(~arg) > clz32(arg)) {
455 opc = ARITH_MVN, arg = ~arg;
457 do {
458 int i = ctz32(arg) & ~1;
459 rot = ((32 - i) << 7) & 0xf00;
460 tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
461 arg &= ~(0xff << i);
463 opc = ARITH_EOR;
464 rn = rd;
465 } while (arg);
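    /* For example, when neither a rotated immediate nor movw/movt applies,
       0x00ffff00 is built by the loop above as "mov rd, #0x0000ff00"
       followed by "eor rd, rd, #0x00ff0000". */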
468 static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
469 TCGArg lhs, TCGArg rhs, int rhs_is_const)
471 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
472 * rhs must satisfy the "rI" constraint.
474 if (rhs_is_const) {
475 int rot = encode_imm(rhs);
476 tcg_debug_assert(rot >= 0);
477 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
478 } else {
479 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
483 static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
484 TCGReg dst, TCGReg lhs, TCGArg rhs,
485 bool rhs_is_const)
487 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
488 * rhs must satisfy the "rIK" constraint.
490 if (rhs_is_const) {
491 int rot = encode_imm(rhs);
492 if (rot < 0) {
493 rhs = ~rhs;
494 rot = encode_imm(rhs);
495 tcg_debug_assert(rot >= 0);
496 opc = opinv;
498 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
499 } else {
500 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
504 static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
505 TCGArg dst, TCGArg lhs, TCGArg rhs,
506 bool rhs_is_const)
508 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
509 * rhs must satisfy the "rIN" constraint.
511 if (rhs_is_const) {
512 int rot = encode_imm(rhs);
513 if (rot < 0) {
514 rhs = -rhs;
515 rot = encode_imm(rhs);
516 tcg_debug_assert(rot >= 0);
517 opc = opneg;
519 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
520 } else {
521 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
525 static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
526 TCGReg rn, TCGReg rm)
528 /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */
529 if (!use_armv6_instructions && rd == rn) {
530 if (rd == rm) {
531 /* rd == rn == rm; copy an input to tmp first. */
532 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
533 rm = rn = TCG_REG_TMP;
534 } else {
535 rn = rm;
536 rm = rd;
539 /* mul */
540 tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
543 static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
544 TCGReg rd1, TCGReg rn, TCGReg rm)
546 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
547 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
548 if (rd0 == rm || rd1 == rm) {
549 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
550 rn = TCG_REG_TMP;
551 } else {
552 TCGReg t = rn;
553 rn = rm;
554 rm = t;
557 /* umull */
558 tcg_out32(s, (cond << 28) | 0x00800090 |
559 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
562 static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
563 TCGReg rd1, TCGReg rn, TCGReg rm)
565 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
566 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
567 if (rd0 == rm || rd1 == rm) {
568 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
569 rn = TCG_REG_TMP;
570 } else {
571 TCGReg t = rn;
572 rn = rm;
573 rm = t;
576 /* smull */
577 tcg_out32(s, (cond << 28) | 0x00c00090 |
578 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
581 static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
583 tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
586 static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
588 tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
591 static inline void tcg_out_ext8s(TCGContext *s, int cond,
592 int rd, int rn)
594 if (use_armv6_instructions) {
595 /* sxtb */
596 tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
597 } else {
598 tcg_out_dat_reg(s, cond, ARITH_MOV,
599 rd, 0, rn, SHIFT_IMM_LSL(24));
600 tcg_out_dat_reg(s, cond, ARITH_MOV,
601 rd, 0, rd, SHIFT_IMM_ASR(24));
605 static inline void tcg_out_ext8u(TCGContext *s, int cond,
606 int rd, int rn)
608 tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
611 static inline void tcg_out_ext16s(TCGContext *s, int cond,
612 int rd, int rn)
614 if (use_armv6_instructions) {
615 /* sxth */
616 tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
617 } else {
618 tcg_out_dat_reg(s, cond, ARITH_MOV,
619 rd, 0, rn, SHIFT_IMM_LSL(16));
620 tcg_out_dat_reg(s, cond, ARITH_MOV,
621 rd, 0, rd, SHIFT_IMM_ASR(16));
625 static inline void tcg_out_ext16u(TCGContext *s, int cond,
626 int rd, int rn)
628 if (use_armv6_instructions) {
629 /* uxth */
630 tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
631 } else {
632 tcg_out_dat_reg(s, cond, ARITH_MOV,
633 rd, 0, rn, SHIFT_IMM_LSL(16));
634 tcg_out_dat_reg(s, cond, ARITH_MOV,
635 rd, 0, rd, SHIFT_IMM_LSR(16));
639 static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
641 if (use_armv6_instructions) {
642 /* revsh */
643 tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
644 } else {
645 tcg_out_dat_reg(s, cond, ARITH_MOV,
646 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
647 tcg_out_dat_reg(s, cond, ARITH_MOV,
648 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
649 tcg_out_dat_reg(s, cond, ARITH_ORR,
650 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
654 static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
656 if (use_armv6_instructions) {
657 /* rev16 */
658 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
659 } else {
660 tcg_out_dat_reg(s, cond, ARITH_MOV,
661 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
662 tcg_out_dat_reg(s, cond, ARITH_MOV,
663 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
664 tcg_out_dat_reg(s, cond, ARITH_ORR,
665 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
/* swap the two low bytes assuming that the two high input bytes and the
   two high output bytes can hold any value. */
671 static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
673 if (use_armv6_instructions) {
674 /* rev16 */
675 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
676 } else {
677 tcg_out_dat_reg(s, cond, ARITH_MOV,
678 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
679 tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
680 tcg_out_dat_reg(s, cond, ARITH_ORR,
681 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
685 static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
687 if (use_armv6_instructions) {
688 /* rev */
689 tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
690 } else {
691 tcg_out_dat_reg(s, cond, ARITH_EOR,
692 TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
693 tcg_out_dat_imm(s, cond, ARITH_BIC,
694 TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
695 tcg_out_dat_reg(s, cond, ARITH_MOV,
696 rd, 0, rn, SHIFT_IMM_ROR(8));
697 tcg_out_dat_reg(s, cond, ARITH_EOR,
698 rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
702 static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
703 TCGArg a1, int ofs, int len, bool const_a1)
705 if (const_a1) {
706 /* bfi becomes bfc with rn == 15. */
707 a1 = 15;
709 /* bfi/bfc */
710 tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
711 | (ofs << 7) | ((ofs + len - 1) << 16));
714 static inline void tcg_out_extract(TCGContext *s, int cond, TCGReg rd,
715 TCGArg a1, int ofs, int len)
717 /* ubfx */
718 tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | a1
719 | (ofs << 7) | ((len - 1) << 16));
722 static inline void tcg_out_sextract(TCGContext *s, int cond, TCGReg rd,
723 TCGArg a1, int ofs, int len)
725 /* sbfx */
726 tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | a1
727 | (ofs << 7) | ((len - 1) << 16));
730 /* Note that this routine is used for both LDR and LDRH formats, so we do
731 not wish to include an immediate shift at this point. */
732 static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
733 TCGReg rn, TCGReg rm, bool u, bool p, bool w)
735 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
736 | (w << 21) | (rn << 16) | (rt << 12) | rm);
739 static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
740 TCGReg rn, int imm8, bool p, bool w)
742 bool u = 1;
743 if (imm8 < 0) {
744 imm8 = -imm8;
745 u = 0;
747 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
748 (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
751 static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
752 TCGReg rn, int imm12, bool p, bool w)
754 bool u = 1;
755 if (imm12 < 0) {
756 imm12 = -imm12;
757 u = 0;
759 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
760 (rn << 16) | (rt << 12) | imm12);
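/* In the three tcg_out_memop_* helpers above, u selects whether the offset
   is added to or subtracted from the base, p selects pre- vs post-indexed
   addressing, and w requests base-register writeback.  The plain
   [rn, offset] form is p=1, w=0; the _rwb variants below use p=1, w=1. */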
763 static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
764 TCGReg rn, int imm12)
766 tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
769 static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
770 TCGReg rn, int imm12)
772 tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
775 static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
776 TCGReg rn, TCGReg rm)
778 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
781 static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
782 TCGReg rn, TCGReg rm)
784 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
787 static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
788 TCGReg rn, int imm8)
790 tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
793 static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
794 TCGReg rn, TCGReg rm)
796 tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
799 static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
800 TCGReg rn, int imm8)
802 tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
805 static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
806 TCGReg rn, TCGReg rm)
808 tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
811 /* Register pre-increment with base writeback. */
812 static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
813 TCGReg rn, TCGReg rm)
815 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
818 static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
819 TCGReg rn, TCGReg rm)
821 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
824 static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
825 TCGReg rn, int imm8)
827 tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
830 static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
831 TCGReg rn, int imm8)
833 tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
836 static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
837 TCGReg rn, TCGReg rm)
839 tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
842 static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
843 TCGReg rn, TCGReg rm)
845 tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
848 static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
849 TCGReg rn, int imm8)
851 tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
854 static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
855 TCGReg rn, TCGReg rm)
857 tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
860 static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
861 TCGReg rn, int imm12)
863 tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
866 static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
867 TCGReg rn, int imm12)
869 tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
872 static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
873 TCGReg rn, TCGReg rm)
875 tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
878 static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
879 TCGReg rn, TCGReg rm)
881 tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
884 static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
885 TCGReg rn, int imm8)
887 tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
890 static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
891 TCGReg rn, TCGReg rm)
893 tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
896 static inline void tcg_out_ld32u(TCGContext *s, int cond,
897 int rd, int rn, int32_t offset)
899 if (offset > 0xfff || offset < -0xfff) {
900 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
901 tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
902 } else
903 tcg_out_ld32_12(s, cond, rd, rn, offset);
906 static inline void tcg_out_st32(TCGContext *s, int cond,
907 int rd, int rn, int32_t offset)
909 if (offset > 0xfff || offset < -0xfff) {
910 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
911 tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
912 } else
913 tcg_out_st32_12(s, cond, rd, rn, offset);
916 static inline void tcg_out_ld16u(TCGContext *s, int cond,
917 int rd, int rn, int32_t offset)
919 if (offset > 0xff || offset < -0xff) {
920 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
921 tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
922 } else
923 tcg_out_ld16u_8(s, cond, rd, rn, offset);
926 static inline void tcg_out_ld16s(TCGContext *s, int cond,
927 int rd, int rn, int32_t offset)
929 if (offset > 0xff || offset < -0xff) {
930 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
931 tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
932 } else
933 tcg_out_ld16s_8(s, cond, rd, rn, offset);
936 static inline void tcg_out_st16(TCGContext *s, int cond,
937 int rd, int rn, int32_t offset)
939 if (offset > 0xff || offset < -0xff) {
940 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
941 tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
942 } else
943 tcg_out_st16_8(s, cond, rd, rn, offset);
946 static inline void tcg_out_ld8u(TCGContext *s, int cond,
947 int rd, int rn, int32_t offset)
949 if (offset > 0xfff || offset < -0xfff) {
950 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
951 tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
952 } else
953 tcg_out_ld8_12(s, cond, rd, rn, offset);
956 static inline void tcg_out_ld8s(TCGContext *s, int cond,
957 int rd, int rn, int32_t offset)
959 if (offset > 0xff || offset < -0xff) {
960 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
961 tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
962 } else
963 tcg_out_ld8s_8(s, cond, rd, rn, offset);
966 static inline void tcg_out_st8(TCGContext *s, int cond,
967 int rd, int rn, int32_t offset)
969 if (offset > 0xfff || offset < -0xfff) {
970 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
971 tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
972 } else
973 tcg_out_st8_12(s, cond, rd, rn, offset);
976 /* The _goto case is normally between TBs within the same code buffer, and
977 * with the code buffer limited to 16MB we wouldn't need the long case.
 * But we also use it for the tail-call to the qemu_ld/st helpers, which does
 * need it.
980 static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
982 intptr_t addri = (intptr_t)addr;
983 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
985 if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
986 tcg_out_b(s, cond, disp);
987 return;
990 tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
991 if (use_armv5t_instructions) {
992 tcg_out_bx(s, cond, TCG_REG_TMP);
993 } else {
994 if (addri & 1) {
995 tcg_abort();
997 tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
1001 /* The call case is mostly used for helpers - so it's not unreasonable
1002 * for them to be beyond branch range */
1003 static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
1005 intptr_t addri = (intptr_t)addr;
1006 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1008 if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
1009 if (addri & 1) {
1010 /* Use BLX if the target is in Thumb mode */
1011 if (!use_armv5t_instructions) {
1012 tcg_abort();
1014 tcg_out_blx_imm(s, disp);
1015 } else {
1016 tcg_out_bl(s, COND_AL, disp);
1018 } else if (use_armv7_instructions) {
1019 tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
1020 tcg_out_blx(s, COND_AL, TCG_REG_TMP);
1021 } else {
1022 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
1023 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
1024 tcg_out32(s, addri);
1028 void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
1030 tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
1031 tcg_insn_unit *target = (tcg_insn_unit *)addr;
1033 /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */
1034 reloc_pc24_atomic(code_ptr, target);
1035 flush_icache_range(jmp_addr, jmp_addr + 4);
1038 static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
1040 if (l->has_value) {
1041 tcg_out_goto(s, cond, l->u.value_ptr);
1042 } else {
1043 tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
1044 tcg_out_b_noaddr(s, cond);
1048 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1050 if (use_armv7_instructions) {
1051 tcg_out32(s, INSN_DMB_ISH);
1052 } else if (use_armv6_instructions) {
1053 tcg_out32(s, INSN_DMB_MCR);
1057 #ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUArchState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
1061 static void * const qemu_ld_helpers[16] = {
1062 [MO_UB] = helper_ret_ldub_mmu,
1063 [MO_SB] = helper_ret_ldsb_mmu,
1065 [MO_LEUW] = helper_le_lduw_mmu,
1066 [MO_LEUL] = helper_le_ldul_mmu,
1067 [MO_LEQ] = helper_le_ldq_mmu,
1068 [MO_LESW] = helper_le_ldsw_mmu,
1069 [MO_LESL] = helper_le_ldul_mmu,
1071 [MO_BEUW] = helper_be_lduw_mmu,
1072 [MO_BEUL] = helper_be_ldul_mmu,
1073 [MO_BEQ] = helper_be_ldq_mmu,
1074 [MO_BESW] = helper_be_ldsw_mmu,
1075 [MO_BESL] = helper_be_ldul_mmu,
/* helper signature: helper_ret_st_mmu(CPUArchState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi, uintptr_t ra)
1081 static void * const qemu_st_helpers[16] = {
1082 [MO_UB] = helper_ret_stb_mmu,
1083 [MO_LEUW] = helper_le_stw_mmu,
1084 [MO_LEUL] = helper_le_stl_mmu,
1085 [MO_LEQ] = helper_le_stq_mmu,
1086 [MO_BEUW] = helper_be_stw_mmu,
1087 [MO_BEUL] = helper_be_stl_mmu,
1088 [MO_BEQ] = helper_be_stq_mmu,
1091 /* Helper routines for marshalling helper function arguments into
1092 * the correct registers and stack.
1093 * argreg is where we want to put this argument, arg is the argument itself.
1094 * Return value is the updated argreg ready for the next call.
 * Note that argregs 0..3 are real registers, 4+ go on the stack.
1097 * We provide routines for arguments which are: immediate, 32 bit
1098 * value in register, 16 and 8 bit values in register (which must be zero
1099 * extended before use) and 64 bit value in a lo:hi register pair.
1101 #define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
1102 static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
1104 if (argreg < 4) { \
1105 MOV_ARG(s, COND_AL, argreg, arg); \
1106 } else { \
1107 int ofs = (argreg - 4) * 4; \
1108 EXT_ARG; \
1109 tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
1110 tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
1112 return argreg + 1; \
1115 DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
1116 (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1117 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
1118 (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1119 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
1120 (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1121 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
1123 static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
1124 TCGReg arglo, TCGReg arghi)
1126 /* 64 bit arguments must go in even/odd register pairs
1127 * and in 8-aligned stack slots.
1129 if (argreg & 1) {
1130 argreg++;
1132 if (use_armv6_instructions && argreg >= 4
1133 && (arglo & 1) == 0 && arghi == arglo + 1) {
1134 tcg_out_strd_8(s, COND_AL, arglo,
1135 TCG_REG_CALL_STACK, (argreg - 4) * 4);
1136 return argreg + 2;
1137 } else {
1138 argreg = tcg_out_arg_reg32(s, argreg, arglo);
1139 argreg = tcg_out_arg_reg32(s, argreg, arghi);
1140 return argreg;
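/* For illustration: with TARGET_LONG_BITS == 64 the slow-path load below
   marshals helper_ret_ld*_mmu(env, addr, oi, ra) as r0 = env, r2:r3 = addr
   (argreg 1 is skipped for pair alignment), oi at [sp, #0] and the return
   address at [sp, #4]. */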
1144 #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
1146 /* We're expecting to use an 8-bit immediate and to mask. */
1147 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
1149 /* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
1150 Using the offset of the second entry in the last tlb table ensures
1151 that we can index all of the elements of the first entry. */
1152 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
1153 > 0xffff);
1155 /* Load and compare a TLB entry, leaving the flags set. Returns the register
1156 containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
1158 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1159 TCGMemOp opc, int mem_index, bool is_load)
1161 TCGReg base = TCG_AREG0;
1162 int cmp_off =
1163 (is_load
1164 ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1165 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
1166 int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
1167 unsigned s_bits = opc & MO_SIZE;
1168 unsigned a_bits = get_alignment_bits(opc);
1170 /* Should generate something like the following:
1171 * shr tmp, addrlo, #TARGET_PAGE_BITS (1)
1172 * add r2, env, #high
1173 * and r0, tmp, #(CPU_TLB_SIZE - 1) (2)
1174 * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3)
1175 * ldr r0, [r2, #cmp] (4)
1176 * tst addrlo, #s_mask
1177 * ldr r2, [r2, #add] (5)
1178 * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS
1180 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
1181 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
1183 /* We checked that the offset is contained within 16 bits above. */
1184 if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
1185 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1186 (24 << 7) | (cmp_off >> 8));
1187 base = TCG_REG_R2;
1188 add_off -= cmp_off & 0xff00;
1189 cmp_off &= 0xff;
1192 tcg_out_dat_imm(s, COND_AL, ARITH_AND,
1193 TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
1194 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1195 TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
1197 /* Load the tlb comparator. Use ldrd if needed and available,
1198 but due to how the pointer needs setting up, ldm isn't useful.
1199 Base arm5 doesn't have ldrd, but armv5te does. */
1200 if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
1201 tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1202 } else {
1203 tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1204 if (TARGET_LONG_BITS == 64) {
1205 tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
    /* Check alignment.  We don't support inline unaligned accesses,
1210 but we can easily support overalignment checks. */
1211 if (a_bits < s_bits) {
1212 a_bits = s_bits;
1214 if (a_bits) {
1215 tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
1218 /* Load the tlb addend. */
1219 tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
1221 tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
1222 TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
1224 if (TARGET_LONG_BITS == 64) {
1225 tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
1226 TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
1229 return TCG_REG_R2;
1232 /* Record the context of a call to the out of line helper code for the slow
1233 path for a load or store, so that we can later generate the correct
1234 helper code. */
1235 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1236 TCGReg datalo, TCGReg datahi, TCGReg addrlo,
1237 TCGReg addrhi, tcg_insn_unit *raddr,
1238 tcg_insn_unit *label_ptr)
1240 TCGLabelQemuLdst *label = new_ldst_label(s);
1242 label->is_ld = is_ld;
1243 label->oi = oi;
1244 label->datalo_reg = datalo;
1245 label->datahi_reg = datahi;
1246 label->addrlo_reg = addrlo;
1247 label->addrhi_reg = addrhi;
1248 label->raddr = raddr;
1249 label->label_ptr[0] = label_ptr;
1252 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1254 TCGReg argreg, datalo, datahi;
1255 TCGMemOpIdx oi = lb->oi;
1256 TCGMemOp opc = get_memop(oi);
1257 void *func;
1259 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1261 argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
1262 if (TARGET_LONG_BITS == 64) {
1263 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1264 } else {
1265 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1267 argreg = tcg_out_arg_imm32(s, argreg, oi);
1268 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1270 /* For armv6 we can use the canonical unsigned helpers and minimize
1271 icache usage. For pre-armv6, use the signed helpers since we do
1272 not have a single insn sign-extend. */
1273 if (use_armv6_instructions) {
1274 func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
1275 } else {
1276 func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
1277 if (opc & MO_SIGN) {
1278 opc = MO_UL;
1281 tcg_out_call(s, func);
1283 datalo = lb->datalo_reg;
1284 datahi = lb->datahi_reg;
1285 switch (opc & MO_SSIZE) {
1286 case MO_SB:
1287 tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
1288 break;
1289 case MO_SW:
1290 tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
1291 break;
1292 default:
1293 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1294 break;
1295 case MO_Q:
1296 if (datalo != TCG_REG_R1) {
1297 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1298 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1299 } else if (datahi != TCG_REG_R0) {
1300 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1301 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1302 } else {
1303 tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
1304 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1305 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
1307 break;
1310 tcg_out_goto(s, COND_AL, lb->raddr);
1313 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1315 TCGReg argreg, datalo, datahi;
1316 TCGMemOpIdx oi = lb->oi;
1317 TCGMemOp opc = get_memop(oi);
1319 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1321 argreg = TCG_REG_R0;
1322 argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
1323 if (TARGET_LONG_BITS == 64) {
1324 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1325 } else {
1326 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1329 datalo = lb->datalo_reg;
1330 datahi = lb->datahi_reg;
1331 switch (opc & MO_SIZE) {
1332 case MO_8:
1333 argreg = tcg_out_arg_reg8(s, argreg, datalo);
1334 break;
1335 case MO_16:
1336 argreg = tcg_out_arg_reg16(s, argreg, datalo);
1337 break;
1338 case MO_32:
1339 default:
1340 argreg = tcg_out_arg_reg32(s, argreg, datalo);
1341 break;
1342 case MO_64:
1343 argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
1344 break;
1347 argreg = tcg_out_arg_imm32(s, argreg, oi);
1348 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1350 /* Tail-call to the helper, which will return to the fast path. */
1351 tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1353 #endif /* SOFTMMU */
1355 static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
1356 TCGReg datalo, TCGReg datahi,
1357 TCGReg addrlo, TCGReg addend)
1359 TCGMemOp bswap = opc & MO_BSWAP;
1361 switch (opc & MO_SSIZE) {
1362 case MO_UB:
1363 tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
1364 break;
1365 case MO_SB:
1366 tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
1367 break;
1368 case MO_UW:
1369 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1370 if (bswap) {
1371 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1373 break;
1374 case MO_SW:
1375 if (bswap) {
1376 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1377 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1378 } else {
1379 tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
1381 break;
1382 case MO_UL:
1383 default:
1384 tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
1385 if (bswap) {
1386 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1388 break;
1389 case MO_Q:
1391 TCGReg dl = (bswap ? datahi : datalo);
1392 TCGReg dh = (bswap ? datalo : datahi);
1394 /* Avoid ldrd for user-only emulation, to handle unaligned. */
1395 if (USING_SOFTMMU && use_armv6_instructions
1396 && (dl & 1) == 0 && dh == dl + 1) {
1397 tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
1398 } else if (dl != addend) {
1399 tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
1400 tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
1401 } else {
1402 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
1403 addend, addrlo, SHIFT_IMM_LSL(0));
1404 tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
1405 tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
1407 if (bswap) {
1408 tcg_out_bswap32(s, COND_AL, dl, dl);
1409 tcg_out_bswap32(s, COND_AL, dh, dh);
1412 break;
1416 static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
1417 TCGReg datalo, TCGReg datahi,
1418 TCGReg addrlo)
1420 TCGMemOp bswap = opc & MO_BSWAP;
1422 switch (opc & MO_SSIZE) {
1423 case MO_UB:
1424 tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
1425 break;
1426 case MO_SB:
1427 tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
1428 break;
1429 case MO_UW:
1430 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1431 if (bswap) {
1432 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1434 break;
1435 case MO_SW:
1436 if (bswap) {
1437 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1438 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1439 } else {
1440 tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
1442 break;
1443 case MO_UL:
1444 default:
1445 tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
1446 if (bswap) {
1447 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1449 break;
1450 case MO_Q:
1452 TCGReg dl = (bswap ? datahi : datalo);
1453 TCGReg dh = (bswap ? datalo : datahi);
1455 /* Avoid ldrd for user-only emulation, to handle unaligned. */
1456 if (USING_SOFTMMU && use_armv6_instructions
1457 && (dl & 1) == 0 && dh == dl + 1) {
1458 tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
1459 } else if (dl == addrlo) {
1460 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1461 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1462 } else {
1463 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1464 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1466 if (bswap) {
1467 tcg_out_bswap32(s, COND_AL, dl, dl);
1468 tcg_out_bswap32(s, COND_AL, dh, dh);
1471 break;
1475 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1477 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1478 TCGMemOpIdx oi;
1479 TCGMemOp opc;
1480 #ifdef CONFIG_SOFTMMU
1481 int mem_index;
1482 TCGReg addend;
1483 tcg_insn_unit *label_ptr;
1484 #endif
1486 datalo = *args++;
1487 datahi = (is64 ? *args++ : 0);
1488 addrlo = *args++;
1489 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1490 oi = *args++;
1491 opc = get_memop(oi);
1493 #ifdef CONFIG_SOFTMMU
1494 mem_index = get_mmuidx(oi);
1495 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
    /* This is a conditional BL only to load a pointer within this opcode into LR
1498 for the slow path. We will not be using the value for a tail call. */
1499 label_ptr = s->code_ptr;
1500 tcg_out_bl_noaddr(s, COND_NE);
1502 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
1504 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1505 s->code_ptr, label_ptr);
1506 #else /* !CONFIG_SOFTMMU */
1507 if (guest_base) {
1508 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1509 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
1510 } else {
1511 tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
1513 #endif
1516 static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
1517 TCGReg datalo, TCGReg datahi,
1518 TCGReg addrlo, TCGReg addend)
1520 TCGMemOp bswap = opc & MO_BSWAP;
1522 switch (opc & MO_SIZE) {
1523 case MO_8:
1524 tcg_out_st8_r(s, cond, datalo, addrlo, addend);
1525 break;
1526 case MO_16:
1527 if (bswap) {
1528 tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
1529 tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
1530 } else {
1531 tcg_out_st16_r(s, cond, datalo, addrlo, addend);
1533 break;
1534 case MO_32:
1535 default:
1536 if (bswap) {
1537 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1538 tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
1539 } else {
1540 tcg_out_st32_r(s, cond, datalo, addrlo, addend);
1542 break;
1543 case MO_64:
1544 /* Avoid strd for user-only emulation, to handle unaligned. */
1545 if (bswap) {
1546 tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
1547 tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
1548 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1549 tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
1550 } else if (USING_SOFTMMU && use_armv6_instructions
1551 && (datalo & 1) == 0 && datahi == datalo + 1) {
1552 tcg_out_strd_r(s, cond, datalo, addrlo, addend);
1553 } else {
1554 tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
1555 tcg_out_st32_12(s, cond, datahi, addend, 4);
1557 break;
1561 static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
1562 TCGReg datalo, TCGReg datahi,
1563 TCGReg addrlo)
1565 TCGMemOp bswap = opc & MO_BSWAP;
1567 switch (opc & MO_SIZE) {
1568 case MO_8:
1569 tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
1570 break;
1571 case MO_16:
1572 if (bswap) {
1573 tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
1574 tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
1575 } else {
1576 tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
1578 break;
1579 case MO_32:
1580 default:
1581 if (bswap) {
1582 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1583 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1584 } else {
1585 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1587 break;
1588 case MO_64:
1589 /* Avoid strd for user-only emulation, to handle unaligned. */
1590 if (bswap) {
1591 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
1592 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1593 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1594 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
1595 } else if (USING_SOFTMMU && use_armv6_instructions
1596 && (datalo & 1) == 0 && datahi == datalo + 1) {
1597 tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
1598 } else {
1599 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1600 tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
1602 break;
1606 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1608 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1609 TCGMemOpIdx oi;
1610 TCGMemOp opc;
1611 #ifdef CONFIG_SOFTMMU
1612 int mem_index;
1613 TCGReg addend;
1614 tcg_insn_unit *label_ptr;
1615 #endif
1617 datalo = *args++;
1618 datahi = (is64 ? *args++ : 0);
1619 addrlo = *args++;
1620 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1621 oi = *args++;
1622 opc = get_memop(oi);
1624 #ifdef CONFIG_SOFTMMU
1625 mem_index = get_mmuidx(oi);
1626 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
1628 tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
1630 /* The conditional call must come last, as we're going to return here. */
1631 label_ptr = s->code_ptr;
1632 tcg_out_bl_noaddr(s, COND_NE);
1634 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1635 s->code_ptr, label_ptr);
1636 #else /* !CONFIG_SOFTMMU */
1637 if (guest_base) {
1638 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1639 tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
1640 datahi, addrlo, TCG_REG_TMP);
1641 } else {
1642 tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
1644 #endif
1647 static tcg_insn_unit *tb_ret_addr;
1649 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1650 const TCGArg *args, const int *const_args)
1652 TCGArg a0, a1, a2, a3, a4, a5;
1653 int c;
1655 switch (opc) {
1656 case INDEX_op_exit_tb:
1657 tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
1658 tcg_out_goto(s, COND_AL, tb_ret_addr);
1659 break;
1660 case INDEX_op_goto_tb:
1661 if (s->tb_jmp_insn_offset) {
1662 /* Direct jump method */
1663 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
1664 tcg_out_b_noaddr(s, COND_AL);
1665 } else {
1666 /* Indirect jump method */
1667 intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
1668 tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
1669 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
1671 s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
1672 break;
1673 case INDEX_op_br:
1674 tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
1675 break;
1677 case INDEX_op_ld8u_i32:
1678 tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
1679 break;
1680 case INDEX_op_ld8s_i32:
1681 tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
1682 break;
1683 case INDEX_op_ld16u_i32:
1684 tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
1685 break;
1686 case INDEX_op_ld16s_i32:
1687 tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
1688 break;
1689 case INDEX_op_ld_i32:
1690 tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
1691 break;
1692 case INDEX_op_st8_i32:
1693 tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
1694 break;
1695 case INDEX_op_st16_i32:
1696 tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
1697 break;
1698 case INDEX_op_st_i32:
1699 tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
1700 break;
1702 case INDEX_op_movcond_i32:
1703 /* Constraints mean that v2 is always in the same register as dest,
1704 * so we only need to do "if condition passed, move v1 to dest".
1706 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1707 args[1], args[2], const_args[2]);
1708 tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
1709 ARITH_MVN, args[0], 0, args[3], const_args[3]);
1710 break;
1711 case INDEX_op_add_i32:
1712 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
1713 args[0], args[1], args[2], const_args[2]);
1714 break;
1715 case INDEX_op_sub_i32:
1716 if (const_args[1]) {
1717 if (const_args[2]) {
1718 tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
1719 } else {
1720 tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
1721 args[0], args[2], args[1], 1);
1723 } else {
1724 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
1725 args[0], args[1], args[2], const_args[2]);
1727 break;
1728 case INDEX_op_and_i32:
1729 tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
1730 args[0], args[1], args[2], const_args[2]);
1731 break;
1732 case INDEX_op_andc_i32:
1733 tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
1734 args[0], args[1], args[2], const_args[2]);
1735 break;
1736 case INDEX_op_or_i32:
1737 c = ARITH_ORR;
1738 goto gen_arith;
1739 case INDEX_op_xor_i32:
1740 c = ARITH_EOR;
1741 /* Fall through. */
1742 gen_arith:
1743 tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
1744 break;
1745 case INDEX_op_add2_i32:
1746 a0 = args[0], a1 = args[1], a2 = args[2];
1747 a3 = args[3], a4 = args[4], a5 = args[5];
1748 if (a0 == a3 || (a0 == a5 && !const_args[5])) {
1749 a0 = TCG_REG_TMP;
1751 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
1752 a0, a2, a4, const_args[4]);
1753 tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
1754 a1, a3, a5, const_args[5]);
1755 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1756 break;
1757 case INDEX_op_sub2_i32:
1758 a0 = args[0], a1 = args[1], a2 = args[2];
1759 a3 = args[3], a4 = args[4], a5 = args[5];
1760 if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
1761 a0 = TCG_REG_TMP;
1763 if (const_args[2]) {
1764 if (const_args[4]) {
1765 tcg_out_movi32(s, COND_AL, a0, a4);
1766 a4 = a0;
1768 tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
1769 } else {
1770 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
1771 ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
1773 if (const_args[3]) {
1774 if (const_args[5]) {
1775 tcg_out_movi32(s, COND_AL, a1, a5);
1776 a5 = a1;
1778 tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
1779 } else {
1780 tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
1781 a1, a3, a5, const_args[5]);
1783 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1784 break;
1785 case INDEX_op_neg_i32:
1786 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
1787 break;
1788 case INDEX_op_not_i32:
1789 tcg_out_dat_reg(s, COND_AL,
1790 ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
1791 break;
1792 case INDEX_op_mul_i32:
1793 tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
1794 break;
1795 case INDEX_op_mulu2_i32:
1796 tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1797 break;
1798 case INDEX_op_muls2_i32:
1799 tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1800 break;
1801 /* XXX: Perhaps args[2] & 0x1f is wrong */
1802 case INDEX_op_shl_i32:
1803 c = const_args[2] ?
1804 SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
1805 goto gen_shift32;
1806 case INDEX_op_shr_i32:
1807 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
1808 SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
1809 goto gen_shift32;
1810 case INDEX_op_sar_i32:
1811 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
1812 SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
1813 goto gen_shift32;
1814 case INDEX_op_rotr_i32:
1815 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
1816 SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
1817 /* Fall through. */
1818 gen_shift32:
1819 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
1820 break;
1822 case INDEX_op_rotl_i32:
1823 if (const_args[2]) {
1824 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1825 ((0x20 - args[2]) & 0x1f) ?
1826 SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
1827 SHIFT_IMM_LSL(0));
1828 } else {
1829 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
1830 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1831 SHIFT_REG_ROR(TCG_REG_TMP));
1833 break;
1835 case INDEX_op_ctz_i32:
1836 tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
1837 a1 = TCG_REG_TMP;
1838 goto do_clz;
1840 case INDEX_op_clz_i32:
1841 a1 = args[1];
1842 do_clz:
1843 a0 = args[0];
1844 a2 = args[2];
1845 c = const_args[2];
1846 if (c && a2 == 32) {
1847 tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
1848 break;
1850 tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0);
1851 tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0);
1852 if (c || a0 != a2) {
1853 tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c);
1855 break;

    case INDEX_op_brcond_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[1], const_args[1]);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
                           arg_label(args[3]));
        break;
    case INDEX_op_brcond2_i32:
        /* The resulting conditions are:
         * TCG_COND_EQ    -->  a0 == a2 && a1 == a3,
         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) || a1 != a3,
         * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3,
         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
         * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
         */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[3], const_args[3]);
        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[2], const_args[2]);
        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
                           arg_label(args[5]));
        break;
    case INDEX_op_setcond_i32:
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[2], const_args[2]);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
                        ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
                        ARITH_MOV, args[0], 0, 0);
        break;
    case INDEX_op_setcond2_i32:
        /* See brcond2_i32 comment */
        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[2], args[4], const_args[4]);
        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
                        args[1], args[3], const_args[3]);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
                        ARITH_MOV, args[0], 0, 1);
        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
                        ARITH_MOV, args[0], 0, 0);
        break;
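
    /* Guest memory accesses; the final argument to the helpers selects
       64-bit data.  */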
    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

    case INDEX_op_bswap16_i32:
        tcg_out_bswap16(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
        break;
    case INDEX_op_ext16u_i32:
        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
        break;

    case INDEX_op_deposit_i32:
        tcg_out_deposit(s, COND_AL, args[0], args[2],
                        args[3], args[4], const_args[2]);
        break;
    case INDEX_op_extract_i32:
        tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;
    case INDEX_op_sextract_i32:
        tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
        break;

    case INDEX_op_div_i32:
        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
        break;
    case INDEX_op_divu_i32:
        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, args[0]);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }
}

static const TCGTargetOpDef arm_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_br, { } },

    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "r", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    /* TODO: "r", "r", "ri" */
    { INDEX_op_add_i32, { "r", "r", "rIN" } },
    { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
    { INDEX_op_mul_i32, { "r", "r", "r" } },
    { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
    { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
    { INDEX_op_and_i32, { "r", "r", "rIK" } },
    { INDEX_op_andc_i32, { "r", "r", "rIK" } },
    { INDEX_op_or_i32, { "r", "r", "rI" } },
    { INDEX_op_xor_i32, { "r", "r", "rI" } },
    { INDEX_op_neg_i32, { "r", "r" } },
    { INDEX_op_not_i32, { "r", "r" } },

    { INDEX_op_shl_i32, { "r", "r", "ri" } },
    { INDEX_op_shr_i32, { "r", "r", "ri" } },
    { INDEX_op_sar_i32, { "r", "r", "ri" } },
    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
    { INDEX_op_clz_i32, { "r", "r", "rIK" } },
    { INDEX_op_ctz_i32, { "r", "r", "rIK" } },

    { INDEX_op_brcond_i32, { "r", "rIN" } },
    { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
    { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },

    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
    { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
    { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },

#if TARGET_LONG_BITS == 32
    { INDEX_op_qemu_ld_i32, { "r", "l" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
    { INDEX_op_qemu_st_i32, { "s", "s" } },
    { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
#else
    { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
    { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
    { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
#endif

    { INDEX_op_bswap16_i32, { "r", "r" } },
    { INDEX_op_bswap32_i32, { "r", "r" } },

    { INDEX_op_ext8s_i32, { "r", "r" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
    { INDEX_op_extract_i32, { "r", "r" } },
    { INDEX_op_sextract_i32, { "r", "r" } },

    { INDEX_op_div_i32, { "r", "r", "r" } },
    { INDEX_op_divu_i32, { "r", "r", "r" } },

    { INDEX_op_mb, { } },
    { -1 },
};
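
/* Linear search of arm_op_defs for @op; a NULL return means there is
   no constraint entry for that opcode in this backend.  */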
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
    int i, n = ARRAY_SIZE(arm_op_defs);

    for (i = 0; i < n; ++i) {
        if (arm_op_defs[i].op == op) {
            return &arm_op_defs[i];
        }
    }
    return NULL;
}

static void tcg_target_init(TCGContext *s)
{
    /* Only probe for the platform and capabilities if we haven't already
       determined maximum values at compile time.  */
#ifndef use_idiv_instructions
    {
        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
    }
#endif
    if (__ARM_ARCH < 7) {
        const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
            arm_arch = pl[1] - '0';
        }
    }
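
    /* Per the AAPCS, r0-r3, r12 (ip) and r14 (lr) are call-clobbered;
       r4-r11 and sp must be preserved across calls.  */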
    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
                     (1 << TCG_REG_R0) |
                     (1 << TCG_REG_R1) |
                     (1 << TCG_REG_R2) |
                     (1 << TCG_REG_R3) |
                     (1 << TCG_REG_R12) |
                     (1 << TCG_REG_R14));

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_st32(s, COND_AL, arg, arg1, arg2);
}
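
/* Constants cannot be stored to memory directly; returning false makes
   the caller materialise the value in a register first.  */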
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
}

static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                TCGReg ret, tcg_target_long arg)
{
    tcg_out_movi32(s, COND_AL, ret, arg);
}

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
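/* That is r4-r11 plus lr: nine 4-byte words pushed by the stmdb below.  */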

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & -TCG_TARGET_STACK_ALIGN)

static void tcg_target_qemu_prologue(TCGContext *s)
{
    int stack_addend;

    /* Calling convention requires us to save r4-r11 and lr.  */
    /* stmdb sp!, { r4 - r11, lr } */
    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);

    /* Reserve callee argument and tcg temp space.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;

    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
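
    /* The second argument to the prologue is the address of the translated
       block; jump straight to it.  Everything that follows is the epilogue,
       reached via tb_ret_addr when a block executes exit_tb.  */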
    tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
    tb_ret_addr = s->code_ptr;

    /* Epilogue.  We branch here via tb_ret_addr.  */
    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
                   TCG_REG_CALL_STACK, stack_addend, 1);

    /* ldmia sp!, { r4 - r11, pc } */
    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[18];
} DebugFrame;

#define ELF_HOST_MACHINE EM_ARM

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
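/* (Each uleb128 byte carries 7 bits of payload, so two bytes cover any
   value below 1 << 14.)  */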

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 14,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 13,                         /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* The following must match the stmdb in the prologue.  */
        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
        0x89, 4,                        /* DW_CFA_offset, r9, -16 */
        0x88, 5,                        /* DW_CFA_offset, r8, -20 */
        0x87, 6,                        /* DW_CFA_offset, r7, -24 */
        0x86, 7,                        /* DW_CFA_offset, r6, -28 */
        0x85, 8,                        /* DW_CFA_offset, r5, -32 */
        0x84, 9,                        /* DW_CFA_offset, r4, -36 */
    }
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}