tcg/arm/tcg-target.inc.c
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Andrzej Zaborowski
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "elf.h"
26 #include "tcg-be-ldst.h"
28 /* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */
29 #ifndef __ARM_ARCH
30 # if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
31 || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
32 || defined(__ARM_ARCH_7EM__)
33 # define __ARM_ARCH 7
34 # elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
35 || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
36 || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
37 # define __ARM_ARCH 6
38 # elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
39 || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
40 || defined(__ARM_ARCH_5TEJ__)
41 # define __ARM_ARCH 5
42 # else
43 # define __ARM_ARCH 4
44 # endif
45 #endif
47 static int arm_arch = __ARM_ARCH;
49 #if defined(__ARM_ARCH_5T__) \
50 || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
51 # define use_armv5t_instructions 1
52 #else
53 # define use_armv5t_instructions use_armv6_instructions
54 #endif
56 #define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6)
57 #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
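/* Editor's note (not in the original source): each of these tests folds the
   compile-time __ARM_ARCH floor together with the runtime arm_arch variable
   above, so a binary built against an older baseline can still take the v6/v7
   paths if arm_arch has been raised at startup (handled outside this excerpt). */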
59 #ifndef use_idiv_instructions
60 bool use_idiv_instructions;
61 #endif
63 /* ??? Ought to think about changing CONFIG_SOFTMMU to always be defined. */
64 #ifdef CONFIG_SOFTMMU
65 # define USING_SOFTMMU 1
66 #else
67 # define USING_SOFTMMU 0
68 #endif
70 #ifdef CONFIG_DEBUG_TCG
71 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
72 "%r0",
73 "%r1",
74 "%r2",
75 "%r3",
76 "%r4",
77 "%r5",
78 "%r6",
79 "%r7",
80 "%r8",
81 "%r9",
82 "%r10",
83 "%r11",
84 "%r12",
85 "%r13",
86 "%r14",
87 "%pc",
89 #endif
91 static const int tcg_target_reg_alloc_order[] = {
92 TCG_REG_R4,
93 TCG_REG_R5,
94 TCG_REG_R6,
95 TCG_REG_R7,
96 TCG_REG_R8,
97 TCG_REG_R9,
98 TCG_REG_R10,
99 TCG_REG_R11,
100 TCG_REG_R13,
101 TCG_REG_R0,
102 TCG_REG_R1,
103 TCG_REG_R2,
104 TCG_REG_R3,
105 TCG_REG_R12,
106 TCG_REG_R14,
109 static const int tcg_target_call_iarg_regs[4] = {
110 TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
112 static const int tcg_target_call_oarg_regs[2] = {
113 TCG_REG_R0, TCG_REG_R1
116 #define TCG_REG_TMP TCG_REG_R12
118 static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
120 ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
121 *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
124 static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
126 ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
127 tcg_insn_unit insn = atomic_read(code_ptr);
128 tcg_debug_assert(offset == sextract32(offset, 0, 24));
129 atomic_set(code_ptr, deposit32(insn, 0, 24, offset));
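/* Editor's note (not in the original source): B/BL store a signed 24-bit
   word offset relative to the branch address plus 8 (the ARM-mode PC bias),
   which is why both helpers above subtract 8 and shift right by 2 before
   depositing the offset into the low 24 bits of the instruction. */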
132 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
133 intptr_t value, intptr_t addend)
135 tcg_debug_assert(type == R_ARM_PC24);
136 tcg_debug_assert(addend == 0);
137 reloc_pc24(code_ptr, (tcg_insn_unit *)value);
140 #define TCG_CT_CONST_ARM 0x100
141 #define TCG_CT_CONST_INV 0x200
142 #define TCG_CT_CONST_NEG 0x400
143 #define TCG_CT_CONST_ZERO 0x800
145 /* parse target specific constraints */
146 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
148 const char *ct_str;
150 ct_str = *pct_str;
151 switch (ct_str[0]) {
152 case 'I':
153 ct->ct |= TCG_CT_CONST_ARM;
154 break;
155 case 'K':
156 ct->ct |= TCG_CT_CONST_INV;
157 break;
158 case 'N': /* The gcc constraint letter is L, already used here. */
159 ct->ct |= TCG_CT_CONST_NEG;
160 break;
161 case 'Z':
162 ct->ct |= TCG_CT_CONST_ZERO;
163 break;
165 case 'r':
166 ct->ct |= TCG_CT_REG;
167 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
168 break;
170 /* qemu_ld address */
171 case 'l':
172 ct->ct |= TCG_CT_REG;
173 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
174 #ifdef CONFIG_SOFTMMU
175 /* r0-r2,lr will be overwritten when reading the tlb entry,
176 so don't use these. */
177 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
178 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
179 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
180 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
181 #endif
182 break;
184 /* qemu_st address & data */
185 case 's':
186 ct->ct |= TCG_CT_REG;
187 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
188 /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
189 and r0-r1 when doing the byte swapping, so don't use these. */
190 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
191 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
192 #if defined(CONFIG_SOFTMMU)
193 /* Avoid clashes with registers being used for helper args */
194 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
195 #if TARGET_LONG_BITS == 64
196 /* Avoid clashes with registers being used for helper args */
197 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
198 #endif
199 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
200 #endif
201 break;
203 default:
204 return -1;
206 ct_str++;
207 *pct_str = ct_str;
209 return 0;
212 static inline uint32_t rotl(uint32_t val, int n)
214 return (val << n) | (val >> (32 - n));
217 /* ARM immediates for ALU instructions are made of an unsigned 8-bit value
218 right-rotated by an even amount between 0 and 30. */
219 static inline int encode_imm(uint32_t imm)
221 int shift;
223 /* simple case, only lower bits */
224 if ((imm & ~0xff) == 0)
225 return 0;
226 /* then try a simple even shift */
227 shift = ctz32(imm) & ~1;
228 if (((imm >> shift) & ~0xff) == 0)
229 return 32 - shift;
230 /* now try harder with rotations */
231 if ((rotl(imm, 2) & ~0xff) == 0)
232 return 2;
233 if ((rotl(imm, 4) & ~0xff) == 0)
234 return 4;
235 if ((rotl(imm, 6) & ~0xff) == 0)
236 return 6;
237 /* imm can't be encoded */
238 return -1;
241 static inline int check_fit_imm(uint32_t imm)
243 return encode_imm(imm) >= 0;
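/* Editor's worked example (not in the original source): encode_imm(0x00ff0000)
   finds ctz32 == 16, so shift == 16 and the function returns 32 - 16 == 16.
   Callers such as tcg_out_movi32() then build the operand as
   rotl(0x00ff0000, 16) | (16 << 7) == 0xff | 0x800, i.e. the 8-bit value 0xff
   with a rotate-right-by-16 field, which the CPU rotates back to 0x00ff0000. */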
246 /* Test if a constant matches the constraint.
247 * TODO: define constraints for:
249 * ldr/str offset: between -0xfff and 0xfff
250 * ldrh/strh offset: between -0xff and 0xff
251 * mov operand2: values represented with x << (2 * y), x < 0x100
252 * add, sub, eor...: ditto
254 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
255 const TCGArgConstraint *arg_ct)
257 int ct;
258 ct = arg_ct->ct;
259 if (ct & TCG_CT_CONST) {
260 return 1;
261 } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
262 return 1;
263 } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
264 return 1;
265 } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
266 return 1;
267 } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
268 return 1;
269 } else {
270 return 0;
274 #define TO_CPSR (1 << 20)
276 typedef enum {
277 ARITH_AND = 0x0 << 21,
278 ARITH_EOR = 0x1 << 21,
279 ARITH_SUB = 0x2 << 21,
280 ARITH_RSB = 0x3 << 21,
281 ARITH_ADD = 0x4 << 21,
282 ARITH_ADC = 0x5 << 21,
283 ARITH_SBC = 0x6 << 21,
284 ARITH_RSC = 0x7 << 21,
285 ARITH_TST = 0x8 << 21 | TO_CPSR,
286 ARITH_CMP = 0xa << 21 | TO_CPSR,
287 ARITH_CMN = 0xb << 21 | TO_CPSR,
288 ARITH_ORR = 0xc << 21,
289 ARITH_MOV = 0xd << 21,
290 ARITH_BIC = 0xe << 21,
291 ARITH_MVN = 0xf << 21,
293 INSN_LDR_IMM = 0x04100000,
294 INSN_LDR_REG = 0x06100000,
295 INSN_STR_IMM = 0x04000000,
296 INSN_STR_REG = 0x06000000,
298 INSN_LDRH_IMM = 0x005000b0,
299 INSN_LDRH_REG = 0x001000b0,
300 INSN_LDRSH_IMM = 0x005000f0,
301 INSN_LDRSH_REG = 0x001000f0,
302 INSN_STRH_IMM = 0x004000b0,
303 INSN_STRH_REG = 0x000000b0,
305 INSN_LDRB_IMM = 0x04500000,
306 INSN_LDRB_REG = 0x06500000,
307 INSN_LDRSB_IMM = 0x005000d0,
308 INSN_LDRSB_REG = 0x001000d0,
309 INSN_STRB_IMM = 0x04400000,
310 INSN_STRB_REG = 0x06400000,
312 INSN_LDRD_IMM = 0x004000d0,
313 INSN_LDRD_REG = 0x000000d0,
314 INSN_STRD_IMM = 0x004000f0,
315 INSN_STRD_REG = 0x000000f0,
317 INSN_DMB_ISH = 0x5bf07ff5,
318 INSN_DMB_MCR = 0xba0f07ee,
320 } ARMInsn;
322 #define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00)
323 #define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20)
324 #define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40)
325 #define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60)
326 #define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10)
327 #define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30)
328 #define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50)
329 #define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70)
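/* Editor's note (not in the original source): these macros build the ARM
   shifter-operand field of a data-processing instruction.  The *_IMM forms
   place the 5-bit shift amount at bits 11:7; the *_REG forms place the
   shift-amount register at bits 11:8 and set bit 4.  SHIFT_IMM_LSL(0) thus
   means "no shift" and is used for plain register operands throughout. */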
331 enum arm_cond_code_e {
332 COND_EQ = 0x0,
333 COND_NE = 0x1,
334 COND_CS = 0x2, /* Unsigned greater or equal */
335 COND_CC = 0x3, /* Unsigned less than */
336 COND_MI = 0x4, /* Negative */
337 COND_PL = 0x5, /* Zero or greater */
338 COND_VS = 0x6, /* Overflow */
339 COND_VC = 0x7, /* No overflow */
340 COND_HI = 0x8, /* Unsigned greater than */
341 COND_LS = 0x9, /* Unsigned less or equal */
342 COND_GE = 0xa,
343 COND_LT = 0xb,
344 COND_GT = 0xc,
345 COND_LE = 0xd,
346 COND_AL = 0xe,
349 static const uint8_t tcg_cond_to_arm_cond[] = {
350 [TCG_COND_EQ] = COND_EQ,
351 [TCG_COND_NE] = COND_NE,
352 [TCG_COND_LT] = COND_LT,
353 [TCG_COND_GE] = COND_GE,
354 [TCG_COND_LE] = COND_LE,
355 [TCG_COND_GT] = COND_GT,
356 /* unsigned */
357 [TCG_COND_LTU] = COND_CC,
358 [TCG_COND_GEU] = COND_CS,
359 [TCG_COND_LEU] = COND_LS,
360 [TCG_COND_GTU] = COND_HI,
363 static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
365 tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
368 static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
370 tcg_out32(s, (cond << 28) | 0x0a000000 |
371 (((offset - 8) >> 2) & 0x00ffffff));
374 static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
376 /* We pay attention here to not modify the branch target by masking
377 the corresponding bytes. This ensures that caches and memory are
378 kept coherent during retranslation. */
379 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
382 static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
384 /* We pay attention here to not modify the branch target by masking
385 the corresponding bytes. This ensures that caches and memory are
386 kept coherent during retranslation. */
387 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
390 static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
392 tcg_out32(s, (cond << 28) | 0x0b000000 |
393 (((offset - 8) >> 2) & 0x00ffffff));
396 static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
398 tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
401 static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
403 tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
404 (((offset - 8) >> 2) & 0x00ffffff));
407 static inline void tcg_out_dat_reg(TCGContext *s,
408 int cond, int opc, int rd, int rn, int rm, int shift)
410 tcg_out32(s, (cond << 28) | (0 << 25) | opc |
411 (rn << 16) | (rd << 12) | shift | rm);
414 static inline void tcg_out_nop(TCGContext *s)
416 if (use_armv7_instructions) {
417 /* Architected nop introduced in v6k. */
418 /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this
419 also Just So Happened to do nothing on pre-v6k so that we
420 don't need to conditionalize it? */
421 tcg_out32(s, 0xe320f000);
422 } else {
423 /* Prior to that the assembler uses mov r0, r0. */
424 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
428 static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
430 /* Simple reg-reg move, optimising out the 'do nothing' case */
431 if (rd != rm) {
432 tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
436 static inline void tcg_out_dat_imm(TCGContext *s,
437 int cond, int opc, int rd, int rn, int im)
439 tcg_out32(s, (cond << 28) | (1 << 25) | opc |
440 (rn << 16) | (rd << 12) | im);
443 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
445 int rot, opc, rn;
447 /* For armv7, make sure not to use movw+movt when mov/mvn would do.
448 Speed things up by only checking when movt would be required.
449 Prior to armv7, have one go at fully rotated immediates before
450 doing the decomposition thing below. */
451 if (!use_armv7_instructions || (arg & 0xffff0000)) {
452 rot = encode_imm(arg);
453 if (rot >= 0) {
454 tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
455 rotl(arg, rot) | (rot << 7));
456 return;
458 rot = encode_imm(~arg);
459 if (rot >= 0) {
460 tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
461 rotl(~arg, rot) | (rot << 7));
462 return;
466 /* Use movw + movt. */
467 if (use_armv7_instructions) {
468 /* movw */
469 tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
470 | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
471 if (arg & 0xffff0000) {
472 /* movt */
473 tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
474 | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
476 return;
479 /* TODO: This is very suboptimal, we can easily have a constant
480 pool somewhere after all the instructions. */
481 opc = ARITH_MOV;
482 rn = 0;
483 /* If we have lots of leading 1's, we can shorten the sequence by
484 beginning with mvn and then clearing higher bits with eor. */
485 if (clz32(~arg) > clz32(arg)) {
486 opc = ARITH_MVN, arg = ~arg;
488 do {
489 int i = ctz32(arg) & ~1;
490 rot = ((32 - i) << 7) & 0xf00;
491 tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
492 arg &= ~(0xff << i);
494 opc = ARITH_EOR;
495 rn = rd;
496 } while (arg);
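/* Editor's sketch of the cases above (not in the original source):
   - arg == 0x00ff0000: encode_imm() succeeds and a single MOV is emitted;
   - arg == 0x12340000 on ARMv7: no rotated immediate fits, so the code emits
     "movw rd, #0x0000" followed by "movt rd, #0x1234";
   - on pre-v7 hosts the do/while loop peels off one even-aligned byte per
     iteration, starting with MOV (or MVN) and continuing with EOR. */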
499 static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
500 TCGArg lhs, TCGArg rhs, int rhs_is_const)
502 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
503 * rhs must satisfy the "rI" constraint.
505 if (rhs_is_const) {
506 int rot = encode_imm(rhs);
507 tcg_debug_assert(rot >= 0);
508 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
509 } else {
510 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
514 static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
515 TCGReg dst, TCGReg lhs, TCGArg rhs,
516 bool rhs_is_const)
518 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
519 * rhs must satisfy the "rIK" constraint.
521 if (rhs_is_const) {
522 int rot = encode_imm(rhs);
523 if (rot < 0) {
524 rhs = ~rhs;
525 rot = encode_imm(rhs);
526 tcg_debug_assert(rot >= 0);
527 opc = opinv;
529 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
530 } else {
531 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
535 static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
536 TCGArg dst, TCGArg lhs, TCGArg rhs,
537 bool rhs_is_const)
539 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
540 * rhs must satisfy the "rIN" constraint.
542 if (rhs_is_const) {
543 int rot = encode_imm(rhs);
544 if (rot < 0) {
545 rhs = -rhs;
546 rot = encode_imm(rhs);
547 tcg_debug_assert(rot >= 0);
548 opc = opneg;
550 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
551 } else {
552 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
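/* Editor's note (not in the original source): the three helpers above back the
   "I", "K" and "N" constraint letters parsed in target_parse_constraint().
   For example "and rd, rn, #0xffffff00" has no valid rotated immediate, but
   its complement 0xff does, so tcg_out_dat_rIK(s, cond, ARITH_AND, ARITH_BIC,
   ...) emits "bic rd, rn, #0xff" instead; likewise tcg_out_dat_rIN(s, cond,
   ARITH_ADD, ARITH_SUB, ...) turns "add rd, rn, #-4" into "sub rd, rn, #4". */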
556 static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
557 TCGReg rn, TCGReg rm)
559 /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */
560 if (!use_armv6_instructions && rd == rn) {
561 if (rd == rm) {
562 /* rd == rn == rm; copy an input to tmp first. */
563 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
564 rm = rn = TCG_REG_TMP;
565 } else {
566 rn = rm;
567 rm = rd;
570 /* mul */
571 tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
574 static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
575 TCGReg rd1, TCGReg rn, TCGReg rm)
577 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
578 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
579 if (rd0 == rm || rd1 == rm) {
580 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
581 rn = TCG_REG_TMP;
582 } else {
583 TCGReg t = rn;
584 rn = rm;
585 rm = t;
588 /* umull */
589 tcg_out32(s, (cond << 28) | 0x00800090 |
590 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
593 static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
594 TCGReg rd1, TCGReg rn, TCGReg rm)
596 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
597 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
598 if (rd0 == rm || rd1 == rm) {
599 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
600 rn = TCG_REG_TMP;
601 } else {
602 TCGReg t = rn;
603 rn = rm;
604 rm = t;
607 /* smull */
608 tcg_out32(s, (cond << 28) | 0x00c00090 |
609 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
612 static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
614 tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
617 static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
619 tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
622 static inline void tcg_out_ext8s(TCGContext *s, int cond,
623 int rd, int rn)
625 if (use_armv6_instructions) {
626 /* sxtb */
627 tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
628 } else {
629 tcg_out_dat_reg(s, cond, ARITH_MOV,
630 rd, 0, rn, SHIFT_IMM_LSL(24));
631 tcg_out_dat_reg(s, cond, ARITH_MOV,
632 rd, 0, rd, SHIFT_IMM_ASR(24));
636 static inline void tcg_out_ext8u(TCGContext *s, int cond,
637 int rd, int rn)
639 tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
642 static inline void tcg_out_ext16s(TCGContext *s, int cond,
643 int rd, int rn)
645 if (use_armv6_instructions) {
646 /* sxth */
647 tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
648 } else {
649 tcg_out_dat_reg(s, cond, ARITH_MOV,
650 rd, 0, rn, SHIFT_IMM_LSL(16));
651 tcg_out_dat_reg(s, cond, ARITH_MOV,
652 rd, 0, rd, SHIFT_IMM_ASR(16));
656 static inline void tcg_out_ext16u(TCGContext *s, int cond,
657 int rd, int rn)
659 if (use_armv6_instructions) {
660 /* uxth */
661 tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
662 } else {
663 tcg_out_dat_reg(s, cond, ARITH_MOV,
664 rd, 0, rn, SHIFT_IMM_LSL(16));
665 tcg_out_dat_reg(s, cond, ARITH_MOV,
666 rd, 0, rd, SHIFT_IMM_LSR(16));
670 static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
672 if (use_armv6_instructions) {
673 /* revsh */
674 tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
675 } else {
676 tcg_out_dat_reg(s, cond, ARITH_MOV,
677 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
678 tcg_out_dat_reg(s, cond, ARITH_MOV,
679 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
680 tcg_out_dat_reg(s, cond, ARITH_ORR,
681 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
685 static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
687 if (use_armv6_instructions) {
688 /* rev16 */
689 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
690 } else {
691 tcg_out_dat_reg(s, cond, ARITH_MOV,
692 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
693 tcg_out_dat_reg(s, cond, ARITH_MOV,
694 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
695 tcg_out_dat_reg(s, cond, ARITH_ORR,
696 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
700 /* swap the two low bytes assuming that the two high input bytes and the
701 two high output bytes can hold any value. */
702 static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
704 if (use_armv6_instructions) {
705 /* rev16 */
706 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
707 } else {
708 tcg_out_dat_reg(s, cond, ARITH_MOV,
709 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
710 tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
711 tcg_out_dat_reg(s, cond, ARITH_ORR,
712 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
716 static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
718 if (use_armv6_instructions) {
719 /* rev */
720 tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
721 } else {
722 tcg_out_dat_reg(s, cond, ARITH_EOR,
723 TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
724 tcg_out_dat_imm(s, cond, ARITH_BIC,
725 TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
726 tcg_out_dat_reg(s, cond, ARITH_MOV,
727 rd, 0, rn, SHIFT_IMM_ROR(8));
728 tcg_out_dat_reg(s, cond, ARITH_EOR,
729 rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
733 bool tcg_target_deposit_valid(int ofs, int len)
735 /* ??? Without bfi, we could improve over generic code by combining
736 the right-shift from a non-zero ofs with the orr. We do run into
737 problems when rd == rs, and the mask generated from ofs+len doesn't
738 fit into an immediate. We would have to be careful not to pessimize
739 wrt the optimizations performed on the expanded code. */
740 return use_armv7_instructions;
743 static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
744 TCGArg a1, int ofs, int len, bool const_a1)
746 if (const_a1) {
747 /* bfi becomes bfc with rn == 15. */
748 a1 = 15;
750 /* bfi/bfc */
751 tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
752 | (ofs << 7) | ((ofs + len - 1) << 16));
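/* Editor's example (not in the original source): tcg_out_deposit(s, COND_AL,
   rd, rs, 8, 8, false) emits "bfi rd, rs, #8, #8" - the lsb (8) lands in
   bits 11:7 and the msb (ofs + len - 1 == 15) in bits 20:16 of the
   0x07c00010 template above. */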
755 /* Note that this routine is used for both LDR and LDRH formats, so we do
756 not wish to include an immediate shift at this point. */
757 static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
758 TCGReg rn, TCGReg rm, bool u, bool p, bool w)
760 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
761 | (w << 21) | (rn << 16) | (rt << 12) | rm);
764 static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
765 TCGReg rn, int imm8, bool p, bool w)
767 bool u = 1;
768 if (imm8 < 0) {
769 imm8 = -imm8;
770 u = 0;
772 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
773 (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
776 static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
777 TCGReg rn, int imm12, bool p, bool w)
779 bool u = 1;
780 if (imm12 < 0) {
781 imm12 = -imm12;
782 u = 0;
784 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
785 (rn << 16) | (rt << 12) | imm12);
788 static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
789 TCGReg rn, int imm12)
791 tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
794 static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
795 TCGReg rn, int imm12)
797 tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
800 static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
801 TCGReg rn, TCGReg rm)
803 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
806 static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
807 TCGReg rn, TCGReg rm)
809 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
812 static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
813 TCGReg rn, int imm8)
815 tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
818 static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
819 TCGReg rn, TCGReg rm)
821 tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
824 static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
825 TCGReg rn, int imm8)
827 tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
830 static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
831 TCGReg rn, TCGReg rm)
833 tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
836 /* Register pre-increment with base writeback. */
837 static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
838 TCGReg rn, TCGReg rm)
840 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
843 static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
844 TCGReg rn, TCGReg rm)
846 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
849 static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
850 TCGReg rn, int imm8)
852 tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
855 static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
856 TCGReg rn, int imm8)
858 tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
861 static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
862 TCGReg rn, TCGReg rm)
864 tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
867 static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
868 TCGReg rn, TCGReg rm)
870 tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
873 static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
874 TCGReg rn, int imm8)
876 tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
879 static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
880 TCGReg rn, TCGReg rm)
882 tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
885 static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
886 TCGReg rn, int imm12)
888 tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
891 static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
892 TCGReg rn, int imm12)
894 tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
897 static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
898 TCGReg rn, TCGReg rm)
900 tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
903 static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
904 TCGReg rn, TCGReg rm)
906 tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
909 static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
910 TCGReg rn, int imm8)
912 tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
915 static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
916 TCGReg rn, TCGReg rm)
918 tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
921 static inline void tcg_out_ld32u(TCGContext *s, int cond,
922 int rd, int rn, int32_t offset)
924 if (offset > 0xfff || offset < -0xfff) {
925 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
926 tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
927 } else
928 tcg_out_ld32_12(s, cond, rd, rn, offset);
931 static inline void tcg_out_st32(TCGContext *s, int cond,
932 int rd, int rn, int32_t offset)
934 if (offset > 0xfff || offset < -0xfff) {
935 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
936 tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
937 } else
938 tcg_out_st32_12(s, cond, rd, rn, offset);
941 static inline void tcg_out_ld16u(TCGContext *s, int cond,
942 int rd, int rn, int32_t offset)
944 if (offset > 0xff || offset < -0xff) {
945 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
946 tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
947 } else
948 tcg_out_ld16u_8(s, cond, rd, rn, offset);
951 static inline void tcg_out_ld16s(TCGContext *s, int cond,
952 int rd, int rn, int32_t offset)
954 if (offset > 0xff || offset < -0xff) {
955 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
956 tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
957 } else
958 tcg_out_ld16s_8(s, cond, rd, rn, offset);
961 static inline void tcg_out_st16(TCGContext *s, int cond,
962 int rd, int rn, int32_t offset)
964 if (offset > 0xff || offset < -0xff) {
965 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
966 tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
967 } else
968 tcg_out_st16_8(s, cond, rd, rn, offset);
971 static inline void tcg_out_ld8u(TCGContext *s, int cond,
972 int rd, int rn, int32_t offset)
974 if (offset > 0xfff || offset < -0xfff) {
975 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
976 tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
977 } else
978 tcg_out_ld8_12(s, cond, rd, rn, offset);
981 static inline void tcg_out_ld8s(TCGContext *s, int cond,
982 int rd, int rn, int32_t offset)
984 if (offset > 0xff || offset < -0xff) {
985 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
986 tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
987 } else
988 tcg_out_ld8s_8(s, cond, rd, rn, offset);
991 static inline void tcg_out_st8(TCGContext *s, int cond,
992 int rd, int rn, int32_t offset)
994 if (offset > 0xfff || offset < -0xfff) {
995 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
996 tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
997 } else
998 tcg_out_st8_12(s, cond, rd, rn, offset);
1001 /* The _goto case is normally between TBs within the same code buffer, and
1002 * with the code buffer limited to 16MB we wouldn't need the long case.
1003 * But we also use it for the tail-call to the qemu_ld/st helpers, which does need it.
1005 static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
1007 intptr_t addri = (intptr_t)addr;
1008 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1010 if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
1011 tcg_out_b(s, cond, disp);
1012 return;
1015 tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
1016 if (use_armv5t_instructions) {
1017 tcg_out_bx(s, cond, TCG_REG_TMP);
1018 } else {
1019 if (addri & 1) {
1020 tcg_abort();
1022 tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
1026 /* The call case is mostly used for helpers - so it's not unreasonable
1027 * for them to be beyond branch range */
1028 static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
1030 intptr_t addri = (intptr_t)addr;
1031 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1033 if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
1034 if (addri & 1) {
1035 /* Use BLX if the target is in Thumb mode */
1036 if (!use_armv5t_instructions) {
1037 tcg_abort();
1039 tcg_out_blx_imm(s, disp);
1040 } else {
1041 tcg_out_bl(s, COND_AL, disp);
1043 } else if (use_armv7_instructions) {
1044 tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
1045 tcg_out_blx(s, COND_AL, TCG_REG_TMP);
1046 } else {
1047 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
1048 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
1049 tcg_out32(s, addri);
1053 void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
1055 tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
1056 tcg_insn_unit *target = (tcg_insn_unit *)addr;
1058 /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */
1059 reloc_pc24_atomic(code_ptr, target);
1060 flush_icache_range(jmp_addr, jmp_addr + 4);
1063 static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
1065 if (l->has_value) {
1066 tcg_out_goto(s, cond, l->u.value_ptr);
1067 } else {
1068 tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
1069 tcg_out_b_noaddr(s, cond);
1073 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1075 if (use_armv7_instructions) {
1076 tcg_out32(s, INSN_DMB_ISH);
1077 } else if (use_armv6_instructions) {
1078 tcg_out32(s, INSN_DMB_MCR);
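/* Editor's note (not in the original source): ARMv7 hosts get the dedicated
   DMB ISH barrier; ARMv6, which lacks DMB, presumably falls back to the
   equivalent CP15 c7, c10, 5 MCR operation encoded by INSN_DMB_MCR; hosts
   older than v6 emit no barrier instruction here at all. */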
1082 #ifdef CONFIG_SOFTMMU
1083 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1084 * int mmu_idx, uintptr_t ra)
1086 static void * const qemu_ld_helpers[16] = {
1087 [MO_UB] = helper_ret_ldub_mmu,
1088 [MO_SB] = helper_ret_ldsb_mmu,
1090 [MO_LEUW] = helper_le_lduw_mmu,
1091 [MO_LEUL] = helper_le_ldul_mmu,
1092 [MO_LEQ] = helper_le_ldq_mmu,
1093 [MO_LESW] = helper_le_ldsw_mmu,
1094 [MO_LESL] = helper_le_ldul_mmu,
1096 [MO_BEUW] = helper_be_lduw_mmu,
1097 [MO_BEUL] = helper_be_ldul_mmu,
1098 [MO_BEQ] = helper_be_ldq_mmu,
1099 [MO_BESW] = helper_be_ldsw_mmu,
1100 [MO_BESL] = helper_be_ldul_mmu,
1103 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1104 * uintxx_t val, int mmu_idx, uintptr_t ra)
1106 static void * const qemu_st_helpers[16] = {
1107 [MO_UB] = helper_ret_stb_mmu,
1108 [MO_LEUW] = helper_le_stw_mmu,
1109 [MO_LEUL] = helper_le_stl_mmu,
1110 [MO_LEQ] = helper_le_stq_mmu,
1111 [MO_BEUW] = helper_be_stw_mmu,
1112 [MO_BEUL] = helper_be_stl_mmu,
1113 [MO_BEQ] = helper_be_stq_mmu,
1116 /* Helper routines for marshalling helper function arguments into
1117 * the correct registers and stack.
1118 * argreg is where we want to put this argument, arg is the argument itself.
1119 * Return value is the updated argreg ready for the next call.
1120 * Note that argregs 0..3 are real registers, 4 and up go on the stack.
1122 * We provide routines for arguments which are: immediate, 32 bit
1123 * value in register, 16 and 8 bit values in register (which must be zero
1124 * extended before use) and 64 bit value in a lo:hi register pair.
1126 #define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
1127 static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
1129 if (argreg < 4) { \
1130 MOV_ARG(s, COND_AL, argreg, arg); \
1131 } else { \
1132 int ofs = (argreg - 4) * 4; \
1133 EXT_ARG; \
1134 tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
1135 tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
1137 return argreg + 1; \
1140 DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
1141 (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1142 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
1143 (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1144 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
1145 (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1146 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
1148 static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
1149 TCGReg arglo, TCGReg arghi)
1151 /* 64 bit arguments must go in even/odd register pairs
1152 * and in 8-aligned stack slots.
1154 if (argreg & 1) {
1155 argreg++;
1157 if (use_armv6_instructions && argreg >= 4
1158 && (arglo & 1) == 0 && arghi == arglo + 1) {
1159 tcg_out_strd_8(s, COND_AL, arglo,
1160 TCG_REG_CALL_STACK, (argreg - 4) * 4);
1161 return argreg + 2;
1162 } else {
1163 argreg = tcg_out_arg_reg32(s, argreg, arglo);
1164 argreg = tcg_out_arg_reg32(s, argreg, arghi);
1165 return argreg;
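/* Editor's worked example (not in the original source): for a 64-bit guest
   address, the load slow path below marshals helper(env, addr, oi, ra) as
   follows: env -> r0; argreg is bumped from 1 to 2 so the address pair lands
   in r2:r3; oi no longer fits in a register and is stored at [sp, #0]; the
   return address (r14) goes to [sp, #4]. */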
1169 #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
1171 /* We're expecting to use an 8-bit immediate to mask the TLB index. */
1172 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
1174 /* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
1175 Using the offset of the second entry in the last tlb table ensures
1176 that we can index all of the elements of the first entry. */
1177 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
1178 > 0xffff);
1180 /* Load and compare a TLB entry, leaving the flags set. Returns the register
1181 containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
1183 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1184 TCGMemOp opc, int mem_index, bool is_load)
1186 TCGReg base = TCG_AREG0;
1187 int cmp_off =
1188 (is_load
1189 ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1190 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
1191 int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
1192 unsigned s_bits = opc & MO_SIZE;
1193 unsigned a_bits = get_alignment_bits(opc);
1195 /* Should generate something like the following:
1196 * shr tmp, addrlo, #TARGET_PAGE_BITS (1)
1197 * add r2, env, #high
1198 * and r0, tmp, #(CPU_TLB_SIZE - 1) (2)
1199 * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3)
1200 * ldr r0, [r2, #cmp] (4)
1201 * tst addrlo, #s_mask
1202 * ldr r2, [r2, #add] (5)
1203 * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS
1205 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
1206 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
1208 /* We checked that the offset is contained within 16 bits above. */
1209 if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
1210 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1211 (24 << 7) | (cmp_off >> 8));
1212 base = TCG_REG_R2;
1213 add_off -= cmp_off & 0xff00;
1214 cmp_off &= 0xff;
1217 tcg_out_dat_imm(s, COND_AL, ARITH_AND,
1218 TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
1219 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1220 TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
1222 /* Load the tlb comparator. Use ldrd if needed and available,
1223 but due to how the pointer needs setting up, ldm isn't useful.
1224 Base arm5 doesn't have ldrd, but armv5te does. */
1225 if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
1226 tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1227 } else {
1228 tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1229 if (TARGET_LONG_BITS == 64) {
1230 tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
1234 /* Check alignment. We don't support inline unaligned accesses,
1235 but we can easily support overalignment checks. */
1236 if (a_bits < s_bits) {
1237 a_bits = s_bits;
1239 if (a_bits) {
1240 tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
1243 /* Load the tlb addend. */
1244 tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
1246 tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
1247 TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
1249 if (TARGET_LONG_BITS == 64) {
1250 tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
1251 TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
1254 return TCG_REG_R2;
1257 /* Record the context of a call to the out of line helper code for the slow
1258 path for a load or store, so that we can later generate the correct
1259 helper code. */
1260 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1261 TCGReg datalo, TCGReg datahi, TCGReg addrlo,
1262 TCGReg addrhi, tcg_insn_unit *raddr,
1263 tcg_insn_unit *label_ptr)
1265 TCGLabelQemuLdst *label = new_ldst_label(s);
1267 label->is_ld = is_ld;
1268 label->oi = oi;
1269 label->datalo_reg = datalo;
1270 label->datahi_reg = datahi;
1271 label->addrlo_reg = addrlo;
1272 label->addrhi_reg = addrhi;
1273 label->raddr = raddr;
1274 label->label_ptr[0] = label_ptr;
1277 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1279 TCGReg argreg, datalo, datahi;
1280 TCGMemOpIdx oi = lb->oi;
1281 TCGMemOp opc = get_memop(oi);
1282 void *func;
1284 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1286 argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
1287 if (TARGET_LONG_BITS == 64) {
1288 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1289 } else {
1290 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1292 argreg = tcg_out_arg_imm32(s, argreg, oi);
1293 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1295 /* For armv6 we can use the canonical unsigned helpers and minimize
1296 icache usage. For pre-armv6, use the signed helpers since we do
1297 not have a single insn sign-extend. */
1298 if (use_armv6_instructions) {
1299 func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
1300 } else {
1301 func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
1302 if (opc & MO_SIGN) {
1303 opc = MO_UL;
1306 tcg_out_call(s, func);
1308 datalo = lb->datalo_reg;
1309 datahi = lb->datahi_reg;
1310 switch (opc & MO_SSIZE) {
1311 case MO_SB:
1312 tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
1313 break;
1314 case MO_SW:
1315 tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
1316 break;
1317 default:
1318 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1319 break;
1320 case MO_Q:
1321 if (datalo != TCG_REG_R1) {
1322 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1323 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1324 } else if (datahi != TCG_REG_R0) {
1325 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1326 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1327 } else {
1328 tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
1329 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1330 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
1332 break;
1335 tcg_out_goto(s, COND_AL, lb->raddr);
1338 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1340 TCGReg argreg, datalo, datahi;
1341 TCGMemOpIdx oi = lb->oi;
1342 TCGMemOp opc = get_memop(oi);
1344 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1346 argreg = TCG_REG_R0;
1347 argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
1348 if (TARGET_LONG_BITS == 64) {
1349 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1350 } else {
1351 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1354 datalo = lb->datalo_reg;
1355 datahi = lb->datahi_reg;
1356 switch (opc & MO_SIZE) {
1357 case MO_8:
1358 argreg = tcg_out_arg_reg8(s, argreg, datalo);
1359 break;
1360 case MO_16:
1361 argreg = tcg_out_arg_reg16(s, argreg, datalo);
1362 break;
1363 case MO_32:
1364 default:
1365 argreg = tcg_out_arg_reg32(s, argreg, datalo);
1366 break;
1367 case MO_64:
1368 argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
1369 break;
1372 argreg = tcg_out_arg_imm32(s, argreg, oi);
1373 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1375 /* Tail-call to the helper, which will return to the fast path. */
1376 tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1378 #endif /* SOFTMMU */
1380 static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
1381 TCGReg datalo, TCGReg datahi,
1382 TCGReg addrlo, TCGReg addend)
1384 TCGMemOp bswap = opc & MO_BSWAP;
1386 switch (opc & MO_SSIZE) {
1387 case MO_UB:
1388 tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
1389 break;
1390 case MO_SB:
1391 tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
1392 break;
1393 case MO_UW:
1394 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1395 if (bswap) {
1396 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1398 break;
1399 case MO_SW:
1400 if (bswap) {
1401 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1402 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1403 } else {
1404 tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
1406 break;
1407 case MO_UL:
1408 default:
1409 tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
1410 if (bswap) {
1411 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1413 break;
1414 case MO_Q:
1416 TCGReg dl = (bswap ? datahi : datalo);
1417 TCGReg dh = (bswap ? datalo : datahi);
1419 /* Avoid ldrd for user-only emulation, to handle unaligned. */
1420 if (USING_SOFTMMU && use_armv6_instructions
1421 && (dl & 1) == 0 && dh == dl + 1) {
1422 tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
1423 } else if (dl != addend) {
1424 tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
1425 tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
1426 } else {
1427 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
1428 addend, addrlo, SHIFT_IMM_LSL(0));
1429 tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
1430 tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
1432 if (bswap) {
1433 tcg_out_bswap32(s, COND_AL, dl, dl);
1434 tcg_out_bswap32(s, COND_AL, dh, dh);
1437 break;
1441 static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
1442 TCGReg datalo, TCGReg datahi,
1443 TCGReg addrlo)
1445 TCGMemOp bswap = opc & MO_BSWAP;
1447 switch (opc & MO_SSIZE) {
1448 case MO_UB:
1449 tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
1450 break;
1451 case MO_SB:
1452 tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
1453 break;
1454 case MO_UW:
1455 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1456 if (bswap) {
1457 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1459 break;
1460 case MO_SW:
1461 if (bswap) {
1462 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1463 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1464 } else {
1465 tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
1467 break;
1468 case MO_UL:
1469 default:
1470 tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
1471 if (bswap) {
1472 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1474 break;
1475 case MO_Q:
1477 TCGReg dl = (bswap ? datahi : datalo);
1478 TCGReg dh = (bswap ? datalo : datahi);
1480 /* Avoid ldrd for user-only emulation, to handle unaligned. */
1481 if (USING_SOFTMMU && use_armv6_instructions
1482 && (dl & 1) == 0 && dh == dl + 1) {
1483 tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
1484 } else if (dl == addrlo) {
1485 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1486 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1487 } else {
1488 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1489 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1491 if (bswap) {
1492 tcg_out_bswap32(s, COND_AL, dl, dl);
1493 tcg_out_bswap32(s, COND_AL, dh, dh);
1496 break;
1500 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1502 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1503 TCGMemOpIdx oi;
1504 TCGMemOp opc;
1505 #ifdef CONFIG_SOFTMMU
1506 int mem_index;
1507 TCGReg addend;
1508 tcg_insn_unit *label_ptr;
1509 #endif
1511 datalo = *args++;
1512 datahi = (is64 ? *args++ : 0);
1513 addrlo = *args++;
1514 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1515 oi = *args++;
1516 opc = get_memop(oi);
1518 #ifdef CONFIG_SOFTMMU
1519 mem_index = get_mmuidx(oi);
1520 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
1522 /* This is a conditional BL only to load a pointer within this opcode into LR
1523 for the slow path. We will not be using the value for a tail call. */
1524 label_ptr = s->code_ptr;
1525 tcg_out_bl_noaddr(s, COND_NE);
1527 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
1529 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1530 s->code_ptr, label_ptr);
1531 #else /* !CONFIG_SOFTMMU */
1532 if (guest_base) {
1533 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1534 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
1535 } else {
1536 tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
1538 #endif
1541 static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
1542 TCGReg datalo, TCGReg datahi,
1543 TCGReg addrlo, TCGReg addend)
1545 TCGMemOp bswap = opc & MO_BSWAP;
1547 switch (opc & MO_SIZE) {
1548 case MO_8:
1549 tcg_out_st8_r(s, cond, datalo, addrlo, addend);
1550 break;
1551 case MO_16:
1552 if (bswap) {
1553 tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
1554 tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
1555 } else {
1556 tcg_out_st16_r(s, cond, datalo, addrlo, addend);
1558 break;
1559 case MO_32:
1560 default:
1561 if (bswap) {
1562 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1563 tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
1564 } else {
1565 tcg_out_st32_r(s, cond, datalo, addrlo, addend);
1567 break;
1568 case MO_64:
1569 /* Avoid strd for user-only emulation, to handle unaligned. */
1570 if (bswap) {
1571 tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
1572 tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
1573 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1574 tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
1575 } else if (USING_SOFTMMU && use_armv6_instructions
1576 && (datalo & 1) == 0 && datahi == datalo + 1) {
1577 tcg_out_strd_r(s, cond, datalo, addrlo, addend);
1578 } else {
1579 tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
1580 tcg_out_st32_12(s, cond, datahi, addend, 4);
1582 break;
1586 static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
1587 TCGReg datalo, TCGReg datahi,
1588 TCGReg addrlo)
1590 TCGMemOp bswap = opc & MO_BSWAP;
1592 switch (opc & MO_SIZE) {
1593 case MO_8:
1594 tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
1595 break;
1596 case MO_16:
1597 if (bswap) {
1598 tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
1599 tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
1600 } else {
1601 tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
1603 break;
1604 case MO_32:
1605 default:
1606 if (bswap) {
1607 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1608 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1609 } else {
1610 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1612 break;
1613 case MO_64:
1614 /* Avoid strd for user-only emulation, to handle unaligned. */
1615 if (bswap) {
1616 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
1617 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1618 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1619 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
1620 } else if (USING_SOFTMMU && use_armv6_instructions
1621 && (datalo & 1) == 0 && datahi == datalo + 1) {
1622 tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
1623 } else {
1624 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1625 tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
1627 break;
1631 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1633 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1634 TCGMemOpIdx oi;
1635 TCGMemOp opc;
1636 #ifdef CONFIG_SOFTMMU
1637 int mem_index;
1638 TCGReg addend;
1639 tcg_insn_unit *label_ptr;
1640 #endif
1642 datalo = *args++;
1643 datahi = (is64 ? *args++ : 0);
1644 addrlo = *args++;
1645 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1646 oi = *args++;
1647 opc = get_memop(oi);
1649 #ifdef CONFIG_SOFTMMU
1650 mem_index = get_mmuidx(oi);
1651 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
1653 tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
1655 /* The conditional call must come last, as we're going to return here. */
1656 label_ptr = s->code_ptr;
1657 tcg_out_bl_noaddr(s, COND_NE);
1659 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1660 s->code_ptr, label_ptr);
1661 #else /* !CONFIG_SOFTMMU */
1662 if (guest_base) {
1663 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1664 tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
1665 datahi, addrlo, TCG_REG_TMP);
1666 } else {
1667 tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
1669 #endif
1672 static tcg_insn_unit *tb_ret_addr;
1674 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1675 const TCGArg *args, const int *const_args)
1677 TCGArg a0, a1, a2, a3, a4, a5;
1678 int c;
1680 switch (opc) {
1681 case INDEX_op_exit_tb:
1682 tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
1683 tcg_out_goto(s, COND_AL, tb_ret_addr);
1684 break;
1685 case INDEX_op_goto_tb:
1686 if (s->tb_jmp_insn_offset) {
1687 /* Direct jump method */
1688 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
1689 tcg_out_b_noaddr(s, COND_AL);
1690 } else {
1691 /* Indirect jump method */
1692 intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
1693 tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
1694 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
1696 s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
1697 break;
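/* Editor's note (not in the original source): the direct method leaves a
   to-be-patched branch that arm_tb_set_jmp_target() later rewrites via
   reloc_pc24_atomic(); the indirect method splits the descriptor address
   into a movi of the 4K-aligned base plus a 12-bit offset so a single
   "ldr pc, [r0, #off]" fetches the destination. */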
1698 case INDEX_op_br:
1699 tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
1700 break;
1702 case INDEX_op_ld8u_i32:
1703 tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
1704 break;
1705 case INDEX_op_ld8s_i32:
1706 tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
1707 break;
1708 case INDEX_op_ld16u_i32:
1709 tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
1710 break;
1711 case INDEX_op_ld16s_i32:
1712 tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
1713 break;
1714 case INDEX_op_ld_i32:
1715 tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
1716 break;
1717 case INDEX_op_st8_i32:
1718 tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
1719 break;
1720 case INDEX_op_st16_i32:
1721 tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
1722 break;
1723 case INDEX_op_st_i32:
1724 tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
1725 break;
1727 case INDEX_op_movcond_i32:
1728 /* Constraints mean that v2 is always in the same register as dest,
1729 * so we only need to do "if condition passed, move v1 to dest".
1731 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1732 args[1], args[2], const_args[2]);
1733 tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
1734 ARITH_MVN, args[0], 0, args[3], const_args[3]);
1735 break;
1736 case INDEX_op_add_i32:
1737 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
1738 args[0], args[1], args[2], const_args[2]);
1739 break;
1740 case INDEX_op_sub_i32:
1741 if (const_args[1]) {
1742 if (const_args[2]) {
1743 tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
1744 } else {
1745 tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
1746 args[0], args[2], args[1], 1);
1748 } else {
1749 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
1750 args[0], args[1], args[2], const_args[2]);
1752 break;
1753 case INDEX_op_and_i32:
1754 tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
1755 args[0], args[1], args[2], const_args[2]);
1756 break;
1757 case INDEX_op_andc_i32:
1758 tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
1759 args[0], args[1], args[2], const_args[2]);
1760 break;
1761 case INDEX_op_or_i32:
1762 c = ARITH_ORR;
1763 goto gen_arith;
1764 case INDEX_op_xor_i32:
1765 c = ARITH_EOR;
1766 /* Fall through. */
1767 gen_arith:
1768 tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
1769 break;
1770 case INDEX_op_add2_i32:
1771 a0 = args[0], a1 = args[1], a2 = args[2];
1772 a3 = args[3], a4 = args[4], a5 = args[5];
1773 if (a0 == a3 || (a0 == a5 && !const_args[5])) {
1774 a0 = TCG_REG_TMP;
1776 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
1777 a0, a2, a4, const_args[4]);
1778 tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
1779 a1, a3, a5, const_args[5]);
1780 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1781 break;
1782 case INDEX_op_sub2_i32:
1783 a0 = args[0], a1 = args[1], a2 = args[2];
1784 a3 = args[3], a4 = args[4], a5 = args[5];
1785 if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
1786 a0 = TCG_REG_TMP;
1788 if (const_args[2]) {
1789 if (const_args[4]) {
1790 tcg_out_movi32(s, COND_AL, a0, a4);
1791 a4 = a0;
1793 tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
1794 } else {
1795 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
1796 ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
1798 if (const_args[3]) {
1799 if (const_args[5]) {
1800 tcg_out_movi32(s, COND_AL, a1, a5);
1801 a5 = a1;
1803 tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
1804 } else {
1805 tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
1806 a1, a3, a5, const_args[5]);
1808 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1809 break;
1810 case INDEX_op_neg_i32:
1811 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
1812 break;
1813 case INDEX_op_not_i32:
1814 tcg_out_dat_reg(s, COND_AL,
1815 ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
1816 break;
1817 case INDEX_op_mul_i32:
1818 tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
1819 break;
1820 case INDEX_op_mulu2_i32:
1821 tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1822 break;
1823 case INDEX_op_muls2_i32:
1824 tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1825 break;
1826 /* XXX: Perhaps args[2] & 0x1f is wrong */
1827 case INDEX_op_shl_i32:
1828 c = const_args[2] ?
1829 SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
1830 goto gen_shift32;
1831 case INDEX_op_shr_i32:
1832 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
1833 SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
1834 goto gen_shift32;
1835 case INDEX_op_sar_i32:
1836 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
1837 SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
1838 goto gen_shift32;
1839 case INDEX_op_rotr_i32:
1840 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
1841 SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
1842 /* Fall through. */
1843 gen_shift32:
1844 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
1845 break;
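/* Editor's note (not in the original source): the "(args[2] & 0x1f) ? ... :
   SHIFT_IMM_LSL(0)" selection above exists because an immediate LSR/ASR/ROR
   amount of 0 encodes a shift of 32 (or RRX) in the ARM shifter operand, so
   a zero shift count must be emitted as LSL #0 instead. */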
1847 case INDEX_op_rotl_i32:
1848 if (const_args[2]) {
1849 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1850 ((0x20 - args[2]) & 0x1f) ?
1851 SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
1852 SHIFT_IMM_LSL(0));
1853 } else {
1854 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
1855 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1856 SHIFT_REG_ROR(TCG_REG_TMP));
1858 break;
1860 case INDEX_op_brcond_i32:
1861 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1862 args[0], args[1], const_args[1]);
1863 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
1864 arg_label(args[3]));
1865 break;
1866 case INDEX_op_brcond2_i32:
1867 /* The resulting conditions are:
1868 * TCG_COND_EQ --> a0 == a2 && a1 == a3,
1869 * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3,
1870 * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3,
1871 * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
1872 * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
1873 * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3,
1874 */
1875 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1876 args[1], args[3], const_args[3]);
1877 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1878 args[0], args[2], const_args[2]);
1879 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
1880 arg_label(args[5]));
1881 break;
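/* For illustration, the double-compare technique above for a 64-bit unsigned
 * "less than" (TCG_COND_LTU) produces roughly:
 *     cmp    a1, a3        @ compare the high words, setting the flags
 *     cmpeq  a0, a2        @ only if they were equal, compare the low words
 *     blo    <label>       @ high < high, or high == high and low < low
 * The conditional second compare leaves the first comparison's flags intact
 * whenever the high words already decide the result. */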
1882 case INDEX_op_setcond_i32:
1883 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1884 args[1], args[2], const_args[2]);
1885 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
1886 ARITH_MOV, args[0], 0, 1);
1887 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
1888 ARITH_MOV, args[0], 0, 0);
1889 break;
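/* setcond_i32 above is a compare followed by a complementary pair of
 * conditional moves; for TCG_COND_LT, for example, the emitted sequence is
 * roughly:
 *     cmp    args[1], args[2]
 *     movlt  args[0], #1
 *     movge  args[0], #0
 * Exactly one of the two moves executes, so args[0] always ends up 0 or 1. */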
1890 case INDEX_op_setcond2_i32:
1891 /* See brcond2_i32 comment */
1892 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1893 args[2], args[4], const_args[4]);
1894 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1895 args[1], args[3], const_args[3]);
1896 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
1897 ARITH_MOV, args[0], 0, 1);
1898 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
1899 ARITH_MOV, args[0], 0, 0);
1900 break;
1902 case INDEX_op_qemu_ld_i32:
1903 tcg_out_qemu_ld(s, args, 0);
1904 break;
1905 case INDEX_op_qemu_ld_i64:
1906 tcg_out_qemu_ld(s, args, 1);
1907 break;
1908 case INDEX_op_qemu_st_i32:
1909 tcg_out_qemu_st(s, args, 0);
1910 break;
1911 case INDEX_op_qemu_st_i64:
1912 tcg_out_qemu_st(s, args, 1);
1913 break;
1915 case INDEX_op_bswap16_i32:
1916 tcg_out_bswap16(s, COND_AL, args[0], args[1]);
1917 break;
1918 case INDEX_op_bswap32_i32:
1919 tcg_out_bswap32(s, COND_AL, args[0], args[1]);
1920 break;
1922 case INDEX_op_ext8s_i32:
1923 tcg_out_ext8s(s, COND_AL, args[0], args[1]);
1924 break;
1925 case INDEX_op_ext16s_i32:
1926 tcg_out_ext16s(s, COND_AL, args[0], args[1]);
1927 break;
1928 case INDEX_op_ext16u_i32:
1929 tcg_out_ext16u(s, COND_AL, args[0], args[1]);
1930 break;
1932 case INDEX_op_deposit_i32:
1933 tcg_out_deposit(s, COND_AL, args[0], args[2],
1934 args[3], args[4], const_args[2]);
1935 break;
1937 case INDEX_op_div_i32:
1938 tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
1939 break;
1940 case INDEX_op_divu_i32:
1941 tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
1942 break;
1944 case INDEX_op_mb:
1945 tcg_out_mb(s, args[0]);
1946 break;
1948 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1949 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1950 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1951 default:
1952 tcg_abort();
1953 }
1954 }
1956 static const TCGTargetOpDef arm_op_defs[] = {
1957 { INDEX_op_exit_tb, { } },
1958 { INDEX_op_goto_tb, { } },
1959 { INDEX_op_br, { } },
1961 { INDEX_op_ld8u_i32, { "r", "r" } },
1962 { INDEX_op_ld8s_i32, { "r", "r" } },
1963 { INDEX_op_ld16u_i32, { "r", "r" } },
1964 { INDEX_op_ld16s_i32, { "r", "r" } },
1965 { INDEX_op_ld_i32, { "r", "r" } },
1966 { INDEX_op_st8_i32, { "r", "r" } },
1967 { INDEX_op_st16_i32, { "r", "r" } },
1968 { INDEX_op_st_i32, { "r", "r" } },
1970 /* TODO: "r", "r", "ri" */
1971 { INDEX_op_add_i32, { "r", "r", "rIN" } },
1972 { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
1973 { INDEX_op_mul_i32, { "r", "r", "r" } },
1974 { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
1975 { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
1976 { INDEX_op_and_i32, { "r", "r", "rIK" } },
1977 { INDEX_op_andc_i32, { "r", "r", "rIK" } },
1978 { INDEX_op_or_i32, { "r", "r", "rI" } },
1979 { INDEX_op_xor_i32, { "r", "r", "rI" } },
1980 { INDEX_op_neg_i32, { "r", "r" } },
1981 { INDEX_op_not_i32, { "r", "r" } },
1983 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1984 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1985 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1986 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1987 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1989 { INDEX_op_brcond_i32, { "r", "rIN" } },
1990 { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
1991 { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },
1993 { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
1994 { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
1995 { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
1996 { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
1998 #if TARGET_LONG_BITS == 32
1999 { INDEX_op_qemu_ld_i32, { "r", "l" } },
2000 { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
2001 { INDEX_op_qemu_st_i32, { "s", "s" } },
2002 { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
2003 #else
2004 { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
2005 { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
2006 { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
2007 { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
2008 #endif
2010 { INDEX_op_bswap16_i32, { "r", "r" } },
2011 { INDEX_op_bswap32_i32, { "r", "r" } },
2013 { INDEX_op_ext8s_i32, { "r", "r" } },
2014 { INDEX_op_ext16s_i32, { "r", "r" } },
2015 { INDEX_op_ext16u_i32, { "r", "r" } },
2017 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
2019 { INDEX_op_div_i32, { "r", "r", "r" } },
2020 { INDEX_op_divu_i32, { "r", "r", "r" } },
2022 { INDEX_op_mb, { } },
2023 { -1 },
2024 };
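/* A rough reminder of the constraint letters used in the table above, as this
 * backend defines them elsewhere in this file: "r" is any core register, "I"
 * an immediate that fits the ARM 8-bit rotated form, "K" one whose bitwise
 * inverse fits (usable via BIC/MVN), and "N" one whose negation fits (letting
 * add/sub or cmp/cmn be swapped), matching the tcg_out_dat_rI, _rIK and _rIN
 * helpers.  "l" and "s" keep qemu_ld/qemu_st operands out of the registers
 * clobbered by the softmmu helper calls. */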
2026 static void tcg_target_init(TCGContext *s)
2027 {
2028 /* Only probe for the platform and capabilities if we haven't already
2029 determined maximum values at compile time. */
2030 #ifndef use_idiv_instructions
2031 {
2032 unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2033 use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
2034 }
2035 #endif
2036 if (__ARM_ARCH < 7) {
2037 const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
2038 if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
2039 arm_arch = pl[1] - '0';
2040 }
2041 }
2043 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2044 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
2045 (1 << TCG_REG_R0) |
2046 (1 << TCG_REG_R1) |
2047 (1 << TCG_REG_R2) |
2048 (1 << TCG_REG_R3) |
2049 (1 << TCG_REG_R12) |
2050 (1 << TCG_REG_R14));
2052 tcg_regset_clear(s->reserved_regs);
2053 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2054 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2055 tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
2057 tcg_add_target_add_op_defs(arm_op_defs);
2058 }
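/* For illustration: on a kernel whose AT_PLATFORM string is "v6l", pl[0] is
 * 'v' and pl[1] is '6', so arm_arch drops to 6 at run time and the ARMv7-only
 * code-generation paths are avoided even when the compile-time probe was
 * inconclusive. */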
2060 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
2061 TCGReg arg1, intptr_t arg2)
2062 {
2063 tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
2064 }
2066 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
2067 TCGReg arg1, intptr_t arg2)
2068 {
2069 tcg_out_st32(s, COND_AL, arg, arg1, arg2);
2070 }
2072 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
2073 TCGReg base, intptr_t ofs)
2074 {
2075 return false;
2076 }
2078 static inline void tcg_out_mov(TCGContext *s, TCGType type,
2079 TCGReg ret, TCGReg arg)
2080 {
2081 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
2082 }
2084 static inline void tcg_out_movi(TCGContext *s, TCGType type,
2085 TCGReg ret, tcg_target_long arg)
2086 {
2087 tcg_out_movi32(s, COND_AL, ret, arg);
2088 }
2090 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2091 and tcg_register_jit. */
2093 #define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
2095 #define FRAME_SIZE \
2096 ((PUSH_SIZE \
2097 + TCG_STATIC_CALL_ARGS_SIZE \
2098 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2099 + TCG_TARGET_STACK_ALIGN - 1) \
2100 & -TCG_TARGET_STACK_ALIGN)
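/* Worked example of the macros above, assuming the usual values of
 * TCG_STATIC_CALL_ARGS_SIZE = 128, CPU_TEMP_BUF_NLONGS = 128 and
 * TCG_TARGET_STACK_ALIGN = 8 on a 32-bit host: PUSH_SIZE covers 9 registers
 * (r4-r11 plus lr), i.e. 36 bytes, so FRAME_SIZE = (36 + 128 + 512 + 7) & -8
 * = 680 bytes, of which 680 - 36 = 644 is the stack_addend subtracted from
 * sp in the prologue below. */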
2102 static void tcg_target_qemu_prologue(TCGContext *s)
2103 {
2104 int stack_addend;
2106 /* Calling convention requires us to save r4-r11 and lr. */
2107 /* stmdb sp!, { r4 - r11, lr } */
2108 tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
2110 /* Reserve callee argument and tcg temp space. */
2111 stack_addend = FRAME_SIZE - PUSH_SIZE;
2113 tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
2114 TCG_REG_CALL_STACK, stack_addend, 1);
2115 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2116 CPU_TEMP_BUF_NLONGS * sizeof(long));
2118 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2120 tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
2121 tb_ret_addr = s->code_ptr;
2123 /* Epilogue. We branch here via tb_ret_addr. */
2124 tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
2125 TCG_REG_CALL_STACK, stack_addend, 1);
2127 /* ldmia sp!, { r4 - r11, pc } */
2128 tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
2129 }
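/* Decoding the two raw opcodes above under the usual ARM STM/LDM encoding:
 * 0x092d4ff0 is "stmdb sp!, {...}" with register mask 0x4ff0, i.e. bits 4-11
 * (r4-r11) plus bit 14 (lr); 0x08bd8ff0 is "ldmia sp!, {...}" with mask
 * 0x8ff0, the same callee-saved set but with bit 15 (pc) in place of lr, so
 * the pop doubles as the function return. */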
2131 typedef struct {
2132 DebugFrameHeader h;
2133 uint8_t fde_def_cfa[4];
2134 uint8_t fde_reg_ofs[18];
2135 } DebugFrame;
2137 #define ELF_HOST_MACHINE EM_ARM
2139 /* We're expecting a 2 byte uleb128 encoded value. */
2140 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2142 static const DebugFrame debug_frame = {
2143 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2144 .h.cie.id = -1,
2145 .h.cie.version = 1,
2146 .h.cie.code_align = 1,
2147 .h.cie.data_align = 0x7c, /* sleb128 -4 */
2148 .h.cie.return_column = 14,
2150 /* Total FDE size does not include the "len" member. */
2151 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2153 .fde_def_cfa = {
2154 12, 13, /* DW_CFA_def_cfa sp, ... */
2155 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2156 (FRAME_SIZE >> 7)
2157 },
2158 .fde_reg_ofs = {
2159 /* The following must match the stmdb in the prologue. */
2160 0x8e, 1, /* DW_CFA_offset, lr, -4 */
2161 0x8b, 2, /* DW_CFA_offset, r11, -8 */
2162 0x8a, 3, /* DW_CFA_offset, r10, -12 */
2163 0x89, 4, /* DW_CFA_offset, r9, -16 */
2164 0x88, 5, /* DW_CFA_offset, r8, -20 */
2165 0x87, 6, /* DW_CFA_offset, r7, -24 */
2166 0x86, 7, /* DW_CFA_offset, r6, -28 */
2167 0x85, 8, /* DW_CFA_offset, r5, -32 */
2168 0x84, 9, /* DW_CFA_offset, r4, -36 */
2169 }
2170 };
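/* Encoding notes for the frame description above: data_align 0x7c is the
 * single-byte sleb128 encoding of -4, so the register offsets count downward
 * in 4-byte words, and the CFA offset is split into the standard two-byte
 * uleb128 form, a low byte of (FRAME_SIZE & 0x7f) with the 0x80 continuation
 * bit set followed by FRAME_SIZE >> 7.  With the example FRAME_SIZE of 680
 * from above, that pair would be 0xa8, 0x05. */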
2172 void tcg_register_jit(void *buf, size_t buf_size)
2173 {
2174 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2175 }