numa: Keep track of NUMA nodes present on the command-line
[qemu/kevin.git] / tcg / arm / tcg-target.c
blobe40301c78b732e72e9291c658810a08d4c26b56e
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Andrzej Zaborowski
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "elf.h"
26 #include "tcg-be-ldst.h"
/* gcc 4.8 predefines __ARM_ARCH.  For compilers that do not, derive the
   architecture level from the per-variant __ARM_ARCH_*__ macros, falling
   back to level 4 when none of them is defined.  */
#if !defined(__ARM_ARCH)
# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
     || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
     || defined(__ARM_ARCH_7EM__)
#  define __ARM_ARCH 7
# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
       || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
       || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
#  define __ARM_ARCH 6
# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
       || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
       || defined(__ARM_ARCH_5TEJ__)
#  define __ARM_ARCH 5
# else
#  define __ARM_ARCH 4
# endif
#endif

/* Architecture level kept in a variable, initialised to the compile-time
   level.  The use_armvN_instructions tests below consult both.  */
static int arm_arch = __ARM_ARCH;

#define use_armv6_instructions  (__ARM_ARCH >= 6 || arm_arch >= 6)
#define use_armv7_instructions  (__ARM_ARCH >= 7 || arm_arch >= 7)

/* The v5T feature set is taken as present when building for a v5T variant;
   otherwise it is treated as equivalent to having v6.  */
#if defined(__ARM_ARCH_5T__) \
    || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
# define use_armv5t_instructions 1
#else
# define use_armv5t_instructions use_armv6_instructions
#endif

/* Only define the flag when it has not been provided as a macro.  */
#ifndef use_idiv_instructions
bool use_idiv_instructions;
#endif
/* Expose CONFIG_SOFTMMU as a 0/1 value usable in ordinary C expressions.
   ??? Ought to think about changing CONFIG_SOFTMMU to always defined.  */
#if defined(CONFIG_SOFTMMU)
# define USING_SOFTMMU 1
#else
# define USING_SOFTMMU 0
#endif
70 #ifndef NDEBUG
71 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
72 "%r0",
73 "%r1",
74 "%r2",
75 "%r3",
76 "%r4",
77 "%r5",
78 "%r6",
79 "%r7",
80 "%r8",
81 "%r9",
82 "%r10",
83 "%r11",
84 "%r12",
85 "%r13",
86 "%r14",
87 "%pc",
89 #endif
91 static const int tcg_target_reg_alloc_order[] = {
92 TCG_REG_R4,
93 TCG_REG_R5,
94 TCG_REG_R6,
95 TCG_REG_R7,
96 TCG_REG_R8,
97 TCG_REG_R9,
98 TCG_REG_R10,
99 TCG_REG_R11,
100 TCG_REG_R13,
101 TCG_REG_R0,
102 TCG_REG_R1,
103 TCG_REG_R2,
104 TCG_REG_R3,
105 TCG_REG_R12,
106 TCG_REG_R14,
109 static const int tcg_target_call_iarg_regs[4] = {
110 TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
112 static const int tcg_target_call_oarg_regs[2] = {
113 TCG_REG_R0, TCG_REG_R1
/* Scratch register used internally by the code emitters in this file.  */
#define TCG_REG_TMP  TCG_REG_R12
118 static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
120 ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
121 *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
124 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
125 intptr_t value, intptr_t addend)
127 assert(type == R_ARM_PC24);
128 assert(addend == 0);
129 reloc_pc24(code_ptr, (tcg_insn_unit *)value);
/* Target-specific constant-constraint flags (see target_parse_constraint
   and tcg_target_const_match for how each one is set and tested).  */
#define TCG_CT_CONST_ARM   (1 << 8)
#define TCG_CT_CONST_INV   (1 << 9)
#define TCG_CT_CONST_NEG   (1 << 10)
#define TCG_CT_CONST_ZERO  (1 << 11)
137 /* parse target specific constraints */
138 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
140 const char *ct_str;
142 ct_str = *pct_str;
143 switch (ct_str[0]) {
144 case 'I':
145 ct->ct |= TCG_CT_CONST_ARM;
146 break;
147 case 'K':
148 ct->ct |= TCG_CT_CONST_INV;
149 break;
150 case 'N': /* The gcc constraint letter is L, already used here. */
151 ct->ct |= TCG_CT_CONST_NEG;
152 break;
153 case 'Z':
154 ct->ct |= TCG_CT_CONST_ZERO;
155 break;
157 case 'r':
158 ct->ct |= TCG_CT_REG;
159 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
160 break;
162 /* qemu_ld address */
163 case 'l':
164 ct->ct |= TCG_CT_REG;
165 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
166 #ifdef CONFIG_SOFTMMU
167 /* r0-r2,lr will be overwritten when reading the tlb entry,
168 so don't use these. */
169 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
170 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
171 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
172 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
173 #endif
174 break;
176 /* qemu_st address & data */
177 case 's':
178 ct->ct |= TCG_CT_REG;
179 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
180 /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
181 and r0-r1 doing the byte swapping, so don't use these. */
182 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
183 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
184 #if defined(CONFIG_SOFTMMU)
185 /* Avoid clashes with registers being used for helper args */
186 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
187 #if TARGET_LONG_BITS == 64
188 /* Avoid clashes with registers being used for helper args */
189 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
190 #endif
191 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
192 #endif
193 break;
195 default:
196 return -1;
198 ct_str++;
199 *pct_str = ct_str;
201 return 0;
/* Rotate the 32-bit value val left by n bits.
 *
 * The rotate count is reduced modulo 32, and the complementary right shift
 * is masked as well, so a count of 0 is well defined and returns val
 * unchanged.  The plain form (val >> (32 - n)) would shift by the full
 * width of the type for n == 0, which is undefined behaviour in C; callers
 * in this file do pass the 0 returned by encode_imm().
 */
static inline uint32_t rotl(uint32_t val, int n)
{
    const unsigned count = (unsigned)n & 31u;

    return (val << count) | (val >> (-count & 31u));
}
/* ARM immediates for ALU instructions are made of an unsigned 8-bit value
   right-rotated by an even amount between 0 and 30.

   Returns the left-rotation that brings imm into the low 8 bits (0, or an
   even value), or -1 when imm cannot be encoded that way.  */
static inline int encode_imm(uint32_t imm)
{
    int low_zeros;
    int rot;

    /* Trivial case: the value already fits in the low byte.  */
    if (imm <= 0xff) {
        return 0;
    }

    /* Next, see whether dropping an even number of trailing zero bits
       leaves at most 8 significant bits.  */
    low_zeros = ctz32(imm) & ~1;
    if ((imm >> low_zeros) <= 0xff) {
        return 32 - low_zeros;
    }

    /* Finally, handle bytes that wrap around the top of the word.  */
    for (rot = 2; rot <= 6; rot += 2) {
        if (rotl(imm, rot) <= 0xff) {
            return rot;
        }
    }

    /* imm can't be encoded */
    return -1;
}
/* Return non-zero when imm is encodable as an ALU immediate, i.e. when
   encode_imm() finds a rotation for it.  */
static inline int check_fit_imm(uint32_t imm)
{
    const int rot = encode_imm(imm);

    return rot >= 0;
}
238 /* Test if a constant matches the constraint.
239 * TODO: define constraints for:
241 * ldr/str offset: between -0xfff and 0xfff
242 * ldrh/strh offset: between -0xff and 0xff
243 * mov operand2: values represented with x << (2 * y), x < 0x100
244 * add, sub, eor...: ditto
246 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
247 const TCGArgConstraint *arg_ct)
249 int ct;
250 ct = arg_ct->ct;
251 if (ct & TCG_CT_CONST) {
252 return 1;
253 } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
254 return 1;
255 } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
256 return 1;
257 } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
258 return 1;
259 } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
260 return 1;
261 } else {
262 return 0;
/* Bit 20 of a data-processing instruction; included in the TST/CMP/CMN
   opcodes below.  */
#define TO_CPSR (1 << 20)

/* Opcode bit patterns OR-ed into emitted instructions.  */
typedef enum {
    /* Data-processing opcodes (opcode field starts at bit 21).  */
    ARITH_AND = (0x0 << 21),
    ARITH_EOR = (0x1 << 21),
    ARITH_SUB = (0x2 << 21),
    ARITH_RSB = (0x3 << 21),
    ARITH_ADD = (0x4 << 21),
    ARITH_ADC = (0x5 << 21),
    ARITH_SBC = (0x6 << 21),
    ARITH_RSC = (0x7 << 21),
    ARITH_TST = (0x8 << 21) | TO_CPSR,
    ARITH_CMP = (0xa << 21) | TO_CPSR,
    ARITH_CMN = (0xb << 21) | TO_CPSR,
    ARITH_ORR = (0xc << 21),
    ARITH_MOV = (0xd << 21),
    ARITH_BIC = (0xe << 21),
    ARITH_MVN = (0xf << 21),

    /* Word loads and stores.  */
    INSN_LDR_IMM   = 0x04100000,
    INSN_LDR_REG   = 0x06100000,
    INSN_STR_IMM   = 0x04000000,
    INSN_STR_REG   = 0x06000000,

    /* Halfword loads and stores.  */
    INSN_LDRH_IMM  = 0x005000b0,
    INSN_LDRH_REG  = 0x001000b0,
    INSN_LDRSH_IMM = 0x005000f0,
    INSN_LDRSH_REG = 0x001000f0,
    INSN_STRH_IMM  = 0x004000b0,
    INSN_STRH_REG  = 0x000000b0,

    /* Byte loads and stores.  */
    INSN_LDRB_IMM  = 0x04500000,
    INSN_LDRB_REG  = 0x06500000,
    INSN_LDRSB_IMM = 0x005000d0,
    INSN_LDRSB_REG = 0x001000d0,
    INSN_STRB_IMM  = 0x04400000,
    INSN_STRB_REG  = 0x06400000,

    /* Doubleword loads and stores.  */
    INSN_LDRD_IMM  = 0x004000d0,
    INSN_LDRD_REG  = 0x000000d0,
    INSN_STRD_IMM  = 0x004000f0,
    INSN_STRD_REG  = 0x000000f0,
} ARMInsn;
/* Shifter-operand encodings.  The _IMM_ forms place the shift amount at
   bit 7; the _REG_ forms place the shift register number at bit 8.  The
   low constant selects the shift kind (LSL, LSR, ASR or ROR).  */
#define SHIFT_IMM_LSL(im)  (((im) << 7) | 0x00)
#define SHIFT_IMM_LSR(im)  (((im) << 7) | 0x20)
#define SHIFT_IMM_ASR(im)  (((im) << 7) | 0x40)
#define SHIFT_IMM_ROR(im)  (((im) << 7) | 0x60)

#define SHIFT_REG_LSL(rs)  (((rs) << 8) | 0x10)
#define SHIFT_REG_LSR(rs)  (((rs) << 8) | 0x30)
#define SHIFT_REG_ASR(rs)  (((rs) << 8) | 0x50)
#define SHIFT_REG_ROR(rs)  (((rs) << 8) | 0x70)
/* ARM condition codes; the emitters shift these to bit 28 of each
   instruction.  */
enum arm_cond_code_e {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,      /* Unsigned greater or equal */
    COND_CC = 0x3,      /* Unsigned less than */
    COND_MI = 0x4,      /* Negative */
    COND_PL = 0x5,      /* Zero or greater */
    COND_VS = 0x6,      /* Overflow */
    COND_VC = 0x7,      /* No overflow */
    COND_HI = 0x8,      /* Unsigned greater than */
    COND_LS = 0x9,      /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
};
337 static const uint8_t tcg_cond_to_arm_cond[] = {
338 [TCG_COND_EQ] = COND_EQ,
339 [TCG_COND_NE] = COND_NE,
340 [TCG_COND_LT] = COND_LT,
341 [TCG_COND_GE] = COND_GE,
342 [TCG_COND_LE] = COND_LE,
343 [TCG_COND_GT] = COND_GT,
344 /* unsigned */
345 [TCG_COND_LTU] = COND_CC,
346 [TCG_COND_GEU] = COND_CS,
347 [TCG_COND_LEU] = COND_LS,
348 [TCG_COND_GTU] = COND_HI,
351 static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
353 tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
356 static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
358 tcg_out32(s, (cond << 28) | 0x0a000000 |
359 (((offset - 8) >> 2) & 0x00ffffff));
362 static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
364 /* We pay attention here to not modify the branch target by masking
365 the corresponding bytes. This ensure that caches and memory are
366 kept coherent during retranslation. */
367 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
370 static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
372 /* We pay attention here to not modify the branch target by masking
373 the corresponding bytes. This ensure that caches and memory are
374 kept coherent during retranslation. */
375 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
378 static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
380 tcg_out32(s, (cond << 28) | 0x0b000000 |
381 (((offset - 8) >> 2) & 0x00ffffff));
384 static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
386 tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
389 static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
391 tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
392 (((offset - 8) >> 2) & 0x00ffffff));
395 static inline void tcg_out_dat_reg(TCGContext *s,
396 int cond, int opc, int rd, int rn, int rm, int shift)
398 tcg_out32(s, (cond << 28) | (0 << 25) | opc |
399 (rn << 16) | (rd << 12) | shift | rm);
402 static inline void tcg_out_nop(TCGContext *s)
404 if (use_armv7_instructions) {
405 /* Architected nop introduced in v6k. */
406 /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this
407 also Just So Happened to do nothing on pre-v6k so that we
408 don't need to conditionalize it? */
409 tcg_out32(s, 0xe320f000);
410 } else {
411 /* Prior to that the assembler uses mov r0, r0. */
412 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
416 static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
418 /* Simple reg-reg move, optimising out the 'do nothing' case */
419 if (rd != rm) {
420 tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
424 static inline void tcg_out_dat_imm(TCGContext *s,
425 int cond, int opc, int rd, int rn, int im)
427 tcg_out32(s, (cond << 28) | (1 << 25) | opc |
428 (rn << 16) | (rd << 12) | im);
431 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
433 int rot, opc, rn;
435 /* For armv7, make sure not to use movw+movt when mov/mvn would do.
436 Speed things up by only checking when movt would be required.
437 Prior to armv7, have one go at fully rotated immediates before
438 doing the decomposition thing below. */
439 if (!use_armv7_instructions || (arg & 0xffff0000)) {
440 rot = encode_imm(arg);
441 if (rot >= 0) {
442 tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
443 rotl(arg, rot) | (rot << 7));
444 return;
446 rot = encode_imm(~arg);
447 if (rot >= 0) {
448 tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
449 rotl(~arg, rot) | (rot << 7));
450 return;
454 /* Use movw + movt. */
455 if (use_armv7_instructions) {
456 /* movw */
457 tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
458 | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
459 if (arg & 0xffff0000) {
460 /* movt */
461 tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
462 | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
464 return;
467 /* TODO: This is very suboptimal, we can easily have a constant
468 pool somewhere after all the instructions. */
469 opc = ARITH_MOV;
470 rn = 0;
471 /* If we have lots of leading 1's, we can shorten the sequence by
472 beginning with mvn and then clearing higher bits with eor. */
473 if (clz32(~arg) > clz32(arg)) {
474 opc = ARITH_MVN, arg = ~arg;
476 do {
477 int i = ctz32(arg) & ~1;
478 rot = ((32 - i) << 7) & 0xf00;
479 tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
480 arg &= ~(0xff << i);
482 opc = ARITH_EOR;
483 rn = rd;
484 } while (arg);
487 static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
488 TCGArg lhs, TCGArg rhs, int rhs_is_const)
490 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
491 * rhs must satisfy the "rI" constraint.
493 if (rhs_is_const) {
494 int rot = encode_imm(rhs);
495 assert(rot >= 0);
496 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
497 } else {
498 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
502 static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
503 TCGReg dst, TCGReg lhs, TCGArg rhs,
504 bool rhs_is_const)
506 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
507 * rhs must satisfy the "rIK" constraint.
509 if (rhs_is_const) {
510 int rot = encode_imm(rhs);
511 if (rot < 0) {
512 rhs = ~rhs;
513 rot = encode_imm(rhs);
514 assert(rot >= 0);
515 opc = opinv;
517 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
518 } else {
519 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
523 static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
524 TCGArg dst, TCGArg lhs, TCGArg rhs,
525 bool rhs_is_const)
527 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
528 * rhs must satisfy the "rIN" constraint.
530 if (rhs_is_const) {
531 int rot = encode_imm(rhs);
532 if (rot < 0) {
533 rhs = -rhs;
534 rot = encode_imm(rhs);
535 assert(rot >= 0);
536 opc = opneg;
538 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
539 } else {
540 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
544 static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
545 TCGReg rn, TCGReg rm)
547 /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */
548 if (!use_armv6_instructions && rd == rn) {
549 if (rd == rm) {
550 /* rd == rn == rm; copy an input to tmp first. */
551 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
552 rm = rn = TCG_REG_TMP;
553 } else {
554 rn = rm;
555 rm = rd;
558 /* mul */
559 tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
562 static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
563 TCGReg rd1, TCGReg rn, TCGReg rm)
565 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
566 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
567 if (rd0 == rm || rd1 == rm) {
568 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
569 rn = TCG_REG_TMP;
570 } else {
571 TCGReg t = rn;
572 rn = rm;
573 rm = t;
576 /* umull */
577 tcg_out32(s, (cond << 28) | 0x00800090 |
578 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
581 static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
582 TCGReg rd1, TCGReg rn, TCGReg rm)
584 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
585 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
586 if (rd0 == rm || rd1 == rm) {
587 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
588 rn = TCG_REG_TMP;
589 } else {
590 TCGReg t = rn;
591 rn = rm;
592 rm = t;
595 /* smull */
596 tcg_out32(s, (cond << 28) | 0x00c00090 |
597 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
600 static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
602 tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
605 static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
607 tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
610 static inline void tcg_out_ext8s(TCGContext *s, int cond,
611 int rd, int rn)
613 if (use_armv6_instructions) {
614 /* sxtb */
615 tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
616 } else {
617 tcg_out_dat_reg(s, cond, ARITH_MOV,
618 rd, 0, rn, SHIFT_IMM_LSL(24));
619 tcg_out_dat_reg(s, cond, ARITH_MOV,
620 rd, 0, rd, SHIFT_IMM_ASR(24));
624 static inline void tcg_out_ext8u(TCGContext *s, int cond,
625 int rd, int rn)
627 tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
630 static inline void tcg_out_ext16s(TCGContext *s, int cond,
631 int rd, int rn)
633 if (use_armv6_instructions) {
634 /* sxth */
635 tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
636 } else {
637 tcg_out_dat_reg(s, cond, ARITH_MOV,
638 rd, 0, rn, SHIFT_IMM_LSL(16));
639 tcg_out_dat_reg(s, cond, ARITH_MOV,
640 rd, 0, rd, SHIFT_IMM_ASR(16));
644 static inline void tcg_out_ext16u(TCGContext *s, int cond,
645 int rd, int rn)
647 if (use_armv6_instructions) {
648 /* uxth */
649 tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
650 } else {
651 tcg_out_dat_reg(s, cond, ARITH_MOV,
652 rd, 0, rn, SHIFT_IMM_LSL(16));
653 tcg_out_dat_reg(s, cond, ARITH_MOV,
654 rd, 0, rd, SHIFT_IMM_LSR(16));
658 static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
660 if (use_armv6_instructions) {
661 /* revsh */
662 tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
663 } else {
664 tcg_out_dat_reg(s, cond, ARITH_MOV,
665 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
666 tcg_out_dat_reg(s, cond, ARITH_MOV,
667 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
668 tcg_out_dat_reg(s, cond, ARITH_ORR,
669 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
673 static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
675 if (use_armv6_instructions) {
676 /* rev16 */
677 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
678 } else {
679 tcg_out_dat_reg(s, cond, ARITH_MOV,
680 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
681 tcg_out_dat_reg(s, cond, ARITH_MOV,
682 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
683 tcg_out_dat_reg(s, cond, ARITH_ORR,
684 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
688 /* swap the two low bytes assuming that the two high input bytes and the
689 two high output bit can hold any value. */
690 static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
692 if (use_armv6_instructions) {
693 /* rev16 */
694 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
695 } else {
696 tcg_out_dat_reg(s, cond, ARITH_MOV,
697 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
698 tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
699 tcg_out_dat_reg(s, cond, ARITH_ORR,
700 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
704 static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
706 if (use_armv6_instructions) {
707 /* rev */
708 tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
709 } else {
710 tcg_out_dat_reg(s, cond, ARITH_EOR,
711 TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
712 tcg_out_dat_imm(s, cond, ARITH_BIC,
713 TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
714 tcg_out_dat_reg(s, cond, ARITH_MOV,
715 rd, 0, rn, SHIFT_IMM_ROR(8));
716 tcg_out_dat_reg(s, cond, ARITH_EOR,
717 rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
721 bool tcg_target_deposit_valid(int ofs, int len)
723 /* ??? Without bfi, we could improve over generic code by combining
724 the right-shift from a non-zero ofs with the orr. We do run into
725 problems when rd == rs, and the mask generated from ofs+len doesn't
726 fit into an immediate. We would have to be careful not to pessimize
727 wrt the optimizations performed on the expanded code. */
728 return use_armv7_instructions;
731 static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
732 TCGArg a1, int ofs, int len, bool const_a1)
734 if (const_a1) {
735 /* bfi becomes bfc with rn == 15. */
736 a1 = 15;
738 /* bfi/bfc */
739 tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
740 | (ofs << 7) | ((ofs + len - 1) << 16));
743 /* Note that this routine is used for both LDR and LDRH formats, so we do
744 not wish to include an immediate shift at this point. */
745 static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
746 TCGReg rn, TCGReg rm, bool u, bool p, bool w)
748 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
749 | (w << 21) | (rn << 16) | (rt << 12) | rm);
752 static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
753 TCGReg rn, int imm8, bool p, bool w)
755 bool u = 1;
756 if (imm8 < 0) {
757 imm8 = -imm8;
758 u = 0;
760 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
761 (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
764 static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
765 TCGReg rn, int imm12, bool p, bool w)
767 bool u = 1;
768 if (imm12 < 0) {
769 imm12 = -imm12;
770 u = 0;
772 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
773 (rn << 16) | (rt << 12) | imm12);
776 static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
777 TCGReg rn, int imm12)
779 tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
782 static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
783 TCGReg rn, int imm12)
785 tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
788 static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
789 TCGReg rn, TCGReg rm)
791 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
794 static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
795 TCGReg rn, TCGReg rm)
797 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
800 static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
801 TCGReg rn, int imm8)
803 tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
806 static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
807 TCGReg rn, TCGReg rm)
809 tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
812 static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
813 TCGReg rn, int imm8)
815 tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
818 static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
819 TCGReg rn, TCGReg rm)
821 tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
824 /* Register pre-increment with base writeback. */
825 static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
826 TCGReg rn, TCGReg rm)
828 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
831 static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
832 TCGReg rn, TCGReg rm)
834 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
837 static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
838 TCGReg rn, int imm8)
840 tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
843 static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
844 TCGReg rn, int imm8)
846 tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
849 static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
850 TCGReg rn, TCGReg rm)
852 tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
855 static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
856 TCGReg rn, TCGReg rm)
858 tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
861 static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
862 TCGReg rn, int imm8)
864 tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
867 static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
868 TCGReg rn, TCGReg rm)
870 tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
873 static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
874 TCGReg rn, int imm12)
876 tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
879 static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
880 TCGReg rn, int imm12)
882 tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
885 static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
886 TCGReg rn, TCGReg rm)
888 tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
891 static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
892 TCGReg rn, TCGReg rm)
894 tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
897 static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
898 TCGReg rn, int imm8)
900 tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
903 static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
904 TCGReg rn, TCGReg rm)
906 tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
909 static inline void tcg_out_ld32u(TCGContext *s, int cond,
910 int rd, int rn, int32_t offset)
912 if (offset > 0xfff || offset < -0xfff) {
913 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
914 tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
915 } else
916 tcg_out_ld32_12(s, cond, rd, rn, offset);
919 static inline void tcg_out_st32(TCGContext *s, int cond,
920 int rd, int rn, int32_t offset)
922 if (offset > 0xfff || offset < -0xfff) {
923 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
924 tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
925 } else
926 tcg_out_st32_12(s, cond, rd, rn, offset);
929 static inline void tcg_out_ld16u(TCGContext *s, int cond,
930 int rd, int rn, int32_t offset)
932 if (offset > 0xff || offset < -0xff) {
933 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
934 tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
935 } else
936 tcg_out_ld16u_8(s, cond, rd, rn, offset);
939 static inline void tcg_out_ld16s(TCGContext *s, int cond,
940 int rd, int rn, int32_t offset)
942 if (offset > 0xff || offset < -0xff) {
943 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
944 tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
945 } else
946 tcg_out_ld16s_8(s, cond, rd, rn, offset);
949 static inline void tcg_out_st16(TCGContext *s, int cond,
950 int rd, int rn, int32_t offset)
952 if (offset > 0xff || offset < -0xff) {
953 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
954 tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
955 } else
956 tcg_out_st16_8(s, cond, rd, rn, offset);
959 static inline void tcg_out_ld8u(TCGContext *s, int cond,
960 int rd, int rn, int32_t offset)
962 if (offset > 0xfff || offset < -0xfff) {
963 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
964 tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
965 } else
966 tcg_out_ld8_12(s, cond, rd, rn, offset);
969 static inline void tcg_out_ld8s(TCGContext *s, int cond,
970 int rd, int rn, int32_t offset)
972 if (offset > 0xff || offset < -0xff) {
973 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
974 tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
975 } else
976 tcg_out_ld8s_8(s, cond, rd, rn, offset);
979 static inline void tcg_out_st8(TCGContext *s, int cond,
980 int rd, int rn, int32_t offset)
982 if (offset > 0xfff || offset < -0xfff) {
983 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
984 tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
985 } else
986 tcg_out_st8_12(s, cond, rd, rn, offset);
989 /* The _goto case is normally between TBs within the same code buffer, and
990 * with the code buffer limited to 16MB we wouldn't need the long case.
991 * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
993 static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
995 intptr_t addri = (intptr_t)addr;
996 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
998 if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
999 tcg_out_b(s, cond, disp);
1000 return;
1003 tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
1004 if (use_armv5t_instructions) {
1005 tcg_out_bx(s, cond, TCG_REG_TMP);
1006 } else {
1007 if (addri & 1) {
1008 tcg_abort();
1010 tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
1014 /* The call case is mostly used for helpers - so it's not unreasonable
1015 * for them to be beyond branch range */
1016 static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
1018 intptr_t addri = (intptr_t)addr;
1019 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1021 if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
1022 if (addri & 1) {
1023 /* Use BLX if the target is in Thumb mode */
1024 if (!use_armv5t_instructions) {
1025 tcg_abort();
1027 tcg_out_blx_imm(s, disp);
1028 } else {
1029 tcg_out_bl(s, COND_AL, disp);
1031 } else if (use_armv7_instructions) {
1032 tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
1033 tcg_out_blx(s, COND_AL, TCG_REG_TMP);
1034 } else {
1035 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
1036 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
1037 tcg_out32(s, addri);
1041 static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
1043 TCGLabel *l = &s->labels[label_index];
1045 if (l->has_value) {
1046 tcg_out_goto(s, cond, l->u.value_ptr);
1047 } else {
1048 tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 0);
1049 tcg_out_b_noaddr(s, cond);
1053 #ifdef CONFIG_SOFTMMU
1054 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1055 * int mmu_idx, uintptr_t ra)
1057 static void * const qemu_ld_helpers[16] = {
1058 [MO_UB] = helper_ret_ldub_mmu,
1059 [MO_SB] = helper_ret_ldsb_mmu,
1061 [MO_LEUW] = helper_le_lduw_mmu,
1062 [MO_LEUL] = helper_le_ldul_mmu,
1063 [MO_LEQ] = helper_le_ldq_mmu,
1064 [MO_LESW] = helper_le_ldsw_mmu,
1065 [MO_LESL] = helper_le_ldul_mmu,
1067 [MO_BEUW] = helper_be_lduw_mmu,
1068 [MO_BEUL] = helper_be_ldul_mmu,
1069 [MO_BEQ] = helper_be_ldq_mmu,
1070 [MO_BESW] = helper_be_ldsw_mmu,
1071 [MO_BESL] = helper_be_ldul_mmu,
1074 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1075 * uintxx_t val, int mmu_idx, uintptr_t ra)
1077 static void * const qemu_st_helpers[16] = {
1078 [MO_UB] = helper_ret_stb_mmu,
1079 [MO_LEUW] = helper_le_stw_mmu,
1080 [MO_LEUL] = helper_le_stl_mmu,
1081 [MO_LEQ] = helper_le_stq_mmu,
1082 [MO_BEUW] = helper_be_stw_mmu,
1083 [MO_BEUL] = helper_be_stl_mmu,
1084 [MO_BEQ] = helper_be_stq_mmu,
/* Helper routines for marshalling helper function arguments into
 * the correct registers and stack.
 * argreg is where we want to put this argument, arg is the argument itself.
 * Return value is the updated argreg ready for the next call.
 * Note that argreg 0..3 is real registers, 4+ on stack.
 *
 * We provide routines for arguments which are: immediate, 32 bit
 * value in register, 16 and 8 bit values in register (which must be zero
 * extended before use) and 64 bit value in a lo:hi register pair.
 *
 * MOV_ARG(s, COND_AL, argreg, arg) places the argument in a register;
 * EXT_ARG is an expression evaluated before storing arg to its stack slot.
 */
#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
{                                                                          \
    if (argreg >= 4) {                                                     \
        int ofs = (argreg - 4) * 4;                                        \
        EXT_ARG;                                                           \
        assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);                      \
        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
    } else {                                                               \
        MOV_ARG(s, COND_AL, argreg, arg);                                  \
    }                                                                      \
    return argreg + 1;                                                     \
}
1111 DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
1112 (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1113 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
1114 (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1115 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
1116 (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1117 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
1119 static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
1120 TCGReg arglo, TCGReg arghi)
1122 /* 64 bit arguments must go in even/odd register pairs
1123 * and in 8-aligned stack slots.
1125 if (argreg & 1) {
1126 argreg++;
1128 if (use_armv6_instructions && argreg >= 4
1129 && (arglo & 1) == 0 && arghi == arglo + 1) {
1130 tcg_out_strd_8(s, COND_AL, arglo,
1131 TCG_REG_CALL_STACK, (argreg - 4) * 4);
1132 return argreg + 2;
1133 } else {
1134 argreg = tcg_out_arg_reg32(s, argreg, arglo);
1135 argreg = tcg_out_arg_reg32(s, argreg, arghi);
1136 return argreg;
1140 #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
1142 /* We're expecting to use an 8-bit immediate and to mask. */
1143 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
1145 /* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
1146 Using the offset of the second entry in the last tlb table ensures
1147 that we can index all of the elements of the first entry. */
1148 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
1149 > 0xffff);
1151 /* Load and compare a TLB entry, leaving the flags set. Returns the register
1152 containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
1154 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1155 TCGMemOp s_bits, int mem_index, bool is_load)
1157 TCGReg base = TCG_AREG0;
1158 int cmp_off =
1159 (is_load
1160 ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1161 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
1162 int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
1164 /* Should generate something like the following:
1165 * shr tmp, addrlo, #TARGET_PAGE_BITS (1)
1166 * add r2, env, #high
1167 * and r0, tmp, #(CPU_TLB_SIZE - 1) (2)
1168 * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3)
1169 * ldr r0, [r2, #cmp] (4)
1170 * tst addrlo, #s_mask
1171 * ldr r2, [r2, #add] (5)
1172 * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS
1174 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
1175 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
1177 /* We checked that the offset is contained within 16 bits above. */
1178 if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
1179 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1180 (24 << 7) | (cmp_off >> 8));
1181 base = TCG_REG_R2;
1182 add_off -= cmp_off & 0xff00;
1183 cmp_off &= 0xff;
1186 tcg_out_dat_imm(s, COND_AL, ARITH_AND,
1187 TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
1188 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1189 TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
1191 /* Load the tlb comparator. Use ldrd if needed and available,
1192 but due to how the pointer needs setting up, ldm isn't useful.
1193 Base arm5 doesn't have ldrd, but armv5te does. */
1194 if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
1195 tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1196 } else {
1197 tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1198 if (TARGET_LONG_BITS == 64) {
1199 tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
1203 /* Check alignment. */
1204 if (s_bits) {
1205 tcg_out_dat_imm(s, COND_AL, ARITH_TST,
1206 0, addrlo, (1 << s_bits) - 1);
1209 /* Load the tlb addend. */
1210 tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
1212 tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
1213 TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
1215 if (TARGET_LONG_BITS == 64) {
1216 tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
1217 TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
1220 return TCG_REG_R2;
1223 /* Record the context of a call to the out of line helper code for the slow
1224 path for a load or store, so that we can later generate the correct
1225 helper code. */
1226 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
1227 TCGReg datalo, TCGReg datahi, TCGReg addrlo,
1228 TCGReg addrhi, int mem_index,
1229 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1231 TCGLabelQemuLdst *label = new_ldst_label(s);
1233 label->is_ld = is_ld;
1234 label->opc = opc;
1235 label->datalo_reg = datalo;
1236 label->datahi_reg = datahi;
1237 label->addrlo_reg = addrlo;
1238 label->addrhi_reg = addrhi;
1239 label->mem_index = mem_index;
1240 label->raddr = raddr;
1241 label->label_ptr[0] = label_ptr;
1244 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1246 TCGReg argreg, datalo, datahi;
1247 TCGMemOp opc = lb->opc;
1248 void *func;
1250 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1252 argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
1253 if (TARGET_LONG_BITS == 64) {
1254 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1255 } else {
1256 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1258 argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
1259 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1261 /* For armv6 we can use the canonical unsigned helpers and minimize
1262 icache usage. For pre-armv6, use the signed helpers since we do
1263 not have a single insn sign-extend. */
1264 if (use_armv6_instructions) {
1265 func = qemu_ld_helpers[opc & ~MO_SIGN];
1266 } else {
1267 func = qemu_ld_helpers[opc];
1268 if (opc & MO_SIGN) {
1269 opc = MO_UL;
1272 tcg_out_call(s, func);
1274 datalo = lb->datalo_reg;
1275 datahi = lb->datahi_reg;
1276 switch (opc & MO_SSIZE) {
1277 case MO_SB:
1278 tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
1279 break;
1280 case MO_SW:
1281 tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
1282 break;
1283 default:
1284 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1285 break;
1286 case MO_Q:
1287 if (datalo != TCG_REG_R1) {
1288 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1289 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1290 } else if (datahi != TCG_REG_R0) {
1291 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1292 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1293 } else {
1294 tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
1295 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1296 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
1298 break;
1301 tcg_out_goto(s, COND_AL, lb->raddr);
1304 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1306 TCGReg argreg, datalo, datahi;
1307 TCGMemOp opc = lb->opc;
1309 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1311 argreg = TCG_REG_R0;
1312 argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
1313 if (TARGET_LONG_BITS == 64) {
1314 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1315 } else {
1316 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1319 datalo = lb->datalo_reg;
1320 datahi = lb->datahi_reg;
1321 switch (opc & MO_SIZE) {
1322 case MO_8:
1323 argreg = tcg_out_arg_reg8(s, argreg, datalo);
1324 break;
1325 case MO_16:
1326 argreg = tcg_out_arg_reg16(s, argreg, datalo);
1327 break;
1328 case MO_32:
1329 default:
1330 argreg = tcg_out_arg_reg32(s, argreg, datalo);
1331 break;
1332 case MO_64:
1333 argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
1334 break;
1337 argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
1338 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1340 /* Tail-call to the helper, which will return to the fast path. */
1341 tcg_out_goto(s, COND_AL, qemu_st_helpers[opc]);
1343 #endif /* SOFTMMU */
1345 static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
1346 TCGReg datalo, TCGReg datahi,
1347 TCGReg addrlo, TCGReg addend)
1349 TCGMemOp bswap = opc & MO_BSWAP;
1351 switch (opc & MO_SSIZE) {
1352 case MO_UB:
1353 tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
1354 break;
1355 case MO_SB:
1356 tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
1357 break;
1358 case MO_UW:
1359 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1360 if (bswap) {
1361 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1363 break;
1364 case MO_SW:
1365 if (bswap) {
1366 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1367 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1368 } else {
1369 tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
1371 break;
1372 case MO_UL:
1373 default:
1374 tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
1375 if (bswap) {
1376 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1378 break;
1379 case MO_Q:
1381 TCGReg dl = (bswap ? datahi : datalo);
1382 TCGReg dh = (bswap ? datalo : datahi);
1384 /* Avoid ldrd for user-only emulation, to handle unaligned. */
1385 if (USING_SOFTMMU && use_armv6_instructions
1386 && (dl & 1) == 0 && dh == dl + 1) {
1387 tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
1388 } else if (dl != addend) {
1389 tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
1390 tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
1391 } else {
1392 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
1393 addend, addrlo, SHIFT_IMM_LSL(0));
1394 tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
1395 tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
1397 if (bswap) {
1398 tcg_out_bswap32(s, COND_AL, dl, dl);
1399 tcg_out_bswap32(s, COND_AL, dh, dh);
1402 break;
1406 static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
1407 TCGReg datalo, TCGReg datahi,
1408 TCGReg addrlo)
1410 TCGMemOp bswap = opc & MO_BSWAP;
1412 switch (opc & MO_SSIZE) {
1413 case MO_UB:
1414 tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
1415 break;
1416 case MO_SB:
1417 tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
1418 break;
1419 case MO_UW:
1420 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1421 if (bswap) {
1422 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1424 break;
1425 case MO_SW:
1426 if (bswap) {
1427 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1428 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1429 } else {
1430 tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
1432 break;
1433 case MO_UL:
1434 default:
1435 tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
1436 if (bswap) {
1437 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1439 break;
1440 case MO_Q:
1442 TCGReg dl = (bswap ? datahi : datalo);
1443 TCGReg dh = (bswap ? datalo : datahi);
1445 /* Avoid ldrd for user-only emulation, to handle unaligned. */
1446 if (USING_SOFTMMU && use_armv6_instructions
1447 && (dl & 1) == 0 && dh == dl + 1) {
1448 tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
1449 } else if (dl == addrlo) {
1450 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1451 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1452 } else {
1453 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1454 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1456 if (bswap) {
1457 tcg_out_bswap32(s, COND_AL, dl, dl);
1458 tcg_out_bswap32(s, COND_AL, dh, dh);
1461 break;
1465 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1467 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1468 TCGMemOp opc;
1469 #ifdef CONFIG_SOFTMMU
1470 int mem_index;
1471 TCGReg addend;
1472 tcg_insn_unit *label_ptr;
1473 #endif
1475 datalo = *args++;
1476 datahi = (is64 ? *args++ : 0);
1477 addrlo = *args++;
1478 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1479 opc = *args++;
1481 #ifdef CONFIG_SOFTMMU
1482 mem_index = *args;
1483 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
1485 /* This a conditional BL only to load a pointer within this opcode into LR
1486 for the slow path. We will not be using the value for a tail call. */
1487 label_ptr = s->code_ptr;
1488 tcg_out_bl_noaddr(s, COND_NE);
1490 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
1492 add_qemu_ldst_label(s, true, opc, datalo, datahi, addrlo, addrhi,
1493 mem_index, s->code_ptr, label_ptr);
1494 #else /* !CONFIG_SOFTMMU */
1495 if (GUEST_BASE) {
1496 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
1497 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
1498 } else {
1499 tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
1501 #endif
1504 static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
1505 TCGReg datalo, TCGReg datahi,
1506 TCGReg addrlo, TCGReg addend)
1508 TCGMemOp bswap = opc & MO_BSWAP;
1510 switch (opc & MO_SIZE) {
1511 case MO_8:
1512 tcg_out_st8_r(s, cond, datalo, addrlo, addend);
1513 break;
1514 case MO_16:
1515 if (bswap) {
1516 tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
1517 tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
1518 } else {
1519 tcg_out_st16_r(s, cond, datalo, addrlo, addend);
1521 break;
1522 case MO_32:
1523 default:
1524 if (bswap) {
1525 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1526 tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
1527 } else {
1528 tcg_out_st32_r(s, cond, datalo, addrlo, addend);
1530 break;
1531 case MO_64:
1532 /* Avoid strd for user-only emulation, to handle unaligned. */
1533 if (bswap) {
1534 tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
1535 tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
1536 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1537 tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
1538 } else if (USING_SOFTMMU && use_armv6_instructions
1539 && (datalo & 1) == 0 && datahi == datalo + 1) {
1540 tcg_out_strd_r(s, cond, datalo, addrlo, addend);
1541 } else {
1542 tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
1543 tcg_out_st32_12(s, cond, datahi, addend, 4);
1545 break;
1549 static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
1550 TCGReg datalo, TCGReg datahi,
1551 TCGReg addrlo)
1553 TCGMemOp bswap = opc & MO_BSWAP;
1555 switch (opc & MO_SIZE) {
1556 case MO_8:
1557 tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
1558 break;
1559 case MO_16:
1560 if (bswap) {
1561 tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
1562 tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
1563 } else {
1564 tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
1566 break;
1567 case MO_32:
1568 default:
1569 if (bswap) {
1570 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1571 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1572 } else {
1573 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1575 break;
1576 case MO_64:
1577 /* Avoid strd for user-only emulation, to handle unaligned. */
1578 if (bswap) {
1579 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
1580 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1581 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1582 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
1583 } else if (USING_SOFTMMU && use_armv6_instructions
1584 && (datalo & 1) == 0 && datahi == datalo + 1) {
1585 tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
1586 } else {
1587 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1588 tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
1590 break;
1594 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1596 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1597 TCGMemOp opc;
1598 #ifdef CONFIG_SOFTMMU
1599 int mem_index;
1600 TCGReg addend;
1601 tcg_insn_unit *label_ptr;
1602 #endif
1604 datalo = *args++;
1605 datahi = (is64 ? *args++ : 0);
1606 addrlo = *args++;
1607 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1608 opc = *args++;
1610 #ifdef CONFIG_SOFTMMU
1611 mem_index = *args;
1612 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
1614 tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
1616 /* The conditional call must come last, as we're going to return here. */
1617 label_ptr = s->code_ptr;
1618 tcg_out_bl_noaddr(s, COND_NE);
1620 add_qemu_ldst_label(s, false, opc, datalo, datahi, addrlo, addrhi,
1621 mem_index, s->code_ptr, label_ptr);
1622 #else /* !CONFIG_SOFTMMU */
1623 if (GUEST_BASE) {
1624 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
1625 tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
1626 datahi, addrlo, TCG_REG_TMP);
1627 } else {
1628 tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
1630 #endif
1633 static tcg_insn_unit *tb_ret_addr;
1635 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1636 const TCGArg *args, const int *const_args)
1638 TCGArg a0, a1, a2, a3, a4, a5;
1639 int c;
1641 switch (opc) {
1642 case INDEX_op_exit_tb:
1643 tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
1644 tcg_out_goto(s, COND_AL, tb_ret_addr);
1645 break;
1646 case INDEX_op_goto_tb:
1647 if (s->tb_jmp_offset) {
1648 /* Direct jump method */
1649 s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
1650 tcg_out_b_noaddr(s, COND_AL);
1651 } else {
1652 /* Indirect jump method */
1653 intptr_t ptr = (intptr_t)(s->tb_next + args[0]);
1654 tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
1655 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
1657 s->tb_next_offset[args[0]] = tcg_current_code_size(s);
1658 break;
1659 case INDEX_op_br:
1660 tcg_out_goto_label(s, COND_AL, args[0]);
1661 break;
1663 case INDEX_op_ld8u_i32:
1664 tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
1665 break;
1666 case INDEX_op_ld8s_i32:
1667 tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
1668 break;
1669 case INDEX_op_ld16u_i32:
1670 tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
1671 break;
1672 case INDEX_op_ld16s_i32:
1673 tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
1674 break;
1675 case INDEX_op_ld_i32:
1676 tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
1677 break;
1678 case INDEX_op_st8_i32:
1679 tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
1680 break;
1681 case INDEX_op_st16_i32:
1682 tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
1683 break;
1684 case INDEX_op_st_i32:
1685 tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
1686 break;
1688 case INDEX_op_movcond_i32:
1689 /* Constraints mean that v2 is always in the same register as dest,
1690 * so we only need to do "if condition passed, move v1 to dest".
1692 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1693 args[1], args[2], const_args[2]);
1694 tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
1695 ARITH_MVN, args[0], 0, args[3], const_args[3]);
1696 break;
1697 case INDEX_op_add_i32:
1698 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
1699 args[0], args[1], args[2], const_args[2]);
1700 break;
1701 case INDEX_op_sub_i32:
1702 if (const_args[1]) {
1703 if (const_args[2]) {
1704 tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
1705 } else {
1706 tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
1707 args[0], args[2], args[1], 1);
1709 } else {
1710 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
1711 args[0], args[1], args[2], const_args[2]);
1713 break;
1714 case INDEX_op_and_i32:
1715 tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
1716 args[0], args[1], args[2], const_args[2]);
1717 break;
1718 case INDEX_op_andc_i32:
1719 tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
1720 args[0], args[1], args[2], const_args[2]);
1721 break;
1722 case INDEX_op_or_i32:
1723 c = ARITH_ORR;
1724 goto gen_arith;
1725 case INDEX_op_xor_i32:
1726 c = ARITH_EOR;
1727 /* Fall through. */
1728 gen_arith:
1729 tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
1730 break;
1731 case INDEX_op_add2_i32:
1732 a0 = args[0], a1 = args[1], a2 = args[2];
1733 a3 = args[3], a4 = args[4], a5 = args[5];
1734 if (a0 == a3 || (a0 == a5 && !const_args[5])) {
1735 a0 = TCG_REG_TMP;
1737 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
1738 a0, a2, a4, const_args[4]);
1739 tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
1740 a1, a3, a5, const_args[5]);
1741 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1742 break;
1743 case INDEX_op_sub2_i32:
1744 a0 = args[0], a1 = args[1], a2 = args[2];
1745 a3 = args[3], a4 = args[4], a5 = args[5];
1746 if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
1747 a0 = TCG_REG_TMP;
1749 if (const_args[2]) {
1750 if (const_args[4]) {
1751 tcg_out_movi32(s, COND_AL, a0, a4);
1752 a4 = a0;
1754 tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
1755 } else {
1756 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
1757 ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
1759 if (const_args[3]) {
1760 if (const_args[5]) {
1761 tcg_out_movi32(s, COND_AL, a1, a5);
1762 a5 = a1;
1764 tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
1765 } else {
1766 tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
1767 a1, a3, a5, const_args[5]);
1769 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1770 break;
1771 case INDEX_op_neg_i32:
1772 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
1773 break;
1774 case INDEX_op_not_i32:
1775 tcg_out_dat_reg(s, COND_AL,
1776 ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
1777 break;
1778 case INDEX_op_mul_i32:
1779 tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
1780 break;
1781 case INDEX_op_mulu2_i32:
1782 tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1783 break;
1784 case INDEX_op_muls2_i32:
1785 tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1786 break;
1787 /* XXX: Perhaps args[2] & 0x1f is wrong */
1788 case INDEX_op_shl_i32:
1789 c = const_args[2] ?
1790 SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
1791 goto gen_shift32;
1792 case INDEX_op_shr_i32:
1793 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
1794 SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
1795 goto gen_shift32;
1796 case INDEX_op_sar_i32:
1797 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
1798 SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
1799 goto gen_shift32;
1800 case INDEX_op_rotr_i32:
1801 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
1802 SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
1803 /* Fall through. */
1804 gen_shift32:
1805 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
1806 break;
1808 case INDEX_op_rotl_i32:
1809 if (const_args[2]) {
1810 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1811 ((0x20 - args[2]) & 0x1f) ?
1812 SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
1813 SHIFT_IMM_LSL(0));
1814 } else {
1815 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
1816 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1817 SHIFT_REG_ROR(TCG_REG_TMP));
1819 break;
1821 case INDEX_op_brcond_i32:
1822 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1823 args[0], args[1], const_args[1]);
1824 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
1825 break;
1826 case INDEX_op_brcond2_i32:
1827 /* The resulting conditions are:
1828 * TCG_COND_EQ --> a0 == a2 && a1 == a3,
1829 * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3,
1830 * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3,
1831 * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
1832 * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
1833 * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3,
1835 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1836 args[1], args[3], const_args[3]);
1837 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1838 args[0], args[2], const_args[2]);
1839 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
1840 break;
1841 case INDEX_op_setcond_i32:
1842 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1843 args[1], args[2], const_args[2]);
1844 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
1845 ARITH_MOV, args[0], 0, 1);
1846 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
1847 ARITH_MOV, args[0], 0, 0);
1848 break;
1849 case INDEX_op_setcond2_i32:
1850 /* See brcond2_i32 comment */
1851 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1852 args[2], args[4], const_args[4]);
1853 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1854 args[1], args[3], const_args[3]);
1855 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
1856 ARITH_MOV, args[0], 0, 1);
1857 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
1858 ARITH_MOV, args[0], 0, 0);
1859 break;
1861 case INDEX_op_qemu_ld_i32:
1862 tcg_out_qemu_ld(s, args, 0);
1863 break;
1864 case INDEX_op_qemu_ld_i64:
1865 tcg_out_qemu_ld(s, args, 1);
1866 break;
1867 case INDEX_op_qemu_st_i32:
1868 tcg_out_qemu_st(s, args, 0);
1869 break;
1870 case INDEX_op_qemu_st_i64:
1871 tcg_out_qemu_st(s, args, 1);
1872 break;
1874 case INDEX_op_bswap16_i32:
1875 tcg_out_bswap16(s, COND_AL, args[0], args[1]);
1876 break;
1877 case INDEX_op_bswap32_i32:
1878 tcg_out_bswap32(s, COND_AL, args[0], args[1]);
1879 break;
1881 case INDEX_op_ext8s_i32:
1882 tcg_out_ext8s(s, COND_AL, args[0], args[1]);
1883 break;
1884 case INDEX_op_ext16s_i32:
1885 tcg_out_ext16s(s, COND_AL, args[0], args[1]);
1886 break;
1887 case INDEX_op_ext16u_i32:
1888 tcg_out_ext16u(s, COND_AL, args[0], args[1]);
1889 break;
1891 case INDEX_op_deposit_i32:
1892 tcg_out_deposit(s, COND_AL, args[0], args[2],
1893 args[3], args[4], const_args[2]);
1894 break;
1896 case INDEX_op_div_i32:
1897 tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
1898 break;
1899 case INDEX_op_divu_i32:
1900 tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
1901 break;
1903 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1904 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1905 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1906 default:
1907 tcg_abort();
1911 static const TCGTargetOpDef arm_op_defs[] = {
1912 { INDEX_op_exit_tb, { } },
1913 { INDEX_op_goto_tb, { } },
1914 { INDEX_op_br, { } },
1916 { INDEX_op_ld8u_i32, { "r", "r" } },
1917 { INDEX_op_ld8s_i32, { "r", "r" } },
1918 { INDEX_op_ld16u_i32, { "r", "r" } },
1919 { INDEX_op_ld16s_i32, { "r", "r" } },
1920 { INDEX_op_ld_i32, { "r", "r" } },
1921 { INDEX_op_st8_i32, { "r", "r" } },
1922 { INDEX_op_st16_i32, { "r", "r" } },
1923 { INDEX_op_st_i32, { "r", "r" } },
1925 /* TODO: "r", "r", "ri" */
1926 { INDEX_op_add_i32, { "r", "r", "rIN" } },
1927 { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
1928 { INDEX_op_mul_i32, { "r", "r", "r" } },
1929 { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
1930 { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
1931 { INDEX_op_and_i32, { "r", "r", "rIK" } },
1932 { INDEX_op_andc_i32, { "r", "r", "rIK" } },
1933 { INDEX_op_or_i32, { "r", "r", "rI" } },
1934 { INDEX_op_xor_i32, { "r", "r", "rI" } },
1935 { INDEX_op_neg_i32, { "r", "r" } },
1936 { INDEX_op_not_i32, { "r", "r" } },
1938 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1939 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1940 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1941 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1942 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1944 { INDEX_op_brcond_i32, { "r", "rIN" } },
1945 { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
1946 { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },
1948 { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
1949 { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
1950 { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
1951 { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
1953 #if TARGET_LONG_BITS == 32
1954 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1955 { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
1956 { INDEX_op_qemu_st_i32, { "s", "s" } },
1957 { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
1958 #else
1959 { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
1960 { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
1961 { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
1962 { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
1963 #endif
1965 { INDEX_op_bswap16_i32, { "r", "r" } },
1966 { INDEX_op_bswap32_i32, { "r", "r" } },
1968 { INDEX_op_ext8s_i32, { "r", "r" } },
1969 { INDEX_op_ext16s_i32, { "r", "r" } },
1970 { INDEX_op_ext16u_i32, { "r", "r" } },
1972 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1974 { INDEX_op_div_i32, { "r", "r", "r" } },
1975 { INDEX_op_divu_i32, { "r", "r", "r" } },
1977 { -1 },
1980 static void tcg_target_init(TCGContext *s)
1982 /* Only probe for the platform and capabilities if we havn't already
1983 determined maximum values at compile time. */
1984 #ifndef use_idiv_instructions
1986 unsigned long hwcap = qemu_getauxval(AT_HWCAP);
1987 use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
1989 #endif
1990 if (__ARM_ARCH < 7) {
1991 const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
1992 if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
1993 arm_arch = pl[1] - '0';
1997 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
1998 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1999 (1 << TCG_REG_R0) |
2000 (1 << TCG_REG_R1) |
2001 (1 << TCG_REG_R2) |
2002 (1 << TCG_REG_R3) |
2003 (1 << TCG_REG_R12) |
2004 (1 << TCG_REG_R14));
2006 tcg_regset_clear(s->reserved_regs);
2007 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2008 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2009 tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
2011 tcg_add_target_add_op_defs(arm_op_defs);
2014 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
2015 TCGReg arg1, intptr_t arg2)
2017 tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
2020 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
2021 TCGReg arg1, intptr_t arg2)
2023 tcg_out_st32(s, COND_AL, arg, arg1, arg2);
2026 static inline void tcg_out_mov(TCGContext *s, TCGType type,
2027 TCGReg ret, TCGReg arg)
2029 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
2032 static inline void tcg_out_movi(TCGContext *s, TCGType type,
2033 TCGReg ret, tcg_target_long arg)
2035 tcg_out_movi32(s, COND_AL, ret, arg);
/* The frame layout is expressed with macros so that the prologue generator
   and the unwind information registered with the debugger agree on it.  */

/* Bytes pushed by the prologue: r4..r11 (11 - 4 + 1 registers) plus lr.  */
#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))

/* Whole frame: pushed registers, outgoing call arguments and the TCG
   temporary buffer, rounded up to the stack alignment.  */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & -TCG_TARGET_STACK_ALIGN)
2050 static void tcg_target_qemu_prologue(TCGContext *s)
2052 int stack_addend;
2054 /* Calling convention requires us to save r4-r11 and lr. */
2055 /* stmdb sp!, { r4 - r11, lr } */
2056 tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
2058 /* Reserve callee argument and tcg temp space. */
2059 stack_addend = FRAME_SIZE - PUSH_SIZE;
2061 tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
2062 TCG_REG_CALL_STACK, stack_addend, 1);
2063 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2064 CPU_TEMP_BUF_NLONGS * sizeof(long));
2066 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2068 tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
2069 tb_ret_addr = s->code_ptr;
2071 /* Epilogue. We branch here via tb_ret_addr. */
2072 tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
2073 TCG_REG_CALL_STACK, stack_addend, 1);
2075 /* ldmia sp!, { r4 - r11, pc } */
2076 tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
2079 typedef struct {
2080 DebugFrameHeader h;
2081 uint8_t fde_def_cfa[4];
2082 uint8_t fde_reg_ofs[18];
2083 } DebugFrame;
2085 #define ELF_HOST_MACHINE EM_ARM
2087 /* We're expecting a 2 byte uleb128 encoded value. */
2088 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2090 static const DebugFrame debug_frame = {
2091 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2092 .h.cie.id = -1,
2093 .h.cie.version = 1,
2094 .h.cie.code_align = 1,
2095 .h.cie.data_align = 0x7c, /* sleb128 -4 */
2096 .h.cie.return_column = 14,
2098 /* Total FDE size does not include the "len" member. */
2099 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2101 .fde_def_cfa = {
2102 12, 13, /* DW_CFA_def_cfa sp, ... */
2103 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2104 (FRAME_SIZE >> 7)
2106 .fde_reg_ofs = {
2107 /* The following must match the stmdb in the prologue. */
2108 0x8e, 1, /* DW_CFA_offset, lr, -4 */
2109 0x8b, 2, /* DW_CFA_offset, r11, -8 */
2110 0x8a, 3, /* DW_CFA_offset, r10, -12 */
2111 0x89, 4, /* DW_CFA_offset, r9, -16 */
2112 0x88, 5, /* DW_CFA_offset, r8, -20 */
2113 0x87, 6, /* DW_CFA_offset, r7, -24 */
2114 0x86, 7, /* DW_CFA_offset, r6, -28 */
2115 0x85, 8, /* DW_CFA_offset, r5, -32 */
2116 0x84, 9, /* DW_CFA_offset, r4, -36 */
2120 void tcg_register_jit(void *buf, size_t buf_size)
2122 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));