1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "qemu/osdep.h"
26 #include "tcg-be-null.h"
28 #ifndef NDEBUG
29 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
30 "%g0",
31 "%g1",
32 "%g2",
33 "%g3",
34 "%g4",
35 "%g5",
36 "%g6",
37 "%g7",
38 "%o0",
39 "%o1",
40 "%o2",
41 "%o3",
42 "%o4",
43 "%o5",
44 "%o6",
45 "%o7",
46 "%l0",
47 "%l1",
48 "%l2",
49 "%l3",
50 "%l4",
51 "%l5",
52 "%l6",
53 "%l7",
54 "%i0",
55 "%i1",
56 "%i2",
57 "%i3",
58 "%i4",
59 "%i5",
60 "%i6",
61 "%i7",
63 #endif
65 #ifdef __arch64__
66 # define SPARC64 1
67 #else
68 # define SPARC64 0
69 #endif
71 /* Note that sparcv8plus can only hold 64 bit quantities in %g and %o
72 registers. These are saved manually by the kernel in full 64-bit
73 slots. The %i and %l registers are saved by the register window
74 mechanism, which only allocates space for 32 bits. Given that this
75 window spill/fill can happen on any signal, we must consider the
76 high bits of the %i and %l registers garbage at all times. */
77 #if SPARC64
78 # define ALL_64 0xffffffffu
79 #else
80 # define ALL_64 0xffffu
81 #endif
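/* Illustration: regset bits follow the naming order listed above
   (%g0..%g7 = 0..7, %o0..%o7 = 8..15, %l0..%l7 = 16..23, %i0..%i7 = 24..31,
   assuming the TCG_REG_* enum matches tcg_target_reg_names).  So the
   sparcv8plus mask 0xffff covers exactly the %g and %o registers whose
   high halves survive a window spill/fill, while 0xffffffff on sparc64
   admits all 32 integer registers. */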
83 /* Define some temporary registers. T2 is used for constant generation. */
84 #define TCG_REG_T1 TCG_REG_G1
85 #define TCG_REG_T2 TCG_REG_O7
87 #ifndef CONFIG_SOFTMMU
88 # define TCG_GUEST_BASE_REG TCG_REG_I5
89 #endif
91 static const int tcg_target_reg_alloc_order[] = {
92 TCG_REG_L0,
93 TCG_REG_L1,
94 TCG_REG_L2,
95 TCG_REG_L3,
96 TCG_REG_L4,
97 TCG_REG_L5,
98 TCG_REG_L6,
99 TCG_REG_L7,
101 TCG_REG_I0,
102 TCG_REG_I1,
103 TCG_REG_I2,
104 TCG_REG_I3,
105 TCG_REG_I4,
106 TCG_REG_I5,
108 TCG_REG_G2,
109 TCG_REG_G3,
110 TCG_REG_G4,
111 TCG_REG_G5,
113 TCG_REG_O0,
114 TCG_REG_O1,
115 TCG_REG_O2,
116 TCG_REG_O3,
117 TCG_REG_O4,
118 TCG_REG_O5,
121 static const int tcg_target_call_iarg_regs[6] = {
122 TCG_REG_O0,
123 TCG_REG_O1,
124 TCG_REG_O2,
125 TCG_REG_O3,
126 TCG_REG_O4,
127 TCG_REG_O5,
130 static const int tcg_target_call_oarg_regs[] = {
131 TCG_REG_O0,
132 TCG_REG_O1,
133 TCG_REG_O2,
134 TCG_REG_O3,
137 #define INSN_OP(x) ((x) << 30)
138 #define INSN_OP2(x) ((x) << 22)
139 #define INSN_OP3(x) ((x) << 19)
140 #define INSN_OPF(x) ((x) << 5)
141 #define INSN_RD(x) ((x) << 25)
142 #define INSN_RS1(x) ((x) << 14)
143 #define INSN_RS2(x) (x)
144 #define INSN_ASI(x) ((x) << 5)
146 #define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
147 #define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
148 #define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
149 #define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20))
150 #define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
151 #define INSN_COND(x) ((x) << 25)
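/* Sketch of how the fields above combine into one 32-bit instruction
   word (SPARC format 3): bits 31:30 = op, 29:25 = rd, 24:19 = op3,
   18:14 = rs1, bit 13 = the immediate flag, 12:0 = simm13 or 4:0 = rs2.
   For example, "add %o1, %o2, %o0" would be assembled as
       INSN_OP(2) | INSN_RD(8) | INSN_OP3(0x00) | INSN_RS1(9) | INSN_RS2(10)
   which works out to 0x9002400a (register numbers assume the enum order
   noted earlier). */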
153 #define COND_N 0x0
154 #define COND_E 0x1
155 #define COND_LE 0x2
156 #define COND_L 0x3
157 #define COND_LEU 0x4
158 #define COND_CS 0x5
159 #define COND_NEG 0x6
160 #define COND_VS 0x7
161 #define COND_A 0x8
162 #define COND_NE 0x9
163 #define COND_G 0xa
164 #define COND_GE 0xb
165 #define COND_GU 0xc
166 #define COND_CC 0xd
167 #define COND_POS 0xe
168 #define COND_VC 0xf
169 #define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))
171 #define RCOND_Z 1
172 #define RCOND_LEZ 2
173 #define RCOND_LZ 3
174 #define RCOND_NZ 5
175 #define RCOND_GZ 6
176 #define RCOND_GEZ 7
178 #define MOVCC_ICC (1 << 18)
179 #define MOVCC_XCC (1 << 18 | 1 << 12)
181 #define BPCC_ICC 0
182 #define BPCC_XCC (2 << 20)
183 #define BPCC_PT (1 << 19)
184 #define BPCC_PN 0
185 #define BPCC_A (1 << 29)
187 #define BPR_PT BPCC_PT
189 #define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00))
190 #define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
191 #define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01))
192 #define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05))
193 #define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02))
194 #define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12))
195 #define ARITH_ORN (INSN_OP(2) | INSN_OP3(0x06))
196 #define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03))
197 #define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04))
198 #define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
199 #define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08))
200 #define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c))
201 #define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a))
202 #define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b))
203 #define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e))
204 #define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f))
205 #define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09))
206 #define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
207 #define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
208 #define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
209 #define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))
211 #define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11))
212 #define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16))
214 #define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25))
215 #define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26))
216 #define SHIFT_SRA (INSN_OP(2) | INSN_OP3(0x27))
218 #define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12))
219 #define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12))
220 #define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12))
222 #define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0))
223 #define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0))
224 #define JMPL (INSN_OP(2) | INSN_OP3(0x38))
225 #define RETURN (INSN_OP(2) | INSN_OP3(0x39))
226 #define SAVE (INSN_OP(2) | INSN_OP3(0x3c))
227 #define RESTORE (INSN_OP(2) | INSN_OP3(0x3d))
228 #define SETHI (INSN_OP(0) | INSN_OP2(0x4))
229 #define CALL INSN_OP(1)
230 #define LDUB (INSN_OP(3) | INSN_OP3(0x01))
231 #define LDSB (INSN_OP(3) | INSN_OP3(0x09))
232 #define LDUH (INSN_OP(3) | INSN_OP3(0x02))
233 #define LDSH (INSN_OP(3) | INSN_OP3(0x0a))
234 #define LDUW (INSN_OP(3) | INSN_OP3(0x00))
235 #define LDSW (INSN_OP(3) | INSN_OP3(0x08))
236 #define LDX (INSN_OP(3) | INSN_OP3(0x0b))
237 #define STB (INSN_OP(3) | INSN_OP3(0x05))
238 #define STH (INSN_OP(3) | INSN_OP3(0x06))
239 #define STW (INSN_OP(3) | INSN_OP3(0x04))
240 #define STX (INSN_OP(3) | INSN_OP3(0x0e))
241 #define LDUBA (INSN_OP(3) | INSN_OP3(0x11))
242 #define LDSBA (INSN_OP(3) | INSN_OP3(0x19))
243 #define LDUHA (INSN_OP(3) | INSN_OP3(0x12))
244 #define LDSHA (INSN_OP(3) | INSN_OP3(0x1a))
245 #define LDUWA (INSN_OP(3) | INSN_OP3(0x10))
246 #define LDSWA (INSN_OP(3) | INSN_OP3(0x18))
247 #define LDXA (INSN_OP(3) | INSN_OP3(0x1b))
248 #define STBA (INSN_OP(3) | INSN_OP3(0x15))
249 #define STHA (INSN_OP(3) | INSN_OP3(0x16))
250 #define STWA (INSN_OP(3) | INSN_OP3(0x14))
251 #define STXA (INSN_OP(3) | INSN_OP3(0x1e))
253 #ifndef ASI_PRIMARY_LITTLE
254 #define ASI_PRIMARY_LITTLE 0x88
255 #endif
257 #define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
258 #define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
259 #define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
260 #define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
261 #define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE))
263 #define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE))
264 #define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE))
265 #define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE))
267 #ifndef use_vis3_instructions
268 bool use_vis3_instructions;
269 #endif
271 static inline int check_fit_i64(int64_t val, unsigned int bits)
273 return val == sextract64(val, 0, bits);
276 static inline int check_fit_i32(int32_t val, unsigned int bits)
278 return val == sextract32(val, 0, bits);
281 #define check_fit_tl check_fit_i64
282 #if SPARC64
283 # define check_fit_ptr check_fit_i64
284 #else
285 # define check_fit_ptr check_fit_i32
286 #endif
288 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
289 intptr_t value, intptr_t addend)
291 uint32_t insn;
293 assert(addend == 0);
294 value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr);
296 switch (type) {
297 case R_SPARC_WDISP16:
298 if (!check_fit_ptr(value >> 2, 16)) {
299 tcg_abort();
301 insn = *code_ptr;
302 insn &= ~INSN_OFF16(-1);
303 insn |= INSN_OFF16(value);
304 *code_ptr = insn;
305 break;
306 case R_SPARC_WDISP19:
307 if (!check_fit_ptr(value >> 2, 19)) {
308 tcg_abort();
310 insn = *code_ptr;
311 insn &= ~INSN_OFF19(-1);
312 insn |= INSN_OFF19(value);
313 *code_ptr = insn;
314 break;
315 default:
316 tcg_abort();
320 /* parse target specific constraints */
321 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
323 const char *ct_str;
325 ct_str = *pct_str;
326 switch (ct_str[0]) {
327 case 'r':
328 ct->ct |= TCG_CT_REG;
329 tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
330 break;
331 case 'R':
332 ct->ct |= TCG_CT_REG;
333 tcg_regset_set32(ct->u.regs, 0, ALL_64);
334 break;
335 case 'A': /* qemu_ld/st address constraint */
336 ct->ct |= TCG_CT_REG;
337 tcg_regset_set32(ct->u.regs, 0,
338 TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff);
339 reserve_helpers:
340 tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
341 tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
342 tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
343 break;
344 case 's': /* qemu_st data 32-bit constraint */
345 ct->ct |= TCG_CT_REG;
346 tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
347 goto reserve_helpers;
348 case 'S': /* qemu_st data 64-bit constraint */
349 ct->ct |= TCG_CT_REG;
350 tcg_regset_set32(ct->u.regs, 0, ALL_64);
351 goto reserve_helpers;
352 case 'I':
353 ct->ct |= TCG_CT_CONST_S11;
354 break;
355 case 'J':
356 ct->ct |= TCG_CT_CONST_S13;
357 break;
358 case 'Z':
359 ct->ct |= TCG_CT_CONST_ZERO;
360 break;
361 default:
362 return -1;
364 ct_str++;
365 *pct_str = ct_str;
366 return 0;
369 /* test if a constant matches the constraint */
370 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
371 const TCGArgConstraint *arg_ct)
373 int ct = arg_ct->ct;
375 if (ct & TCG_CT_CONST) {
376 return 1;
379 if (type == TCG_TYPE_I32) {
380 val = (int32_t)val;
383 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
384 return 1;
385 } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
386 return 1;
387 } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
388 return 1;
389 } else {
390 return 0;
394 static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1,
395 TCGReg rs2, int op)
397 tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2));
400 static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1,
401 int32_t offset, int op)
403 tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset));
406 static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1,
407 int32_t val2, int val2const, int op)
409 tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1)
410 | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2)));
413 static inline void tcg_out_mov(TCGContext *s, TCGType type,
414 TCGReg ret, TCGReg arg)
416 if (ret != arg) {
417 tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
421 static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
423 tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
426 static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
428 tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
431 static void tcg_out_movi(TCGContext *s, TCGType type,
432 TCGReg ret, tcg_target_long arg)
434 tcg_target_long hi, lo = (int32_t)arg;
436 /* Make sure we test 32-bit constants for imm13 properly. */
437 if (type == TCG_TYPE_I32) {
438 arg = lo;
441 /* A 13-bit constant sign-extended to 64-bits. */
442 if (check_fit_tl(arg, 13)) {
443 tcg_out_movi_imm13(s, ret, arg);
444 return;
447 /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
448 if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
449 tcg_out_sethi(s, ret, arg);
450 if (arg & 0x3ff) {
451 tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
453 return;
456 /* A 32-bit constant sign-extended to 64-bits. */
457 if (arg == lo) {
458 tcg_out_sethi(s, ret, ~arg);
459 tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
460 return;
463 /* A 64-bit constant decomposed into 2 32-bit pieces. */
464 if (check_fit_i32(lo, 13)) {
465 hi = (arg - lo) >> 32;
466 tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
467 tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
468 tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
469 } else {
470 hi = arg >> 32;
471 tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
472 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo);
473 tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
474 tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
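/* Worked example of the decomposition above (illustrative only):
   loading the 32-bit constant 0x12345678 emits
       sethi  %hi(0x12345678), ret    ! ret = 0x12345400, top 22 bits
       or     ret, 0x278, ret         ! low 10 bits, 0x678 & 0x3ff
   A 64-bit constant builds its high half the same way, shifts it left
   32 with SLLX, and then merges the low half either as a 13-bit
   immediate ADD or via a second constant built in %o7 (TCG_REG_T2)
   followed by an OR. */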
478 static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1,
479 TCGReg a2, int op)
481 tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
484 static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
485 intptr_t offset, int op)
487 if (check_fit_ptr(offset, 13)) {
488 tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
489 INSN_IMM13(offset));
490 } else {
491 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
492 tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
496 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
497 TCGReg arg1, intptr_t arg2)
499 tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
502 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
503 TCGReg arg1, intptr_t arg2)
505 tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
508 static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg)
510 tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
511 tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
514 static inline void tcg_out_sety(TCGContext *s, TCGReg rs)
516 tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs));
519 static inline void tcg_out_rdy(TCGContext *s, TCGReg rd)
521 tcg_out32(s, RDY | INSN_RD(rd));
524 static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1,
525 int32_t val2, int val2const, int uns)
527 /* Load Y with the sign/zero extension of RS1 to 64-bits. */
528 if (uns) {
529 tcg_out_sety(s, TCG_REG_G0);
530 } else {
531 tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
532 tcg_out_sety(s, TCG_REG_T1);
535 tcg_out_arithc(s, rd, rs1, val2, val2const,
536 uns ? ARITH_UDIV : ARITH_SDIV);
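/* Illustration: the 32-bit UDIV/SDIV instructions divide the 64-bit
   value formed by {Y, rs1} by the second operand, so Y must hold the
   zero- or sign-extension of rs1.  E.g. with rs1 = -7 (0xfffffff9),
   "sra rs1, 31" yields 0xffffffff, making {Y, rs1} the 64-bit value
   0xfffffffffffffff9 == -7; in the unsigned case Y is simply zeroed
   by writing %g0. */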
539 static inline void tcg_out_nop(TCGContext *s)
541 tcg_out_sethi(s, TCG_REG_G0, 0);
544 static const uint8_t tcg_cond_to_bcond[] = {
545 [TCG_COND_EQ] = COND_E,
546 [TCG_COND_NE] = COND_NE,
547 [TCG_COND_LT] = COND_L,
548 [TCG_COND_GE] = COND_GE,
549 [TCG_COND_LE] = COND_LE,
550 [TCG_COND_GT] = COND_G,
551 [TCG_COND_LTU] = COND_CS,
552 [TCG_COND_GEU] = COND_CC,
553 [TCG_COND_LEU] = COND_LEU,
554 [TCG_COND_GTU] = COND_GU,
557 static const uint8_t tcg_cond_to_rcond[] = {
558 [TCG_COND_EQ] = RCOND_Z,
559 [TCG_COND_NE] = RCOND_NZ,
560 [TCG_COND_LT] = RCOND_LZ,
561 [TCG_COND_GT] = RCOND_GZ,
562 [TCG_COND_LE] = RCOND_LEZ,
563 [TCG_COND_GE] = RCOND_GEZ
566 static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
568 tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
571 static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l)
573 int off19;
575 if (l->has_value) {
576 off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr));
577 } else {
578 /* Make sure to preserve destinations during retranslation. */
579 off19 = *s->code_ptr & INSN_OFF19(-1);
580 tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0);
582 tcg_out_bpcc0(s, scond, flags, off19);
585 static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const)
587 tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
590 static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1,
591 int32_t arg2, int const_arg2, TCGLabel *l)
593 tcg_out_cmp(s, arg1, arg2, const_arg2);
594 tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l);
595 tcg_out_nop(s);
598 static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret,
599 int32_t v1, int v1const)
601 tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret)
602 | INSN_RS1(tcg_cond_to_bcond[cond])
603 | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1)));
606 static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
607 TCGReg c1, int32_t c2, int c2const,
608 int32_t v1, int v1const)
610 tcg_out_cmp(s, c1, c2, c2const);
611 tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
614 static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1,
615 int32_t arg2, int const_arg2, TCGLabel *l)
617 /* For 64-bit signed comparisons vs zero, we can avoid the compare. */
618 if (arg2 == 0 && !is_unsigned_cond(cond)) {
619 int off16;
621 if (l->has_value) {
622 off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr));
623 } else {
624 /* Make sure to preserve destinations during retranslation. */
625 off16 = *s->code_ptr & INSN_OFF16(-1);
626 tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0);
628 tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
629 | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
630 } else {
631 tcg_out_cmp(s, arg1, arg2, const_arg2);
632 tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l);
634 tcg_out_nop(s);
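/* Note: the zero-comparison path above uses the v9 branch-on-register
   instructions (op2 == 3), which test a full 64-bit register directly.
   Roughly, "brcond_i64 x, 0, TCG_COND_LT" becomes a single
   "brlz,pt %x, label" instead of "subcc %x, 0, %g0; bl,pt %xcc, label". */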
637 static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1,
638 int32_t v1, int v1const)
640 tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
641 | (tcg_cond_to_rcond[cond] << 10)
642 | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
645 static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
646 TCGReg c1, int32_t c2, int c2const,
647 int32_t v1, int v1const)
649 /* For 64-bit signed comparisons vs zero, we can avoid the compare.
650 Note that the immediate range is one bit smaller, so we must check
651 for that as well. */
652 if (c2 == 0 && !is_unsigned_cond(cond)
653 && (!v1const || check_fit_i32(v1, 10))) {
654 tcg_out_movr(s, cond, ret, c1, v1, v1const);
655 } else {
656 tcg_out_cmp(s, c1, c2, c2const);
657 tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
661 static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
662 TCGReg c1, int32_t c2, int c2const)
664 /* For 32-bit comparisons, we can play games with ADDC/SUBC. */
665 switch (cond) {
666 case TCG_COND_LTU:
667 case TCG_COND_GEU:
668 /* The result of the comparison is in the carry bit. */
669 break;
671 case TCG_COND_EQ:
672 case TCG_COND_NE:
673 /* For equality, we can transform to inequality vs zero. */
674 if (c2 != 0) {
675 tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR);
676 c2 = TCG_REG_T1;
677 } else {
678 c2 = c1;
680 c1 = TCG_REG_G0, c2const = 0;
681 cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
682 break;
684 case TCG_COND_GTU:
685 case TCG_COND_LEU:
686 /* If we don't need to load a constant into a register, we can
687 swap the operands on GTU/LEU. There's no benefit to loading
688 the constant into a temporary register. */
689 if (!c2const || c2 == 0) {
690 TCGReg t = c1;
691 c1 = c2;
692 c2 = t;
693 c2const = 0;
694 cond = tcg_swap_cond(cond);
695 break;
697 /* FALLTHRU */
699 default:
700 tcg_out_cmp(s, c1, c2, c2const);
701 tcg_out_movi_imm13(s, ret, 0);
702 tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
703 return;
706 tcg_out_cmp(s, c1, c2, c2const);
707 if (cond == TCG_COND_LTU) {
708 tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC);
709 } else {
710 tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC);
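/* Worked example of the carry games above: after "subcc c1, c2, %g0"
   the carry (borrow) bit is set exactly when c1 < c2 unsigned.  Then
   "addc %g0, 0, ret" computes 0 + 0 + C, i.e. 1 for LTU and 0 otherwise,
   while "subc %g0, -1, ret" computes 0 - (-1) - C = 1 - C, giving the
   GEU result.  E.g. c1 = 3, c2 = 5 sets the carry, so LTU yields 1 and
   GEU yields 0. */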
714 static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
715 TCGReg c1, int32_t c2, int c2const)
717 if (use_vis3_instructions) {
718 switch (cond) {
719 case TCG_COND_NE:
720 if (c2 != 0) {
721 break;
723 c2 = c1, c2const = 0, c1 = TCG_REG_G0;
724 /* FALLTHRU */
725 case TCG_COND_LTU:
726 tcg_out_cmp(s, c1, c2, c2const);
727 tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC);
728 return;
729 default:
730 break;
734 /* For 64-bit signed comparisons vs zero, we can avoid the compare
735 if the input does not overlap the output. */
736 if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
737 tcg_out_movi_imm13(s, ret, 0);
738 tcg_out_movr(s, cond, ret, c1, 1, 1);
739 } else {
740 tcg_out_cmp(s, c1, c2, c2const);
741 tcg_out_movi_imm13(s, ret, 0);
742 tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
746 static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh,
747 TCGReg al, TCGReg ah, int32_t bl, int blconst,
748 int32_t bh, int bhconst, int opl, int oph)
750 TCGReg tmp = TCG_REG_T1;
752 /* Note that the low parts are fully consumed before tmp is set. */
753 if (rl != ah && (bhconst || rl != bh)) {
754 tmp = rl;
757 tcg_out_arithc(s, tmp, al, bl, blconst, opl);
758 tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
759 tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
762 static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
763 TCGReg al, TCGReg ah, int32_t bl, int blconst,
764 int32_t bh, int bhconst, bool is_sub)
766 TCGReg tmp = TCG_REG_T1;
768 /* Note that the low parts are fully consumed before tmp is set. */
769 if (rl != ah && (bhconst || rl != bh)) {
770 tmp = rl;
773 tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC);
775 if (use_vis3_instructions && !is_sub) {
776 /* Note that ADDXC doesn't accept immediates. */
777 if (bhconst && bh != 0) {
778 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh);
779 bh = TCG_REG_T2;
781 tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
782 } else if (bh == TCG_REG_G0) {
783 /* If we have a zero, we can perform the operation in two insns,
784 with the arithmetic first, and a conditional move into place. */
785 if (rh == ah) {
786 tcg_out_arithi(s, TCG_REG_T2, ah, 1,
787 is_sub ? ARITH_SUB : ARITH_ADD);
788 tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0);
789 } else {
790 tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD);
791 tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0);
793 } else {
794 /* Otherwise adjust BH as if there is carry into T2 ... */
795 if (bhconst) {
796 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1));
797 } else {
798 tcg_out_arithi(s, TCG_REG_T2, bh, 1,
799 is_sub ? ARITH_SUB : ARITH_ADD);
801 /* ... smoosh T2 back to original BH if carry is clear ... */
802 tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst);
803 /* ... and finally perform the arithmetic with the new operand. */
804 tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD);
807 tcg_out_mov(s, TCG_TYPE_I64, rl, tmp);
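/* Worked example of the carry propagation above (general case, no VIS3):
   adding AH:AL = 0:0xffffffffffffffff to BH:BL = 5:1, the low ADDCC
   produces 0 and sets the xcc carry.  T2 is pre-loaded with BH + 1 = 6;
   the MOVCC on GEU would restore the original BH only if the carry were
   clear, so here T2 stays 6 and the final ADD gives the high result
   0 + 6 = 6, i.e. the carry has been folded into the high-part operand. */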
810 static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest)
812 ptrdiff_t disp = tcg_pcrel_diff(s, dest);
814 if (disp == (int32_t)disp) {
815 tcg_out32(s, CALL | (uint32_t)disp >> 2);
816 } else {
817 uintptr_t desti = (uintptr_t)dest;
818 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, desti & ~0xfff);
819 tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL);
823 static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
825 tcg_out_call_nodelay(s, dest);
826 tcg_out_nop(s);
829 #ifdef CONFIG_SOFTMMU
830 static tcg_insn_unit *qemu_ld_trampoline[16];
831 static tcg_insn_unit *qemu_st_trampoline[16];
833 static void build_trampolines(TCGContext *s)
835 static void * const qemu_ld_helpers[16] = {
836 [MO_UB] = helper_ret_ldub_mmu,
837 [MO_SB] = helper_ret_ldsb_mmu,
838 [MO_LEUW] = helper_le_lduw_mmu,
839 [MO_LESW] = helper_le_ldsw_mmu,
840 [MO_LEUL] = helper_le_ldul_mmu,
841 [MO_LEQ] = helper_le_ldq_mmu,
842 [MO_BEUW] = helper_be_lduw_mmu,
843 [MO_BESW] = helper_be_ldsw_mmu,
844 [MO_BEUL] = helper_be_ldul_mmu,
845 [MO_BEQ] = helper_be_ldq_mmu,
847 static void * const qemu_st_helpers[16] = {
848 [MO_UB] = helper_ret_stb_mmu,
849 [MO_LEUW] = helper_le_stw_mmu,
850 [MO_LEUL] = helper_le_stl_mmu,
851 [MO_LEQ] = helper_le_stq_mmu,
852 [MO_BEUW] = helper_be_stw_mmu,
853 [MO_BEUL] = helper_be_stl_mmu,
854 [MO_BEQ] = helper_be_stq_mmu,
857 int i;
858 TCGReg ra;
860 for (i = 0; i < 16; ++i) {
861 if (qemu_ld_helpers[i] == NULL) {
862 continue;
865 /* May as well align the trampoline. */
866 while ((uintptr_t)s->code_ptr & 15) {
867 tcg_out_nop(s);
869 qemu_ld_trampoline[i] = s->code_ptr;
871 if (SPARC64 || TARGET_LONG_BITS == 32) {
872 ra = TCG_REG_O3;
873 } else {
874 /* Install the high part of the address. */
875 tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
876 ra = TCG_REG_O4;
879 /* Set the retaddr operand. */
880 tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
881 /* Set the env operand. */
882 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
883 /* Tail call. */
884 tcg_out_call_nodelay(s, qemu_ld_helpers[i]);
885 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
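/* Note on the tail call above: the TB's call into this trampoline left
   its return address in %o7, which is first copied into the retaddr
   argument register.  The CALL to the C helper then clobbers %o7, and
   its delay slot restores %o7 from that copy, so when the helper
   returns it goes straight back to the TB rather than back here. */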
888 for (i = 0; i < 16; ++i) {
889 if (qemu_st_helpers[i] == NULL) {
890 continue;
893 /* May as well align the trampoline. */
894 while ((uintptr_t)s->code_ptr & 15) {
895 tcg_out_nop(s);
897 qemu_st_trampoline[i] = s->code_ptr;
899 if (SPARC64) {
900 ra = TCG_REG_O4;
901 } else {
902 ra = TCG_REG_O1;
903 if (TARGET_LONG_BITS == 64) {
904 /* Install the high part of the address. */
905 tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
906 ra += 2;
907 } else {
908 ra += 1;
910 if ((i & MO_SIZE) == MO_64) {
911 /* Install the high part of the data. */
912 tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
913 ra += 2;
914 } else {
915 ra += 1;
917 /* Skip the oi argument. */
918 ra += 1;
921 /* Set the retaddr operand. */
922 if (ra >= TCG_REG_O6) {
923 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK,
924 TCG_TARGET_CALL_STACK_OFFSET);
925 ra = TCG_REG_G1;
927 tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
928 /* Set the env operand. */
929 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
930 /* Tail call. */
931 tcg_out_call_nodelay(s, qemu_st_helpers[i]);
932 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
935 #endif
937 /* Generate global QEMU prologue and epilogue code */
938 static void tcg_target_qemu_prologue(TCGContext *s)
940 int tmp_buf_size, frame_size;
942 /* The TCG temp buffer is at the top of the frame, immediately
943 below the frame pointer. */
944 tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
945 tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size,
946 tmp_buf_size);
948 /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
949 otherwise the minimal frame usable by callees. */
950 frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
951 frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
952 frame_size += TCG_TARGET_STACK_ALIGN - 1;
953 frame_size &= -TCG_TARGET_STACK_ALIGN;
954 tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
955 INSN_IMM13(-frame_size));
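/* Illustration of the rounding above: "size += ALIGN - 1; size &= -ALIGN"
   rounds up to a multiple of the alignment.  With a hypothetical raw
   size of 210 bytes and a 16-byte TCG_TARGET_STACK_ALIGN, 210 + 15 = 225
   and 225 & -16 = 224.  The negative immediate of SAVE then reserves
   that rounded amount in the new register window's stack frame. */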
957 #ifndef CONFIG_SOFTMMU
958 if (guest_base != 0) {
959 tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
960 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
962 #endif
964 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL);
965 /* delay slot */
966 tcg_out_nop(s);
968 /* No epilogue required. We issue ret + restore directly in the TB. */
970 #ifdef CONFIG_SOFTMMU
971 build_trampolines(s);
972 #endif
975 #if defined(CONFIG_SOFTMMU)
976 /* Perform the TLB load and compare.
978 Inputs:
979 ADDRLO and ADDRHI contain the possible two parts of the address.
981 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
983 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
984 This should be the offsetof() of either addr_read or addr_write.
986 The result of the TLB comparison is in %[ix]cc. The sanitized address
987 is in the returned register, maybe %o0. The TLB addend is in %o1. */
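/* Rough sketch of the lookup performed below, in C terms (names
   abbreviated for illustration):

       index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
       entry = &env->tlb_table[mem_index][index];
       cmp   = addr & (TARGET_PAGE_MASK | ((1 << s_bits) - 1));
       hit   = (cmp == entry->addr_read or addr_write);   tested via subcc

   Keeping the low alignment bits in CMP means a misaligned access can
   never match the comparator and therefore always takes the slow path. */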
989 static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
990 TCGMemOp s_bits, int which)
992 const TCGReg r0 = TCG_REG_O0;
993 const TCGReg r1 = TCG_REG_O1;
994 const TCGReg r2 = TCG_REG_O2;
995 int tlb_ofs;
997 /* Shift the page number down. */
998 tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL);
1000 /* Mask out the page offset, except for the required alignment. */
1001 tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1,
1002 TARGET_PAGE_MASK | ((1 << s_bits) - 1));
1004 /* Mask the tlb index. */
1005 tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND);
1007 /* Mask page, part 2. */
1008 tcg_out_arith(s, r0, addr, TCG_REG_T1, ARITH_AND);
1010 /* Shift the tlb index into place. */
1011 tcg_out_arithi(s, r1, r1, CPU_TLB_ENTRY_BITS, SHIFT_SLL);
1013 /* Relative to the current ENV. */
1014 tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);
1016 /* Find a base address that can load both tlb comparator and addend. */
1017 tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
1018 if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
1019 if (tlb_ofs & ~0x3ff) {
1020 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff);
1021 tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD);
1023 tlb_ofs &= 0x3ff;
1026 /* Load the tlb comparator and the addend. */
1027 tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
1028 tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
1030 /* subcc arg0, arg2, %g0 */
1031 tcg_out_cmp(s, r0, r2, 0);
1033 /* If the guest address must be zero-extended, do so now. */
1034 if (SPARC64 && TARGET_LONG_BITS == 32) {
1035 tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL);
1036 return r0;
1038 return addr;
1040 #endif /* CONFIG_SOFTMMU */
1042 static const int qemu_ld_opc[16] = {
1043 [MO_UB] = LDUB,
1044 [MO_SB] = LDSB,
1046 [MO_BEUW] = LDUH,
1047 [MO_BESW] = LDSH,
1048 [MO_BEUL] = LDUW,
1049 [MO_BESL] = LDSW,
1050 [MO_BEQ] = LDX,
1052 [MO_LEUW] = LDUH_LE,
1053 [MO_LESW] = LDSH_LE,
1054 [MO_LEUL] = LDUW_LE,
1055 [MO_LESL] = LDSW_LE,
1056 [MO_LEQ] = LDX_LE,
1059 static const int qemu_st_opc[16] = {
1060 [MO_UB] = STB,
1062 [MO_BEUW] = STH,
1063 [MO_BEUL] = STW,
1064 [MO_BEQ] = STX,
1066 [MO_LEUW] = STH_LE,
1067 [MO_LEUL] = STW_LE,
1068 [MO_LEQ] = STX_LE,
1071 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
1072 TCGMemOpIdx oi, bool is_64)
1074 TCGMemOp memop = get_memop(oi);
1075 #ifdef CONFIG_SOFTMMU
1076 unsigned memi = get_mmuidx(oi);
1077 TCGReg addrz, param;
1078 tcg_insn_unit *func;
1079 tcg_insn_unit *label_ptr;
1081 addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE,
1082 offsetof(CPUTLBEntry, addr_read));
1084 /* The fast path is exactly one insn. Thus we can perform the
1085 entire TLB Hit in the (annulled) delay slot of the branch
1086 over the TLB Miss case. */
1088 /* beq,a,pt %[xi]cc, label0 */
1089 label_ptr = s->code_ptr;
1090 tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
1091 | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
1092 /* delay slot */
1093 tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
1094 qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
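/* With the annul bit set on a conditional branch, the delay-slot insn
   executes only when the branch is taken.  So the guest load above runs
   only on a TLB hit (branch taken over the miss code); on a miss it is
   squashed and control falls through to the helper call below. */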
1096 /* TLB Miss. */
1098 param = TCG_REG_O1;
1099 if (!SPARC64 && TARGET_LONG_BITS == 64) {
1100 /* Skip the high-part; we'll perform the extract in the trampoline. */
1101 param++;
1103 tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
1105 /* We use the helpers to extend SB and SW data, leaving the case
1106 of SL needing explicit extending below. */
1107 if ((memop & MO_SSIZE) == MO_SL) {
1108 func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1109 } else {
1110 func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
1112 assert(func != NULL);
1113 tcg_out_call_nodelay(s, func);
1114 /* delay slot */
1115 tcg_out_movi(s, TCG_TYPE_I32, param, oi);
1117 /* Recall that all of the helpers return 64-bit results,
1118 which complicates things for sparcv8plus. */
1119 if (SPARC64) {
1120 /* We let the helper sign-extend SB and SW, but leave SL for here. */
1121 if (is_64 && (memop & MO_SSIZE) == MO_SL) {
1122 tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
1123 } else {
1124 tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
1126 } else {
1127 if ((memop & MO_SIZE) == MO_64) {
1128 tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
1129 tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
1130 tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
1131 } else if (is_64) {
1132 /* Re-extend from 32-bit rather than reassembling when we
1133 know the high register must be an extension. */
1134 tcg_out_arithi(s, data, TCG_REG_O1, 0,
1135 memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL);
1136 } else {
1137 tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1);
1141 *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1142 #else
1143 if (SPARC64 && TARGET_LONG_BITS == 32) {
1144 tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
1145 addr = TCG_REG_T1;
1147 tcg_out_ldst_rr(s, data, addr,
1148 (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
1149 qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
1150 #endif /* CONFIG_SOFTMMU */
1153 static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
1154 TCGMemOpIdx oi)
1156 TCGMemOp memop = get_memop(oi);
1157 #ifdef CONFIG_SOFTMMU
1158 unsigned memi = get_mmuidx(oi);
1159 TCGReg addrz, param;
1160 tcg_insn_unit *func;
1161 tcg_insn_unit *label_ptr;
1163 addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE,
1164 offsetof(CPUTLBEntry, addr_write));
1166 /* The fast path is exactly one insn. Thus we can perform the entire
1167 TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
1168 /* beq,a,pt %[xi]cc, label0 */
1169 label_ptr = s->code_ptr;
1170 tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
1171 | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
1172 /* delay slot */
1173 tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
1174 qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
1176 /* TLB Miss. */
1178 param = TCG_REG_O1;
1179 if (!SPARC64 && TARGET_LONG_BITS == 64) {
1180 /* Skip the high-part; we'll perform the extract in the trampoline. */
1181 param++;
1183 tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
1184 if (!SPARC64 && (memop & MO_SIZE) == MO_64) {
1185 /* Skip the high-part; we'll perform the extract in the trampoline. */
1186 param++;
1188 tcg_out_mov(s, TCG_TYPE_REG, param++, data);
1190 func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1191 assert(func != NULL);
1192 tcg_out_call_nodelay(s, func);
1193 /* delay slot */
1194 tcg_out_movi(s, TCG_TYPE_I32, param, oi);
1196 *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1197 #else
1198 if (SPARC64 && TARGET_LONG_BITS == 32) {
1199 tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
1200 addr = TCG_REG_T1;
1202 tcg_out_ldst_rr(s, data, addr,
1203 (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
1204 qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
1205 #endif /* CONFIG_SOFTMMU */
1208 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1209 const TCGArg args[TCG_MAX_OP_ARGS],
1210 const int const_args[TCG_MAX_OP_ARGS])
1212 TCGArg a0, a1, a2;
1213 int c, c2;
1215 /* Hoist the loads of the most common arguments. */
1216 a0 = args[0];
1217 a1 = args[1];
1218 a2 = args[2];
1219 c2 = const_args[2];
1221 switch (opc) {
1222 case INDEX_op_exit_tb:
1223 if (check_fit_ptr(a0, 13)) {
1224 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1225 tcg_out_movi_imm13(s, TCG_REG_O0, a0);
1226 } else {
1227 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff);
1228 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1229 tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR);
1231 break;
1232 case INDEX_op_goto_tb:
1233 if (s->tb_jmp_offset) {
1234 /* direct jump method */
1235 s->tb_jmp_offset[a0] = tcg_current_code_size(s);
1236 /* Make sure to preserve links during retranslation. */
1237 tcg_out32(s, CALL | (*s->code_ptr & ~INSN_OP(-1)));
1238 } else {
1239 /* indirect jump method */
1240 tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0));
1241 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL);
1243 tcg_out_nop(s);
1244 s->tb_next_offset[a0] = tcg_current_code_size(s);
1245 break;
1246 case INDEX_op_br:
1247 tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0));
1248 tcg_out_nop(s);
1249 break;
1251 #define OP_32_64(x) \
1252 glue(glue(case INDEX_op_, x), _i32): \
1253 glue(glue(case INDEX_op_, x), _i64)
1255 OP_32_64(ld8u):
1256 tcg_out_ldst(s, a0, a1, a2, LDUB);
1257 break;
1258 OP_32_64(ld8s):
1259 tcg_out_ldst(s, a0, a1, a2, LDSB);
1260 break;
1261 OP_32_64(ld16u):
1262 tcg_out_ldst(s, a0, a1, a2, LDUH);
1263 break;
1264 OP_32_64(ld16s):
1265 tcg_out_ldst(s, a0, a1, a2, LDSH);
1266 break;
1267 case INDEX_op_ld_i32:
1268 case INDEX_op_ld32u_i64:
1269 tcg_out_ldst(s, a0, a1, a2, LDUW);
1270 break;
1271 OP_32_64(st8):
1272 tcg_out_ldst(s, a0, a1, a2, STB);
1273 break;
1274 OP_32_64(st16):
1275 tcg_out_ldst(s, a0, a1, a2, STH);
1276 break;
1277 case INDEX_op_st_i32:
1278 case INDEX_op_st32_i64:
1279 tcg_out_ldst(s, a0, a1, a2, STW);
1280 break;
1281 OP_32_64(add):
1282 c = ARITH_ADD;
1283 goto gen_arith;
1284 OP_32_64(sub):
1285 c = ARITH_SUB;
1286 goto gen_arith;
1287 OP_32_64(and):
1288 c = ARITH_AND;
1289 goto gen_arith;
1290 OP_32_64(andc):
1291 c = ARITH_ANDN;
1292 goto gen_arith;
1293 OP_32_64(or):
1294 c = ARITH_OR;
1295 goto gen_arith;
1296 OP_32_64(orc):
1297 c = ARITH_ORN;
1298 goto gen_arith;
1299 OP_32_64(xor):
1300 c = ARITH_XOR;
1301 goto gen_arith;
1302 case INDEX_op_shl_i32:
1303 c = SHIFT_SLL;
1304 do_shift32:
1305 /* Limit immediate shift count lest we create an illegal insn. */
1306 tcg_out_arithc(s, a0, a1, a2 & 31, c2, c);
1307 break;
1308 case INDEX_op_shr_i32:
1309 c = SHIFT_SRL;
1310 goto do_shift32;
1311 case INDEX_op_sar_i32:
1312 c = SHIFT_SRA;
1313 goto do_shift32;
1314 case INDEX_op_mul_i32:
1315 c = ARITH_UMUL;
1316 goto gen_arith;
1318 OP_32_64(neg):
1319 c = ARITH_SUB;
1320 goto gen_arith1;
1321 OP_32_64(not):
1322 c = ARITH_ORN;
1323 goto gen_arith1;
1325 case INDEX_op_div_i32:
1326 tcg_out_div32(s, a0, a1, a2, c2, 0);
1327 break;
1328 case INDEX_op_divu_i32:
1329 tcg_out_div32(s, a0, a1, a2, c2, 1);
1330 break;
1332 case INDEX_op_brcond_i32:
1333 tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3]));
1334 break;
1335 case INDEX_op_setcond_i32:
1336 tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2);
1337 break;
1338 case INDEX_op_movcond_i32:
1339 tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
1340 break;
1342 case INDEX_op_add2_i32:
1343 tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
1344 args[4], const_args[4], args[5], const_args[5],
1345 ARITH_ADDCC, ARITH_ADDC);
1346 break;
1347 case INDEX_op_sub2_i32:
1348 tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
1349 args[4], const_args[4], args[5], const_args[5],
1350 ARITH_SUBCC, ARITH_SUBC);
1351 break;
1352 case INDEX_op_mulu2_i32:
1353 c = ARITH_UMUL;
1354 goto do_mul2;
1355 case INDEX_op_muls2_i32:
1356 c = ARITH_SMUL;
1357 do_mul2:
1358 /* The 32-bit multiply insns produce a full 64-bit result. If the
1359 destination register can hold it, we can avoid the slower RDY. */
1360 tcg_out_arithc(s, a0, a2, args[3], const_args[3], c);
1361 if (SPARC64 || a0 <= TCG_REG_O7) {
1362 tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
1363 } else {
1364 tcg_out_rdy(s, a1);
1366 break;
1368 case INDEX_op_qemu_ld_i32:
1369 tcg_out_qemu_ld(s, a0, a1, a2, false);
1370 break;
1371 case INDEX_op_qemu_ld_i64:
1372 tcg_out_qemu_ld(s, a0, a1, a2, true);
1373 break;
1374 case INDEX_op_qemu_st_i32:
1375 case INDEX_op_qemu_st_i64:
1376 tcg_out_qemu_st(s, a0, a1, a2);
1377 break;
1379 case INDEX_op_ld32s_i64:
1380 tcg_out_ldst(s, a0, a1, a2, LDSW);
1381 break;
1382 case INDEX_op_ld_i64:
1383 tcg_out_ldst(s, a0, a1, a2, LDX);
1384 break;
1385 case INDEX_op_st_i64:
1386 tcg_out_ldst(s, a0, a1, a2, STX);
1387 break;
1388 case INDEX_op_shl_i64:
1389 c = SHIFT_SLLX;
1390 do_shift64:
1391 /* Limit immediate shift count lest we create an illegal insn. */
1392 tcg_out_arithc(s, a0, a1, a2 & 63, c2, c);
1393 break;
1394 case INDEX_op_shr_i64:
1395 c = SHIFT_SRLX;
1396 goto do_shift64;
1397 case INDEX_op_sar_i64:
1398 c = SHIFT_SRAX;
1399 goto do_shift64;
1400 case INDEX_op_mul_i64:
1401 c = ARITH_MULX;
1402 goto gen_arith;
1403 case INDEX_op_div_i64:
1404 c = ARITH_SDIVX;
1405 goto gen_arith;
1406 case INDEX_op_divu_i64:
1407 c = ARITH_UDIVX;
1408 goto gen_arith;
1409 case INDEX_op_ext_i32_i64:
1410 case INDEX_op_ext32s_i64:
1411 tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
1412 break;
1413 case INDEX_op_extu_i32_i64:
1414 case INDEX_op_ext32u_i64:
1415 tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
1416 break;
1417 case INDEX_op_extrl_i64_i32:
1418 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
1419 break;
1420 case INDEX_op_extrh_i64_i32:
1421 tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX);
1422 break;
1424 case INDEX_op_brcond_i64:
1425 tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3]));
1426 break;
1427 case INDEX_op_setcond_i64:
1428 tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2);
1429 break;
1430 case INDEX_op_movcond_i64:
1431 tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
1432 break;
1433 case INDEX_op_add2_i64:
1434 tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
1435 const_args[4], args[5], const_args[5], false);
1436 break;
1437 case INDEX_op_sub2_i64:
1438 tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
1439 const_args[4], args[5], const_args[5], true);
1440 break;
1441 case INDEX_op_muluh_i64:
1442 tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI);
1443 break;
1445 gen_arith:
1446 tcg_out_arithc(s, a0, a1, a2, c2, c);
1447 break;
1449 gen_arith1:
1450 tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c);
1451 break;
1453 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1454 case INDEX_op_mov_i64:
1455 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1456 case INDEX_op_movi_i64:
1457 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1458 default:
1459 tcg_abort();
1463 static const TCGTargetOpDef sparc_op_defs[] = {
1464 { INDEX_op_exit_tb, { } },
1465 { INDEX_op_goto_tb, { } },
1466 { INDEX_op_br, { } },
1468 { INDEX_op_ld8u_i32, { "r", "r" } },
1469 { INDEX_op_ld8s_i32, { "r", "r" } },
1470 { INDEX_op_ld16u_i32, { "r", "r" } },
1471 { INDEX_op_ld16s_i32, { "r", "r" } },
1472 { INDEX_op_ld_i32, { "r", "r" } },
1473 { INDEX_op_st8_i32, { "rZ", "r" } },
1474 { INDEX_op_st16_i32, { "rZ", "r" } },
1475 { INDEX_op_st_i32, { "rZ", "r" } },
1477 { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
1478 { INDEX_op_mul_i32, { "r", "rZ", "rJ" } },
1479 { INDEX_op_div_i32, { "r", "rZ", "rJ" } },
1480 { INDEX_op_divu_i32, { "r", "rZ", "rJ" } },
1481 { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
1482 { INDEX_op_and_i32, { "r", "rZ", "rJ" } },
1483 { INDEX_op_andc_i32, { "r", "rZ", "rJ" } },
1484 { INDEX_op_or_i32, { "r", "rZ", "rJ" } },
1485 { INDEX_op_orc_i32, { "r", "rZ", "rJ" } },
1486 { INDEX_op_xor_i32, { "r", "rZ", "rJ" } },
1488 { INDEX_op_shl_i32, { "r", "rZ", "rJ" } },
1489 { INDEX_op_shr_i32, { "r", "rZ", "rJ" } },
1490 { INDEX_op_sar_i32, { "r", "rZ", "rJ" } },
1492 { INDEX_op_neg_i32, { "r", "rJ" } },
1493 { INDEX_op_not_i32, { "r", "rJ" } },
1495 { INDEX_op_brcond_i32, { "rZ", "rJ" } },
1496 { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } },
1497 { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } },
1499 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
1500 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
1501 { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
1502 { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } },
1504 { INDEX_op_ld8u_i64, { "R", "r" } },
1505 { INDEX_op_ld8s_i64, { "R", "r" } },
1506 { INDEX_op_ld16u_i64, { "R", "r" } },
1507 { INDEX_op_ld16s_i64, { "R", "r" } },
1508 { INDEX_op_ld32u_i64, { "R", "r" } },
1509 { INDEX_op_ld32s_i64, { "R", "r" } },
1510 { INDEX_op_ld_i64, { "R", "r" } },
1511 { INDEX_op_st8_i64, { "RZ", "r" } },
1512 { INDEX_op_st16_i64, { "RZ", "r" } },
1513 { INDEX_op_st32_i64, { "RZ", "r" } },
1514 { INDEX_op_st_i64, { "RZ", "r" } },
1516 { INDEX_op_add_i64, { "R", "RZ", "RJ" } },
1517 { INDEX_op_mul_i64, { "R", "RZ", "RJ" } },
1518 { INDEX_op_div_i64, { "R", "RZ", "RJ" } },
1519 { INDEX_op_divu_i64, { "R", "RZ", "RJ" } },
1520 { INDEX_op_sub_i64, { "R", "RZ", "RJ" } },
1521 { INDEX_op_and_i64, { "R", "RZ", "RJ" } },
1522 { INDEX_op_andc_i64, { "R", "RZ", "RJ" } },
1523 { INDEX_op_or_i64, { "R", "RZ", "RJ" } },
1524 { INDEX_op_orc_i64, { "R", "RZ", "RJ" } },
1525 { INDEX_op_xor_i64, { "R", "RZ", "RJ" } },
1527 { INDEX_op_shl_i64, { "R", "RZ", "RJ" } },
1528 { INDEX_op_shr_i64, { "R", "RZ", "RJ" } },
1529 { INDEX_op_sar_i64, { "R", "RZ", "RJ" } },
1531 { INDEX_op_neg_i64, { "R", "RJ" } },
1532 { INDEX_op_not_i64, { "R", "RJ" } },
1534 { INDEX_op_ext32s_i64, { "R", "R" } },
1535 { INDEX_op_ext32u_i64, { "R", "R" } },
1536 { INDEX_op_ext_i32_i64, { "R", "r" } },
1537 { INDEX_op_extu_i32_i64, { "R", "r" } },
1538 { INDEX_op_extrl_i64_i32, { "r", "R" } },
1539 { INDEX_op_extrh_i64_i32, { "r", "R" } },
1541 { INDEX_op_brcond_i64, { "RZ", "RJ" } },
1542 { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } },
1543 { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } },
1545 { INDEX_op_add2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } },
1546 { INDEX_op_sub2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } },
1547 { INDEX_op_muluh_i64, { "R", "RZ", "RZ" } },
1549 { INDEX_op_qemu_ld_i32, { "r", "A" } },
1550 { INDEX_op_qemu_ld_i64, { "R", "A" } },
1551 { INDEX_op_qemu_st_i32, { "sZ", "A" } },
1552 { INDEX_op_qemu_st_i64, { "SZ", "A" } },
1554 { -1 },
1557 static void tcg_target_init(TCGContext *s)
1559 /* Only probe for the platform and capabilities if we haven't already
1560 determined maximum values at compile time. */
1561 #ifndef use_vis3_instructions
1563 unsigned long hwcap = qemu_getauxval(AT_HWCAP);
1564 use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0;
1566 #endif
1568 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1569 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64);
1571 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1572 (1 << TCG_REG_G1) |
1573 (1 << TCG_REG_G2) |
1574 (1 << TCG_REG_G3) |
1575 (1 << TCG_REG_G4) |
1576 (1 << TCG_REG_G5) |
1577 (1 << TCG_REG_G6) |
1578 (1 << TCG_REG_G7) |
1579 (1 << TCG_REG_O0) |
1580 (1 << TCG_REG_O1) |
1581 (1 << TCG_REG_O2) |
1582 (1 << TCG_REG_O3) |
1583 (1 << TCG_REG_O4) |
1584 (1 << TCG_REG_O5) |
1585 (1 << TCG_REG_O7));
1587 tcg_regset_clear(s->reserved_regs);
1588 tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
1589 tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
1590 tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
1591 tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
1592 tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
1593 tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
1594 tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
1595 tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
1597 tcg_add_target_add_op_defs(sparc_op_defs);
1600 #if SPARC64
1601 # define ELF_HOST_MACHINE EM_SPARCV9
1602 #else
1603 # define ELF_HOST_MACHINE EM_SPARC32PLUS
1604 # define ELF_HOST_FLAGS EF_SPARC_32PLUS
1605 #endif
1607 typedef struct {
1608 DebugFrameHeader h;
1609 uint8_t fde_def_cfa[SPARC64 ? 4 : 2];
1610 uint8_t fde_win_save;
1611 uint8_t fde_ret_save[3];
1612 } DebugFrame;
1614 static const DebugFrame debug_frame = {
1615 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1616 .h.cie.id = -1,
1617 .h.cie.version = 1,
1618 .h.cie.code_align = 1,
1619 .h.cie.data_align = -sizeof(void *) & 0x7f,
1620 .h.cie.return_column = 15, /* o7 */
1622 /* Total FDE size does not include the "len" member. */
1623 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1625 .fde_def_cfa = {
1626 #if SPARC64
1627 12, 30, /* DW_CFA_def_cfa i6, 2047 */
1628 (2047 & 0x7f) | 0x80, (2047 >> 7)
1629 #else
1630 13, 30 /* DW_CFA_def_cfa_register i6 */
1631 #endif
1633 .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */
1634 .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */
1637 void tcg_register_jit(void *buf, size_t buf_size)
1639 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
1642 void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
1644 uint32_t *ptr = (uint32_t *)jmp_addr;
1645 uintptr_t disp = addr - jmp_addr;
1647 /* We can reach the entire address space for 32-bit. For 64-bit
1648 the code_gen_buffer can't be larger than 2GB. */
1649 assert(disp == (int32_t)disp);
1651 *ptr = CALL | (uint32_t)disp >> 2;
1652 flush_icache_range(jmp_addr, jmp_addr + 4);