Add some trace calls to pci.c.
[qemu/ar7.git] / tcg / sparc / tcg-target.c
blob0c4b028580d12a709f5c77cb3ff531f581f0921b
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "tcg-be-null.h"
/* Printable names of all SPARC registers, indexed by TCGReg value;
   used only for debug dumps, hence the NDEBUG guard. */
27 #ifndef NDEBUG
28 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
29 "%g0",
30 "%g1",
31 "%g2",
32 "%g3",
33 "%g4",
34 "%g5",
35 "%g6",
36 "%g7",
37 "%o0",
38 "%o1",
39 "%o2",
40 "%o3",
41 "%o4",
42 "%o5",
43 "%o6",
44 "%o7",
45 "%l0",
46 "%l1",
47 "%l2",
48 "%l3",
49 "%l4",
50 "%l5",
51 "%l6",
52 "%l7",
53 "%i0",
54 "%i1",
55 "%i2",
56 "%i3",
57 "%i4",
58 "%i5",
59 "%i6",
60 "%i7",
62 #endif
/* SPARC64 is 1 when compiling for a 64-bit host, 0 for sparcv8plus. */
64 #ifdef __arch64__
65 # define SPARC64 1
66 #else
67 # define SPARC64 0
68 #endif
70 /* Note that sparcv8plus can only hold 64 bit quantities in %g and %o
71 registers. These are saved manually by the kernel in full 64-bit
72 slots. The %i and %l registers are saved by the register window
73 mechanism, which only allocates space for 32 bits. Given that this
74 window spill/fill can happen on any signal, we must consider the
75 high bits of the %i and %l registers garbage at all times. */
/* Mask of registers that may hold 64-bit values: all 32 registers on
   sparc64, only the low 16 (%g and %o) on v8plus, per the note above. */
76 #if SPARC64
77 # define ALL_64 0xffffffffu
78 #else
79 # define ALL_64 0xffffu
80 #endif
82 /* Define some temporary registers. T2 is used for constant generation. */
83 #define TCG_REG_T1 TCG_REG_G1
84 #define TCG_REG_T2 TCG_REG_O7
86 #ifdef CONFIG_USE_GUEST_BASE
87 # define TCG_GUEST_BASE_REG TCG_REG_I5
88 #else
89 # define TCG_GUEST_BASE_REG TCG_REG_G0
90 #endif
/* Allocation preference order: %l and %i registers first, then the
   remaining %g and %o registers. */
92 static const int tcg_target_reg_alloc_order[] = {
93 TCG_REG_L0,
94 TCG_REG_L1,
95 TCG_REG_L2,
96 TCG_REG_L3,
97 TCG_REG_L4,
98 TCG_REG_L5,
99 TCG_REG_L6,
100 TCG_REG_L7,
102 TCG_REG_I0,
103 TCG_REG_I1,
104 TCG_REG_I2,
105 TCG_REG_I3,
106 TCG_REG_I4,
107 TCG_REG_I5,
109 TCG_REG_G2,
110 TCG_REG_G3,
111 TCG_REG_G4,
112 TCG_REG_G5,
114 TCG_REG_O0,
115 TCG_REG_O1,
116 TCG_REG_O2,
117 TCG_REG_O3,
118 TCG_REG_O4,
119 TCG_REG_O5,
/* The six %o registers that carry integer call arguments. */
122 static const int tcg_target_call_iarg_regs[6] = {
123 TCG_REG_O0,
124 TCG_REG_O1,
125 TCG_REG_O2,
126 TCG_REG_O3,
127 TCG_REG_O4,
128 TCG_REG_O5,
/* Registers in which call results are returned. */
131 static const int tcg_target_call_oarg_regs[] = {
132 TCG_REG_O0,
133 TCG_REG_O1,
134 TCG_REG_O2,
135 TCG_REG_O3,
/* Bit-field building blocks for 32-bit SPARC instruction words. */
138 #define INSN_OP(x) ((x) << 30)
139 #define INSN_OP2(x) ((x) << 22)
140 #define INSN_OP3(x) ((x) << 19)
141 #define INSN_OPF(x) ((x) << 5)
142 #define INSN_RD(x) ((x) << 25)
143 #define INSN_RS1(x) ((x) << 14)
144 #define INSN_RS2(x) (x)
145 #define INSN_ASI(x) ((x) << 5)
/* Immediate operands; bit 13 set selects the immediate form.
   OFF16/OFF19 encode word-aligned PC-relative branch displacements. */
147 #define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
148 #define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
149 #define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
150 #define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20))
151 #define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
152 #define INSN_COND(x) ((x) << 25)
/* Integer condition codes for branches and conditional moves. */
154 #define COND_N 0x0
155 #define COND_E 0x1
156 #define COND_LE 0x2
157 #define COND_L 0x3
158 #define COND_LEU 0x4
159 #define COND_CS 0x5
160 #define COND_NEG 0x6
161 #define COND_VS 0x7
162 #define COND_A 0x8
163 #define COND_NE 0x9
164 #define COND_G 0xa
165 #define COND_GE 0xb
166 #define COND_GU 0xc
167 #define COND_CC 0xd
168 #define COND_POS 0xe
169 #define COND_VC 0xf
/* "Branch always" -- unconditional branch via the Bicc format. */
170 #define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))
/* Register conditions for BPr/MOVr: compare one register against zero. */
172 #define RCOND_Z 1
173 #define RCOND_LEZ 2
174 #define RCOND_LZ 3
175 #define RCOND_NZ 5
176 #define RCOND_GZ 6
177 #define RCOND_GEZ 7
/* Select 32-bit (icc) vs 64-bit (xcc) condition codes for MOVcc/BPcc. */
179 #define MOVCC_ICC (1 << 18)
180 #define MOVCC_XCC (1 << 18 | 1 << 12)
182 #define BPCC_ICC 0
183 #define BPCC_XCC (2 << 20)
184 #define BPCC_PT (1 << 19)
185 #define BPCC_PN 0
186 #define BPCC_A (1 << 29)
188 #define BPR_PT BPCC_PT
/* Format-3 arithmetic and logical opcodes. */
190 #define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00))
191 #define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
192 #define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01))
193 #define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05))
194 #define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02))
195 #define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12))
196 #define ARITH_ORN (INSN_OP(2) | INSN_OP3(0x06))
197 #define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03))
198 #define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04))
199 #define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
200 #define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08))
201 #define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c))
202 #define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a))
203 #define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b))
204 #define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e))
205 #define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f))
206 #define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09))
207 #define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
208 #define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
209 #define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
210 #define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))
/* VIS3 instructions (add-with-carry, high 64 bits of unsigned multiply). */
212 #define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11))
213 #define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16))
/* Shift opcodes; the 64-bit (X) forms set bit 12. */
215 #define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25))
216 #define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26))
217 #define SHIFT_SRA (INSN_OP(2) | INSN_OP3(0x27))
219 #define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12))
220 #define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12))
221 #define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12))
/* Misc control-transfer / window ops and load-store opcodes. */
223 #define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0))
224 #define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0))
225 #define JMPL (INSN_OP(2) | INSN_OP3(0x38))
226 #define RETURN (INSN_OP(2) | INSN_OP3(0x39))
227 #define SAVE (INSN_OP(2) | INSN_OP3(0x3c))
228 #define RESTORE (INSN_OP(2) | INSN_OP3(0x3d))
229 #define SETHI (INSN_OP(0) | INSN_OP2(0x4))
230 #define CALL INSN_OP(1)
231 #define LDUB (INSN_OP(3) | INSN_OP3(0x01))
232 #define LDSB (INSN_OP(3) | INSN_OP3(0x09))
233 #define LDUH (INSN_OP(3) | INSN_OP3(0x02))
234 #define LDSH (INSN_OP(3) | INSN_OP3(0x0a))
235 #define LDUW (INSN_OP(3) | INSN_OP3(0x00))
236 #define LDSW (INSN_OP(3) | INSN_OP3(0x08))
237 #define LDX (INSN_OP(3) | INSN_OP3(0x0b))
238 #define STB (INSN_OP(3) | INSN_OP3(0x05))
239 #define STH (INSN_OP(3) | INSN_OP3(0x06))
240 #define STW (INSN_OP(3) | INSN_OP3(0x04))
241 #define STX (INSN_OP(3) | INSN_OP3(0x0e))
/* Alternate-space variants, used below for little-endian accesses. */
242 #define LDUBA (INSN_OP(3) | INSN_OP3(0x11))
243 #define LDSBA (INSN_OP(3) | INSN_OP3(0x19))
244 #define LDUHA (INSN_OP(3) | INSN_OP3(0x12))
245 #define LDSHA (INSN_OP(3) | INSN_OP3(0x1a))
246 #define LDUWA (INSN_OP(3) | INSN_OP3(0x10))
247 #define LDSWA (INSN_OP(3) | INSN_OP3(0x18))
248 #define LDXA (INSN_OP(3) | INSN_OP3(0x1b))
249 #define STBA (INSN_OP(3) | INSN_OP3(0x15))
250 #define STHA (INSN_OP(3) | INSN_OP3(0x16))
251 #define STWA (INSN_OP(3) | INSN_OP3(0x14))
252 #define STXA (INSN_OP(3) | INSN_OP3(0x1e))
254 #ifndef ASI_PRIMARY_LITTLE
255 #define ASI_PRIMARY_LITTLE 0x88
256 #endif
/* Little-endian load/store forms via the ASI_PRIMARY_LITTLE space. */
258 #define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
259 #define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
260 #define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
261 #define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
262 #define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE))
264 #define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE))
265 #define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE))
266 #define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE))
/* Runtime flag for VIS3 availability, unless fixed at compile time. */
268 #ifndef use_vis3_instructions
269 bool use_vis3_instructions;
270 #endif
/* Report whether VAL is representable as a sign-extended BITS-bit
   immediate, i.e. whether truncating to BITS bits loses nothing. */
static inline int check_fit_i64(int64_t val, unsigned int bits)
{
    int64_t sext = sextract64(val, 0, bits);
    return sext == val;
}
/* 32-bit analogue of check_fit_i64: true iff VAL survives a round
   trip through a signed BITS-bit field. */
static inline int check_fit_i32(int32_t val, unsigned int bits)
{
    int32_t sext = sextract32(val, 0, bits);
    return sext == val;
}
/* check_fit_tl checks target-long immediates (always 64-bit here);
   check_fit_ptr checks host-pointer-sized immediates. */
282 #define check_fit_tl check_fit_i64
283 #if SPARC64
284 # define check_fit_ptr check_fit_i64
285 #else
286 # define check_fit_ptr check_fit_i32
287 #endif
/* Rewrite the PC-relative displacement field of the branch at CODE_PTR
   so that it targets VALUE.  Only the 16- and 19-bit branch formats are
   supported; an out-of-range displacement aborts code generation. */
289 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
290 intptr_t value, intptr_t addend)
292 uint32_t insn;
294 assert(addend == 0);
/* Convert the absolute target into a byte offset from the insn. */
295 value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr);
297 switch (type) {
298 case R_SPARC_WDISP16:
299 if (!check_fit_ptr(value >> 2, 16)) {
300 tcg_abort();
/* Clear the old displacement bits, then merge in the new ones. */
302 insn = *code_ptr;
303 insn &= ~INSN_OFF16(-1);
304 insn |= INSN_OFF16(value);
305 *code_ptr = insn;
306 break;
307 case R_SPARC_WDISP19:
308 if (!check_fit_ptr(value >> 2, 19)) {
309 tcg_abort();
311 insn = *code_ptr;
312 insn &= ~INSN_OFF19(-1);
313 insn |= INSN_OFF19(value);
314 *code_ptr = insn;
315 break;
316 default:
317 tcg_abort();
321 /* parse target specific constraints */
/* Translate one constraint letter from a TCG operand definition into
   the register-set / constant flags in CT.  Returns 0 on success,
   -1 for an unknown letter. */
322 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
324 const char *ct_str;
326 ct_str = *pct_str;
327 switch (ct_str[0]) {
328 case 'r':
329 ct->ct |= TCG_CT_REG;
330 tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
331 break;
332 case 'R':
333 ct->ct |= TCG_CT_REG;
334 tcg_regset_set32(ct->u.regs, 0, ALL_64);
335 break;
336 case 'A': /* qemu_ld/st address constraint */
337 ct->ct |= TCG_CT_REG;
338 tcg_regset_set32(ct->u.regs, 0,
339 TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff);
/* Exclude %o0-%o2 from the set; NOTE(review): presumably because the
   qemu_ld/st slow path uses them for helper arguments -- confirm. */
340 reserve_helpers:
341 tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
342 tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
343 tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
344 break;
345 case 's': /* qemu_st data 32-bit constraint */
346 ct->ct |= TCG_CT_REG;
347 tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
348 goto reserve_helpers;
349 case 'S': /* qemu_st data 64-bit constraint */
350 ct->ct |= TCG_CT_REG;
351 tcg_regset_set32(ct->u.regs, 0, ALL_64);
352 goto reserve_helpers;
353 case 'I':
354 ct->ct |= TCG_CT_CONST_S11;
355 break;
356 case 'J':
357 ct->ct |= TCG_CT_CONST_S13;
358 break;
359 case 'Z':
360 ct->ct |= TCG_CT_CONST_ZERO;
361 break;
362 default:
363 return -1;
365 ct_str++;
366 *pct_str = ct_str;
367 return 0;
370 /* test if a constant matches the constraint */
371 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
372 const TCGArgConstraint *arg_ct)
374 int ct = arg_ct->ct;
376 if (ct & TCG_CT_CONST) {
377 return 1;
380 if (type == TCG_TYPE_I32) {
381 val = (int32_t)val;
384 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
385 return 1;
386 } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
387 return 1;
388 } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
389 return 1;
390 } else {
391 return 0;
/* Emit a three-register format-3 op: RD = RS1 <op> RS2. */
395 static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1,
396 TCGReg rs2, int op)
398 tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2));
/* Emit a format-3 op with a 13-bit signed immediate second operand. */
401 static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1,
402 int32_t offset, int op)
404 tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset));
/* Emit a format-3 op whose second operand is either a register or an
   immediate, selected by VAL2CONST. */
407 static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1,
408 int32_t val2, int val2const, int op)
410 tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1)
411 | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2)));
/* Register-to-register move (OR with %g0); no-op when ret == arg. */
414 static inline void tcg_out_mov(TCGContext *s, TCGType type,
415 TCGReg ret, TCGReg arg)
417 if (ret != arg) {
418 tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
/* SETHI: load the high 22 bits of ARG into RET, zeroing the low 10. */
422 static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
424 tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
/* Load a 13-bit signed immediate into RET, as OR with %g0. */
427 static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
429 tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
/* Load the constant ARG into RET, choosing the shortest sequence:
   one insn for imm13, sethi(+or) for 32-bit values, sethi+xor for
   negative 32-bit values, and a shifted two-part build for full
   64-bit constants (using T2 as scratch in the general case). */
432 static void tcg_out_movi(TCGContext *s, TCGType type,
433 TCGReg ret, tcg_target_long arg)
435 tcg_target_long hi, lo = (int32_t)arg;
437 /* Make sure we test 32-bit constants for imm13 properly. */
438 if (type == TCG_TYPE_I32) {
439 arg = lo;
442 /* A 13-bit constant sign-extended to 64-bits. */
443 if (check_fit_tl(arg, 13)) {
444 tcg_out_movi_imm13(s, ret, arg);
445 return;
448 /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
449 if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
450 tcg_out_sethi(s, ret, arg);
451 if (arg & 0x3ff) {
452 tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
454 return;
457 /* A 32-bit constant sign-extended to 64-bits. */
458 if (arg == lo) {
/* sethi of the complement plus xor reconstructs the negative value
   with the high 32 bits set by sign extension. */
459 tcg_out_sethi(s, ret, ~arg);
460 tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
461 return;
464 /* A 64-bit constant decomposed into 2 32-bit pieces. */
465 if (check_fit_i32(lo, 13)) {
/* Low part fits an imm13: build high part, shift, add low part. */
466 hi = (arg - lo) >> 32;
467 tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
468 tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
469 tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
470 } else {
/* General case: build both halves, OR them together via T2. */
471 hi = arg >> 32;
472 tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
473 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo);
474 tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
475 tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
/* Emit a load/store with a register + register address. */
479 static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1,
480 TCGReg a2, int op)
482 tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
/* Emit a load/store at ADDR+OFFSET; large offsets go through T1. */
485 static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
486 intptr_t offset, int op)
488 if (check_fit_ptr(offset, 13)) {
489 tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
490 INSN_IMM13(offset));
491 } else {
492 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
493 tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
/* Typed load: 32-bit uses LDUW, anything else LDX. */
497 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
498 TCGReg arg1, intptr_t arg2)
500 tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
/* Typed store: 32-bit uses STW, anything else STX. */
503 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
504 TCGReg arg1, intptr_t arg2)
506 tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
/* Load the pointer stored at absolute address ARG into RET, splitting
   ARG into a sethi-able base and a 10-bit displacement. */
509 static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg)
511 tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
512 tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
/* Write RS into the %y register. */
515 static inline void tcg_out_sety(TCGContext *s, TCGReg rs)
517 tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs));
/* Read the %y register into RD. */
520 static inline void tcg_out_rdy(TCGContext *s, TCGReg rd)
522 tcg_out32(s, RDY | INSN_RD(rd));
/* Emit a 32-bit division RD = RS1 / VAL2.  SPARC's UDIV/SDIV divide
   a 64-bit value formed from %y:RS1, so %y must be primed first. */
525 static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1,
526 int32_t val2, int val2const, int uns)
528 /* Load Y with the sign/zero extension of RS1 to 64-bits. */
529 if (uns) {
530 tcg_out_sety(s, TCG_REG_G0);
531 } else {
532 tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
533 tcg_out_sety(s, TCG_REG_T1);
536 tcg_out_arithc(s, rd, rs1, val2, val2const,
537 uns ? ARITH_UDIV : ARITH_SDIV);
/* Canonical SPARC nop: sethi 0, %g0. */
540 static inline void tcg_out_nop(TCGContext *s)
542 tcg_out_sethi(s, TCG_REG_G0, 0);
/* Map TCG comparison conditions to SPARC integer condition codes. */
545 static const uint8_t tcg_cond_to_bcond[] = {
546 [TCG_COND_EQ] = COND_E,
547 [TCG_COND_NE] = COND_NE,
548 [TCG_COND_LT] = COND_L,
549 [TCG_COND_GE] = COND_GE,
550 [TCG_COND_LE] = COND_LE,
551 [TCG_COND_GT] = COND_G,
552 [TCG_COND_LTU] = COND_CS,
553 [TCG_COND_GEU] = COND_CC,
554 [TCG_COND_LEU] = COND_LEU,
555 [TCG_COND_GTU] = COND_GU,
/* Map signed TCG conditions to register-vs-zero conditions for
   BPr/MOVr; unsigned conditions have no rcond equivalent. */
558 static const uint8_t tcg_cond_to_rcond[] = {
559 [TCG_COND_EQ] = RCOND_Z,
560 [TCG_COND_NE] = RCOND_NZ,
561 [TCG_COND_LT] = RCOND_LZ,
562 [TCG_COND_GT] = RCOND_GZ,
563 [TCG_COND_LE] = RCOND_LEZ,
564 [TCG_COND_GE] = RCOND_GEZ
/* Emit a raw BPcc instruction with a precomputed 19-bit offset field. */
567 static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
569 tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
/* Emit a BPcc to LABEL; unresolved labels get a relocation instead. */
572 static void tcg_out_bpcc(TCGContext *s, int scond, int flags, int label)
574 TCGLabel *l = &s->labels[label];
575 int off19;
577 if (l->has_value) {
578 off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr));
579 } else {
580 /* Make sure to preserve destinations during retranslation. */
581 off19 = *s->code_ptr & INSN_OFF19(-1);
582 tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label, 0);
584 tcg_out_bpcc0(s, scond, flags, off19);
/* Compare C1 with C2 (reg or imm) by subtracting into %g0, setting cc. */
587 static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const)
589 tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
/* 32-bit conditional branch: compare, branch on icc, fill delay slot. */
592 static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1,
593 int32_t arg2, int const_arg2, int label)
595 tcg_out_cmp(s, arg1, arg2, const_arg2);
596 tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, label);
597 tcg_out_nop(s);
/* Conditional move on condition codes: RET = V1 if COND holds.
   CC selects icc vs xcc; V1 may be an 11-bit immediate. */
600 static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret,
601 int32_t v1, int v1const)
603 tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret)
604 | INSN_RS1(tcg_cond_to_bcond[cond])
605 | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1)));
/* 32-bit movcond: compare then conditionally move V1 into RET. */
608 static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
609 TCGReg c1, int32_t c2, int c2const,
610 int32_t v1, int v1const)
612 tcg_out_cmp(s, c1, c2, c2const);
613 tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
/* 64-bit conditional branch.  Signed compares against zero use the
   single-insn BPr form; everything else is cmp + BPcc on xcc.  The
   delay slot is filled with a nop in both cases. */
616 static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1,
617 int32_t arg2, int const_arg2, int label)
619 /* For 64-bit signed comparisons vs zero, we can avoid the compare. */
620 if (arg2 == 0 && !is_unsigned_cond(cond)) {
621 TCGLabel *l = &s->labels[label];
622 int off16;
624 if (l->has_value) {
625 off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr));
626 } else {
627 /* Make sure to preserve destinations during retranslation. */
628 off16 = *s->code_ptr & INSN_OFF16(-1);
629 tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, label, 0);
631 tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
632 | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
633 } else {
634 tcg_out_cmp(s, arg1, arg2, const_arg2);
635 tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, label);
637 tcg_out_nop(s);
/* MOVr: RET = V1 if register C1 satisfies COND against zero.
   The immediate form is only 10 bits wide. */
640 static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1,
641 int32_t v1, int v1const)
643 tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
644 | (tcg_cond_to_rcond[cond] << 10)
645 | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
/* 64-bit movcond, using MOVr to skip the compare where possible. */
648 static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
649 TCGReg c1, int32_t c2, int c2const,
650 int32_t v1, int v1const)
652 /* For 64-bit signed comparisons vs zero, we can avoid the compare.
653 Note that the immediate range is one bit smaller, so we must check
654 for that as well. */
655 if (c2 == 0 && !is_unsigned_cond(cond)
656 && (!v1const || check_fit_i32(v1, 10))) {
657 tcg_out_movr(s, cond, ret, c1, v1, v1const);
658 } else {
659 tcg_out_cmp(s, c1, c2, c2const);
660 tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
/* Set RET to the boolean result of C1 COND C2 (32-bit).  Where the
   condition maps onto the carry flag, the result is materialized with
   ADDC/SUBC instead of a compare-and-conditional-move pair. */
664 static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
665 TCGReg c1, int32_t c2, int c2const)
667 /* For 32-bit comparisons, we can play games with ADDC/SUBC. */
668 switch (cond) {
669 case TCG_COND_LTU:
670 case TCG_COND_GEU:
671 /* The result of the comparison is in the carry bit. */
672 break;
674 case TCG_COND_EQ:
675 case TCG_COND_NE:
676 /* For equality, we can transform to inequality vs zero. */
677 if (c2 != 0) {
/* XOR leaves zero exactly when the operands were equal. */
678 tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR);
679 c2 = TCG_REG_T1;
680 } else {
681 c2 = c1;
683 c1 = TCG_REG_G0, c2const = 0;
684 cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
685 break;
687 case TCG_COND_GTU:
688 case TCG_COND_LEU:
689 /* If we don't need to load a constant into a register, we can
690 swap the operands on GTU/LEU. There's no benefit to loading
691 the constant into a temporary register. */
692 if (!c2const || c2 == 0) {
693 TCGReg t = c1;
694 c1 = c2;
695 c2 = t;
696 c2const = 0;
697 cond = tcg_swap_cond(cond);
698 break;
700 /* FALLTHRU */
702 default:
/* Generic path: compare, zero RET, then conditionally move 1. */
703 tcg_out_cmp(s, c1, c2, c2const);
704 tcg_out_movi_imm13(s, ret, 0);
705 tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
706 return;
/* Carry-bit path: 0+0+C yields C; 0-0-C yields -C, i.e. 0 or -1. */
709 tcg_out_cmp(s, c1, c2, c2const);
710 if (cond == TCG_COND_LTU) {
711 tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC);
712 } else {
713 tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC);
/* Set RET to the boolean result of C1 COND C2 (64-bit), preferring
   the VIS3 ADDXC carry trick, then MOVr, then cmp + MOVcc. */
717 static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
718 TCGReg c1, int32_t c2, int c2const)
720 if (use_vis3_instructions) {
721 switch (cond) {
722 case TCG_COND_NE:
723 if (c2 != 0) {
724 break;
/* NE vs zero is the same as 0 LTU c1. */
726 c2 = c1, c2const = 0, c1 = TCG_REG_G0;
727 /* FALLTHRU */
728 case TCG_COND_LTU:
/* ADDXC %g0,%g0 materializes the carry bit directly. */
729 tcg_out_cmp(s, c1, c2, c2const);
730 tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC);
731 return;
732 default:
733 break;
737 /* For 64-bit signed comparisons vs zero, we can avoid the compare
738 if the input does not overlap the output. */
739 if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
740 tcg_out_movi_imm13(s, ret, 0);
741 tcg_out_movr(s, cond, ret, c1, 1, 1);
742 } else {
743 tcg_out_cmp(s, c1, c2, c2const);
744 tcg_out_movi_imm13(s, ret, 0);
745 tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
/* Double-word (2x32-bit) add/sub: RL:RH = AL:AH op BL:BH, with carry
   between halves via the cc-setting low op and ADDC/SUBC high op.
   The low result goes through T1 when RL would clobber a high input. */
749 static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh,
750 TCGReg al, TCGReg ah, int32_t bl, int blconst,
751 int32_t bh, int bhconst, int opl, int oph)
753 TCGReg tmp = TCG_REG_T1;
755 /* Note that the low parts are fully consumed before tmp is set. */
756 if (rl != ah && (bhconst || rl != bh)) {
757 tmp = rl;
760 tcg_out_arithc(s, tmp, al, bl, blconst, opl);
761 tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
762 tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
/* Double-word (2x64-bit) add/sub.  The low halves use ADDCC/SUBCC to
   set the carry; the high halves are combined with carry by one of:
   VIS3 ADDXC, a conditional-move trick when BH is zero, or a
   pre-adjusted BH in T2 that is un-adjusted when carry is clear. */
765 static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
766 TCGReg al, TCGReg ah, int32_t bl, int blconst,
767 int32_t bh, int bhconst, bool is_sub)
769 TCGReg tmp = TCG_REG_T1;
771 /* Note that the low parts are fully consumed before tmp is set. */
772 if (rl != ah && (bhconst || rl != bh)) {
773 tmp = rl;
776 tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC);
778 if (use_vis3_instructions && !is_sub) {
779 /* Note that ADDXC doesn't accept immediates. */
780 if (bhconst && bh != 0) {
781 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh);
782 bh = TCG_REG_T2;
784 tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
785 } else if (bh == TCG_REG_G0) {
786 /* If we have a zero, we can perform the operation in two insns,
787 with the arithmetic first, and a conditional move into place. */
788 if (rh == ah) {
789 tcg_out_arithi(s, TCG_REG_T2, ah, 1,
790 is_sub ? ARITH_SUB : ARITH_ADD);
791 tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0);
792 } else {
793 tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD);
794 tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0);
796 } else {
797 /* Otherwise adjust BH as if there is carry into T2 ... */
798 if (bhconst) {
799 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1));
800 } else {
801 tcg_out_arithi(s, TCG_REG_T2, bh, 1,
802 is_sub ? ARITH_SUB : ARITH_ADD);
804 /* ... smoosh T2 back to original BH if carry is clear ... */
805 tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst);
806 /* ... and finally perform the arithmetic with the new operand. */
807 tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD);
810 tcg_out_mov(s, TCG_TYPE_I64, rl, tmp);
/* Emit a call to DEST without filling the delay slot: a direct CALL
   when the displacement fits in 32 bits, else a built address + JMPL
   (which writes the return address into %o7). */
813 static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest)
815 ptrdiff_t disp = tcg_pcrel_diff(s, dest);
817 if (disp == (int32_t)disp) {
818 tcg_out32(s, CALL | (uint32_t)disp >> 2);
819 } else {
820 uintptr_t desti = (uintptr_t)dest;
821 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, desti & ~0xfff);
822 tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL);
/* Call DEST with a nop in the delay slot. */
826 static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
828 tcg_out_call_nodelay(s, dest);
829 tcg_out_nop(s);
832 #ifdef CONFIG_SOFTMMU
/* Per-memop entry points generated below; the qemu_ld/st slow paths
   branch here instead of calling the C helpers directly. */
833 static tcg_insn_unit *qemu_ld_trampoline[16];
834 static tcg_insn_unit *qemu_st_trampoline[16];
/* Generate one trampoline per supported memop.  Each trampoline
   shuffles arguments into helper-call positions (merging split
   32-bit address/data halves on v8plus), stores the return address,
   sets env, and tail-calls the C helper. */
836 static void build_trampolines(TCGContext *s)
838 static void * const qemu_ld_helpers[16] = {
839 [MO_UB] = helper_ret_ldub_mmu,
840 [MO_SB] = helper_ret_ldsb_mmu,
841 [MO_LEUW] = helper_le_lduw_mmu,
842 [MO_LESW] = helper_le_ldsw_mmu,
843 [MO_LEUL] = helper_le_ldul_mmu,
844 [MO_LEQ] = helper_le_ldq_mmu,
845 [MO_BEUW] = helper_be_lduw_mmu,
846 [MO_BESW] = helper_be_ldsw_mmu,
847 [MO_BEUL] = helper_be_ldul_mmu,
848 [MO_BEQ] = helper_be_ldq_mmu,
850 static void * const qemu_st_helpers[16] = {
851 [MO_UB] = helper_ret_stb_mmu,
852 [MO_LEUW] = helper_le_stw_mmu,
853 [MO_LEUL] = helper_le_stl_mmu,
854 [MO_LEQ] = helper_le_stq_mmu,
855 [MO_BEUW] = helper_be_stw_mmu,
856 [MO_BEUL] = helper_be_stl_mmu,
857 [MO_BEQ] = helper_be_stq_mmu,
860 int i;
861 TCGReg ra;
863 for (i = 0; i < 16; ++i) {
864 if (qemu_ld_helpers[i] == NULL) {
865 continue;
868 /* May as well align the trampoline. */
869 while ((uintptr_t)s->code_ptr & 15) {
870 tcg_out_nop(s);
872 qemu_ld_trampoline[i] = s->code_ptr;
874 if (SPARC64 || TARGET_LONG_BITS == 32) {
875 ra = TCG_REG_O3;
876 } else {
877 /* Install the high part of the address. */
878 tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
879 ra = TCG_REG_O4;
882 /* Set the retaddr operand. */
883 tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
884 /* Set the env operand. */
885 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
886 /* Tail call. */
887 tcg_out_call_nodelay(s, qemu_ld_helpers[i]);
/* Delay slot: make the helper return to our caller, not to us. */
888 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
891 for (i = 0; i < 16; ++i) {
892 if (qemu_st_helpers[i] == NULL) {
893 continue;
896 /* May as well align the trampoline. */
897 while ((uintptr_t)s->code_ptr & 15) {
898 tcg_out_nop(s);
900 qemu_st_trampoline[i] = s->code_ptr;
902 if (SPARC64) {
903 ra = TCG_REG_O4;
904 } else {
/* v8plus: walk RA past the (possibly split) address and data
   arguments to find where retaddr belongs. */
905 ra = TCG_REG_O1;
906 if (TARGET_LONG_BITS == 64) {
907 /* Install the high part of the address. */
908 tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
909 ra += 2;
910 } else {
911 ra += 1;
913 if ((i & MO_SIZE) == MO_64) {
914 /* Install the high part of the data. */
915 tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
916 ra += 2;
917 } else {
918 ra += 1;
920 /* Skip the mem_index argument. */
921 ra += 1;
924 /* Set the retaddr operand. */
925 if (ra >= TCG_REG_O6) {
/* Ran out of %o registers: spill retaddr to the stack slot. */
926 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK,
927 TCG_TARGET_CALL_STACK_OFFSET);
928 ra = TCG_REG_G1;
930 tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
931 /* Set the env operand. */
932 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
933 /* Tail call. */
934 tcg_out_call_nodelay(s, qemu_st_helpers[i]);
935 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
938 #endif
940 /* Generate global QEMU prologue and epilogue code */
941 static void tcg_target_qemu_prologue(TCGContext *s)
943 int tmp_buf_size, frame_size;
945 /* The TCG temp buffer is at the top of the frame, immediately
946 below the frame pointer. */
947 tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
948 tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size,
949 tmp_buf_size);
951 /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
952 otherwise the minimal frame usable by callees. */
953 frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
954 frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
/* Round the frame size up to the required stack alignment. */
955 frame_size += TCG_TARGET_STACK_ALIGN - 1;
956 frame_size &= -TCG_TARGET_STACK_ALIGN;
957 tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
958 INSN_IMM13(-frame_size));
960 #ifdef CONFIG_USE_GUEST_BASE
/* Pin the guest base in a reserved register for the whole TB. */
961 if (GUEST_BASE != 0) {
962 tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
963 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
965 #endif
/* Jump to the TB start address passed in %i1. */
967 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL);
968 /* delay slot */
969 tcg_out_nop(s);
971 /* No epilogue required. We issue ret + restore directly in the TB. */
973 #ifdef CONFIG_SOFTMMU
974 build_trampolines(s);
975 #endif
978 #if defined(CONFIG_SOFTMMU)
979 /* Perform the TLB load and compare.
981 Inputs:
982 ADDRLO and ADDRHI contain the possible two parts of the address.
984 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
986 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
987 This should be offsetof addr_read or addr_write.
989 The result of the TLB comparison is in %[ix]cc. The sanitized address
990 is in the returned register, maybe %o0. The TLB addend is in %o1. */
992 static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
993 TCGMemOp s_bits, int which)
995 const TCGReg r0 = TCG_REG_O0;
996 const TCGReg r1 = TCG_REG_O1;
997 const TCGReg r2 = TCG_REG_O2;
998 int tlb_ofs;
1000 /* Shift the page number down. */
1001 tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL);
1003 /* Mask out the page offset, except for the required alignment. */
1004 tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1,
1005 TARGET_PAGE_MASK | ((1 << s_bits) - 1));
1007 /* Mask the tlb index. */
1008 tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND);
1010 /* Mask page, part 2. */
1011 tcg_out_arith(s, r0, addr, TCG_REG_T1, ARITH_AND);
1013 /* Shift the tlb index into place. */
1014 tcg_out_arithi(s, r1, r1, CPU_TLB_ENTRY_BITS, SHIFT_SLL);
1016 /* Relative to the current ENV. */
1017 tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);
1019 /* Find a base address that can load both tlb comparator and addend. */
1020 tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
1021 if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
/* Fold the unreachable high bits of the offset into R1 so the
   remaining displacement fits an imm13. */
1022 if (tlb_ofs & ~0x3ff) {
1023 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff);
1024 tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD);
1026 tlb_ofs &= 0x3ff;
1029 /* Load the tlb comparator and the addend. */
1030 tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
1031 tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
1033 /* subcc arg0, arg2, %g0 */
1034 tcg_out_cmp(s, r0, r2, 0);
1036 /* If the guest address must be zero-extended, do so now. */
1037 if (SPARC64 && TARGET_LONG_BITS == 32) {
1038 tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL);
1039 return r0;
1041 return addr;
1043 #endif /* CONFIG_SOFTMMU */
/* Map a TCGMemOp to the SPARC load opcode that implements it; the
   little-endian entries use the ASI_PRIMARY_LITTLE variants. */
1045 static const int qemu_ld_opc[16] = {
1046 [MO_UB] = LDUB,
1047 [MO_SB] = LDSB,
1049 [MO_BEUW] = LDUH,
1050 [MO_BESW] = LDSH,
1051 [MO_BEUL] = LDUW,
1052 [MO_BESL] = LDSW,
1053 [MO_BEQ] = LDX,
1055 [MO_LEUW] = LDUH_LE,
1056 [MO_LESW] = LDSH_LE,
1057 [MO_LEUL] = LDUW_LE,
1058 [MO_LESL] = LDSW_LE,
1059 [MO_LEQ] = LDX_LE,
/* Store-opcode counterpart of qemu_ld_opc. */
1062 static const int qemu_st_opc[16] = {
1063 [MO_UB] = STB,
1065 [MO_BEUW] = STH,
1066 [MO_BEUL] = STW,
1067 [MO_BEQ] = STX,
1069 [MO_LEUW] = STH_LE,
1070 [MO_LEUL] = STW_LE,
1071 [MO_LEQ] = STX_LE,
/* Emit a guest memory load of MEMOP into DATA from guest address ADDR.
   With softmmu, the TLB hit executes in the annulled delay slot of a
   branch over the helper-trampoline slow path; without it, the access
   goes straight through GUEST_BASE. */
1074 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
1075 TCGMemOp memop, int memi, bool is_64)
1077 #ifdef CONFIG_SOFTMMU
1078 TCGMemOp s_bits = memop & MO_SIZE;
1079 TCGReg addrz, param;
1080 tcg_insn_unit *func;
1081 tcg_insn_unit *label_ptr;
1083 addrz = tcg_out_tlb_load(s, addr, memi, s_bits,
1084 offsetof(CPUTLBEntry, addr_read));
1086 /* The fast path is exactly one insn. Thus we can perform the
1087 entire TLB Hit in the (annulled) delay slot of the branch
1088 over the TLB Miss case. */
1090 /* beq,a,pt %[xi]cc, label0 */
1091 label_ptr = s->code_ptr;
1092 tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
1093 | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
1094 /* delay slot */
1095 tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_ld_opc[memop]);
1097 /* TLB Miss. */
1099 param = TCG_REG_O1;
1100 if (!SPARC64 && TARGET_LONG_BITS == 64) {
1101 /* Skip the high-part; we'll perform the extract in the trampoline. */
1102 param++;
1104 tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
1106 /* We use the helpers to extend SB and SW data, leaving the case
1107 of SL needing explicit extending below. */
1108 if ((memop & ~MO_BSWAP) == MO_SL) {
1109 func = qemu_ld_trampoline[memop & ~MO_SIGN];
1110 } else {
1111 func = qemu_ld_trampoline[memop];
1113 assert(func != NULL);
1114 tcg_out_call_nodelay(s, func);
1115 /* delay slot */
1116 tcg_out_movi(s, TCG_TYPE_I32, param, memi);
1118 /* Recall that all of the helpers return 64-bit results.
1119 Which complicates things for sparcv8plus. */
1120 if (SPARC64) {
1121 /* We let the helper sign-extend SB and SW, but leave SL for here. */
1122 if (is_64 && (memop & ~MO_BSWAP) == MO_SL) {
1123 tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
1124 } else {
1125 tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
1127 } else {
1128 if (s_bits == MO_64) {
/* Reassemble the 64-bit result from the %o0/%o1 halves. */
1129 tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
1130 tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
1131 tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
1132 } else if (is_64) {
1133 /* Re-extend from 32-bit rather than reassembling when we
1134 know the high register must be an extension. */
1135 tcg_out_arithi(s, data, TCG_REG_O1, 0,
1136 memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL);
1137 } else {
1138 tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1);
/* Resolve the forward branch over the slow path. */
1142 *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1143 #else
1144 if (SPARC64 && TARGET_LONG_BITS == 32) {
1145 tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
1146 addr = TCG_REG_T1;
1148 tcg_out_ldst_rr(s, data, addr,
1149 (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
1150 qemu_ld_opc[memop]);
1151 #endif /* CONFIG_SOFTMMU */
1154 static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
1155 TCGMemOp memop, int memi)
1157 #ifdef CONFIG_SOFTMMU
1158 TCGMemOp s_bits = memop & MO_SIZE;
1159 TCGReg addrz, param;
1160 tcg_insn_unit *func;
1161 tcg_insn_unit *label_ptr;
1163 addrz = tcg_out_tlb_load(s, addr, memi, s_bits,
1164 offsetof(CPUTLBEntry, addr_write));
1166 /* The fast path is exactly one insn. Thus we can perform the entire
1167 TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
1168 /* beq,a,pt %[xi]cc, label0 */
1169 label_ptr = s->code_ptr;
1170 tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
1171 | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
1172 /* delay slot */
1173 tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, qemu_st_opc[memop]);
1175 /* TLB Miss. */
1177 param = TCG_REG_O1;
1178 if (!SPARC64 && TARGET_LONG_BITS == 64) {
1179 /* Skip the high-part; we'll perform the extract in the trampoline. */
1180 param++;
1182 tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
1183 if (!SPARC64 && s_bits == MO_64) {
1184 /* Skip the high-part; we'll perform the extract in the trampoline. */
1185 param++;
1187 tcg_out_mov(s, TCG_TYPE_REG, param++, data);
1189 func = qemu_st_trampoline[memop];
1190 assert(func != NULL);
1191 tcg_out_call_nodelay(s, func);
1192 /* delay slot */
1193 tcg_out_movi(s, TCG_TYPE_REG, param, memi);
1195 *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1196 #else
1197 if (SPARC64 && TARGET_LONG_BITS == 32) {
1198 tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
1199 addr = TCG_REG_T1;
1201 tcg_out_ldst_rr(s, data, addr,
1202 (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
1203 qemu_st_opc[memop]);
1204 #endif /* CONFIG_SOFTMMU */
1207 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1208 const TCGArg args[TCG_MAX_OP_ARGS],
1209 const int const_args[TCG_MAX_OP_ARGS])
1211 TCGArg a0, a1, a2;
1212 int c, c2;
1214 /* Hoist the loads of the most common arguments. */
1215 a0 = args[0];
1216 a1 = args[1];
1217 a2 = args[2];
1218 c2 = const_args[2];
1220 switch (opc) {
1221 case INDEX_op_exit_tb:
1222 if (check_fit_ptr(a0, 13)) {
1223 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1224 tcg_out_movi_imm13(s, TCG_REG_O0, a0);
1225 } else {
1226 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff);
1227 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1228 tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR);
1230 break;
1231 case INDEX_op_goto_tb:
1232 if (s->tb_jmp_offset) {
1233 /* direct jump method */
1234 s->tb_jmp_offset[a0] = tcg_current_code_size(s);
1235 /* Make sure to preserve links during retranslation. */
1236 tcg_out32(s, CALL | (*s->code_ptr & ~INSN_OP(-1)));
1237 } else {
1238 /* indirect jump method */
1239 tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0));
1240 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL);
1242 tcg_out_nop(s);
1243 s->tb_next_offset[a0] = tcg_current_code_size(s);
1244 break;
1245 case INDEX_op_br:
1246 tcg_out_bpcc(s, COND_A, BPCC_PT, a0);
1247 tcg_out_nop(s);
1248 break;
1250 #define OP_32_64(x) \
1251 glue(glue(case INDEX_op_, x), _i32): \
1252 glue(glue(case INDEX_op_, x), _i64)
1254 OP_32_64(ld8u):
1255 tcg_out_ldst(s, a0, a1, a2, LDUB);
1256 break;
1257 OP_32_64(ld8s):
1258 tcg_out_ldst(s, a0, a1, a2, LDSB);
1259 break;
1260 OP_32_64(ld16u):
1261 tcg_out_ldst(s, a0, a1, a2, LDUH);
1262 break;
1263 OP_32_64(ld16s):
1264 tcg_out_ldst(s, a0, a1, a2, LDSH);
1265 break;
1266 case INDEX_op_ld_i32:
1267 case INDEX_op_ld32u_i64:
1268 tcg_out_ldst(s, a0, a1, a2, LDUW);
1269 break;
1270 OP_32_64(st8):
1271 tcg_out_ldst(s, a0, a1, a2, STB);
1272 break;
1273 OP_32_64(st16):
1274 tcg_out_ldst(s, a0, a1, a2, STH);
1275 break;
1276 case INDEX_op_st_i32:
1277 case INDEX_op_st32_i64:
1278 tcg_out_ldst(s, a0, a1, a2, STW);
1279 break;
1280 OP_32_64(add):
1281 c = ARITH_ADD;
1282 goto gen_arith;
1283 OP_32_64(sub):
1284 c = ARITH_SUB;
1285 goto gen_arith;
1286 OP_32_64(and):
1287 c = ARITH_AND;
1288 goto gen_arith;
1289 OP_32_64(andc):
1290 c = ARITH_ANDN;
1291 goto gen_arith;
1292 OP_32_64(or):
1293 c = ARITH_OR;
1294 goto gen_arith;
1295 OP_32_64(orc):
1296 c = ARITH_ORN;
1297 goto gen_arith;
1298 OP_32_64(xor):
1299 c = ARITH_XOR;
1300 goto gen_arith;
1301 case INDEX_op_shl_i32:
1302 c = SHIFT_SLL;
1303 do_shift32:
1304 /* Limit immediate shift count lest we create an illegal insn. */
1305 tcg_out_arithc(s, a0, a1, a2 & 31, c2, c);
1306 break;
1307 case INDEX_op_shr_i32:
1308 c = SHIFT_SRL;
1309 goto do_shift32;
1310 case INDEX_op_sar_i32:
1311 c = SHIFT_SRA;
1312 goto do_shift32;
1313 case INDEX_op_mul_i32:
1314 c = ARITH_UMUL;
1315 goto gen_arith;
1317 OP_32_64(neg):
1318 c = ARITH_SUB;
1319 goto gen_arith1;
1320 OP_32_64(not):
1321 c = ARITH_ORN;
1322 goto gen_arith1;
1324 case INDEX_op_div_i32:
1325 tcg_out_div32(s, a0, a1, a2, c2, 0);
1326 break;
1327 case INDEX_op_divu_i32:
1328 tcg_out_div32(s, a0, a1, a2, c2, 1);
1329 break;
1331 case INDEX_op_brcond_i32:
1332 tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], args[3]);
1333 break;
1334 case INDEX_op_setcond_i32:
1335 tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2);
1336 break;
1337 case INDEX_op_movcond_i32:
1338 tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
1339 break;
1341 case INDEX_op_add2_i32:
1342 tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
1343 args[4], const_args[4], args[5], const_args[5],
1344 ARITH_ADDCC, ARITH_ADDC);
1345 break;
1346 case INDEX_op_sub2_i32:
1347 tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
1348 args[4], const_args[4], args[5], const_args[5],
1349 ARITH_SUBCC, ARITH_SUBC);
1350 break;
1351 case INDEX_op_mulu2_i32:
1352 c = ARITH_UMUL;
1353 goto do_mul2;
1354 case INDEX_op_muls2_i32:
1355 c = ARITH_SMUL;
1356 do_mul2:
1357 /* The 32-bit multiply insns produce a full 64-bit result. If the
1358 destination register can hold it, we can avoid the slower RDY. */
1359 tcg_out_arithc(s, a0, a2, args[3], const_args[3], c);
1360 if (SPARC64 || a0 <= TCG_REG_O7) {
1361 tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
1362 } else {
1363 tcg_out_rdy(s, a1);
1365 break;
1367 case INDEX_op_qemu_ld_i32:
1368 tcg_out_qemu_ld(s, a0, a1, a2, args[3], false);
1369 break;
1370 case INDEX_op_qemu_ld_i64:
1371 tcg_out_qemu_ld(s, a0, a1, a2, args[3], true);
1372 break;
1373 case INDEX_op_qemu_st_i32:
1374 case INDEX_op_qemu_st_i64:
1375 tcg_out_qemu_st(s, a0, a1, a2, args[3]);
1376 break;
1378 case INDEX_op_ld32s_i64:
1379 tcg_out_ldst(s, a0, a1, a2, LDSW);
1380 break;
1381 case INDEX_op_ld_i64:
1382 tcg_out_ldst(s, a0, a1, a2, LDX);
1383 break;
1384 case INDEX_op_st_i64:
1385 tcg_out_ldst(s, a0, a1, a2, STX);
1386 break;
1387 case INDEX_op_shl_i64:
1388 c = SHIFT_SLLX;
1389 do_shift64:
1390 /* Limit immediate shift count lest we create an illegal insn. */
1391 tcg_out_arithc(s, a0, a1, a2 & 63, c2, c);
1392 break;
1393 case INDEX_op_shr_i64:
1394 c = SHIFT_SRLX;
1395 goto do_shift64;
1396 case INDEX_op_sar_i64:
1397 c = SHIFT_SRAX;
1398 goto do_shift64;
1399 case INDEX_op_mul_i64:
1400 c = ARITH_MULX;
1401 goto gen_arith;
1402 case INDEX_op_div_i64:
1403 c = ARITH_SDIVX;
1404 goto gen_arith;
1405 case INDEX_op_divu_i64:
1406 c = ARITH_UDIVX;
1407 goto gen_arith;
1408 case INDEX_op_ext32s_i64:
1409 tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
1410 break;
1411 case INDEX_op_ext32u_i64:
1412 tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
1413 break;
1414 case INDEX_op_trunc_shr_i32:
1415 if (a2 == 0) {
1416 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
1417 } else {
1418 tcg_out_arithi(s, a0, a1, a2, SHIFT_SRLX);
1420 break;
1422 case INDEX_op_brcond_i64:
1423 tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], args[3]);
1424 break;
1425 case INDEX_op_setcond_i64:
1426 tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2);
1427 break;
1428 case INDEX_op_movcond_i64:
1429 tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
1430 break;
1431 case INDEX_op_add2_i64:
1432 tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
1433 const_args[4], args[5], const_args[5], false);
1434 break;
1435 case INDEX_op_sub2_i64:
1436 tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
1437 const_args[4], args[5], const_args[5], true);
1438 break;
1439 case INDEX_op_muluh_i64:
1440 tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI);
1441 break;
1443 gen_arith:
1444 tcg_out_arithc(s, a0, a1, a2, c2, c);
1445 break;
1447 gen_arith1:
1448 tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c);
1449 break;
1451 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1452 case INDEX_op_mov_i64:
1453 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1454 case INDEX_op_movi_i64:
1455 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1456 default:
1457 tcg_abort();
1461 static const TCGTargetOpDef sparc_op_defs[] = {
1462 { INDEX_op_exit_tb, { } },
1463 { INDEX_op_goto_tb, { } },
1464 { INDEX_op_br, { } },
1466 { INDEX_op_ld8u_i32, { "r", "r" } },
1467 { INDEX_op_ld8s_i32, { "r", "r" } },
1468 { INDEX_op_ld16u_i32, { "r", "r" } },
1469 { INDEX_op_ld16s_i32, { "r", "r" } },
1470 { INDEX_op_ld_i32, { "r", "r" } },
1471 { INDEX_op_st8_i32, { "rZ", "r" } },
1472 { INDEX_op_st16_i32, { "rZ", "r" } },
1473 { INDEX_op_st_i32, { "rZ", "r" } },
1475 { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
1476 { INDEX_op_mul_i32, { "r", "rZ", "rJ" } },
1477 { INDEX_op_div_i32, { "r", "rZ", "rJ" } },
1478 { INDEX_op_divu_i32, { "r", "rZ", "rJ" } },
1479 { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
1480 { INDEX_op_and_i32, { "r", "rZ", "rJ" } },
1481 { INDEX_op_andc_i32, { "r", "rZ", "rJ" } },
1482 { INDEX_op_or_i32, { "r", "rZ", "rJ" } },
1483 { INDEX_op_orc_i32, { "r", "rZ", "rJ" } },
1484 { INDEX_op_xor_i32, { "r", "rZ", "rJ" } },
1486 { INDEX_op_shl_i32, { "r", "rZ", "rJ" } },
1487 { INDEX_op_shr_i32, { "r", "rZ", "rJ" } },
1488 { INDEX_op_sar_i32, { "r", "rZ", "rJ" } },
1490 { INDEX_op_neg_i32, { "r", "rJ" } },
1491 { INDEX_op_not_i32, { "r", "rJ" } },
1493 { INDEX_op_brcond_i32, { "rZ", "rJ" } },
1494 { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } },
1495 { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } },
1497 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
1498 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
1499 { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
1500 { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } },
1502 { INDEX_op_ld8u_i64, { "R", "r" } },
1503 { INDEX_op_ld8s_i64, { "R", "r" } },
1504 { INDEX_op_ld16u_i64, { "R", "r" } },
1505 { INDEX_op_ld16s_i64, { "R", "r" } },
1506 { INDEX_op_ld32u_i64, { "R", "r" } },
1507 { INDEX_op_ld32s_i64, { "R", "r" } },
1508 { INDEX_op_ld_i64, { "R", "r" } },
1509 { INDEX_op_st8_i64, { "RZ", "r" } },
1510 { INDEX_op_st16_i64, { "RZ", "r" } },
1511 { INDEX_op_st32_i64, { "RZ", "r" } },
1512 { INDEX_op_st_i64, { "RZ", "r" } },
1514 { INDEX_op_add_i64, { "R", "RZ", "RJ" } },
1515 { INDEX_op_mul_i64, { "R", "RZ", "RJ" } },
1516 { INDEX_op_div_i64, { "R", "RZ", "RJ" } },
1517 { INDEX_op_divu_i64, { "R", "RZ", "RJ" } },
1518 { INDEX_op_sub_i64, { "R", "RZ", "RJ" } },
1519 { INDEX_op_and_i64, { "R", "RZ", "RJ" } },
1520 { INDEX_op_andc_i64, { "R", "RZ", "RJ" } },
1521 { INDEX_op_or_i64, { "R", "RZ", "RJ" } },
1522 { INDEX_op_orc_i64, { "R", "RZ", "RJ" } },
1523 { INDEX_op_xor_i64, { "R", "RZ", "RJ" } },
1525 { INDEX_op_shl_i64, { "R", "RZ", "RJ" } },
1526 { INDEX_op_shr_i64, { "R", "RZ", "RJ" } },
1527 { INDEX_op_sar_i64, { "R", "RZ", "RJ" } },
1529 { INDEX_op_neg_i64, { "R", "RJ" } },
1530 { INDEX_op_not_i64, { "R", "RJ" } },
1532 { INDEX_op_ext32s_i64, { "R", "r" } },
1533 { INDEX_op_ext32u_i64, { "R", "r" } },
1534 { INDEX_op_trunc_shr_i32, { "r", "R" } },
1536 { INDEX_op_brcond_i64, { "RZ", "RJ" } },
1537 { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } },
1538 { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } },
1540 { INDEX_op_add2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } },
1541 { INDEX_op_sub2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } },
1542 { INDEX_op_muluh_i64, { "R", "RZ", "RZ" } },
1544 { INDEX_op_qemu_ld_i32, { "r", "A" } },
1545 { INDEX_op_qemu_ld_i64, { "R", "A" } },
1546 { INDEX_op_qemu_st_i32, { "sZ", "A" } },
1547 { INDEX_op_qemu_st_i64, { "SZ", "A" } },
1549 { -1 },
1552 static void tcg_target_init(TCGContext *s)
1554 /* Only probe for the platform and capabilities if we havn't already
1555 determined maximum values at compile time. */
1556 #ifndef use_vis3_instructions
1558 unsigned long hwcap = qemu_getauxval(AT_HWCAP);
1559 use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0;
1561 #endif
1563 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1564 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64);
1566 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1567 (1 << TCG_REG_G1) |
1568 (1 << TCG_REG_G2) |
1569 (1 << TCG_REG_G3) |
1570 (1 << TCG_REG_G4) |
1571 (1 << TCG_REG_G5) |
1572 (1 << TCG_REG_G6) |
1573 (1 << TCG_REG_G7) |
1574 (1 << TCG_REG_O0) |
1575 (1 << TCG_REG_O1) |
1576 (1 << TCG_REG_O2) |
1577 (1 << TCG_REG_O3) |
1578 (1 << TCG_REG_O4) |
1579 (1 << TCG_REG_O5) |
1580 (1 << TCG_REG_O7));
1582 tcg_regset_clear(s->reserved_regs);
1583 tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
1584 tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
1585 tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
1586 tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
1587 tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
1588 tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
1589 tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
1590 tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
1592 tcg_add_target_add_op_defs(sparc_op_defs);
/* ELF machine type recorded in the in-memory JIT debug image:
   full V9 on a 64-bit host, 32PLUS (v8plus ABI) otherwise.  */
#if SPARC64
# define ELF_HOST_MACHINE  EM_SPARCV9
#else
# define ELF_HOST_MACHINE  EM_SPARC32PLUS
# define ELF_HOST_FLAGS    EF_SPARC_32PLUS
#endif
1602 typedef struct {
1603 DebugFrameHeader h;
1604 uint8_t fde_def_cfa[SPARC64 ? 4 : 2];
1605 uint8_t fde_win_save;
1606 uint8_t fde_ret_save[3];
1607 } DebugFrame;
1609 static const DebugFrame debug_frame = {
1610 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1611 .h.cie.id = -1,
1612 .h.cie.version = 1,
1613 .h.cie.code_align = 1,
1614 .h.cie.data_align = -sizeof(void *) & 0x7f,
1615 .h.cie.return_column = 15, /* o7 */
1617 /* Total FDE size does not include the "len" member. */
1618 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1620 .fde_def_cfa = {
1621 #if SPARC64
1622 12, 30, /* DW_CFA_def_cfa i6, 2047 */
1623 (2047 & 0x7f) | 0x80, (2047 >> 7)
1624 #else
1625 13, 30 /* DW_CFA_def_cfa_register i6 */
1626 #endif
1628 .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */
1629 .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */
1632 void tcg_register_jit(void *buf, size_t buf_size)
1634 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
1637 void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
1639 uint32_t *ptr = (uint32_t *)jmp_addr;
1640 uintptr_t disp = addr - jmp_addr;
1642 /* We can reach the entire address space for 32-bit. For 64-bit
1643 the code_gen_buffer can't be larger than 2GB. */
1644 assert(disp == (int32_t)disp);
1646 *ptr = CALL | (uint32_t)disp >> 2;
1647 flush_icache_range(jmp_addr, jmp_addr + 4);