tcg-s390: Implement direct chaining of TBs
[qemu.git] / tcg / s390 / tcg-target.c
blob07164e544d8710348fbe59739ade2085fd5e45df
1 /*
2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
5 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
6 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
27 #include "tcg-be-ldst.h"
29 /* We only support generating code for 64-bit mode. */
30 #if TCG_TARGET_REG_BITS != 64
31 #error "unsupported code generation mode"
32 #endif
34 #include "elf.h"
36 /* ??? The translation blocks produced by TCG are generally small enough to
37 be entirely reachable with a 16-bit displacement. Leaving the option for
38 a 32-bit displacement here Just In Case. */
39 #define USE_LONG_BRANCHES 0
41 #define TCG_CT_CONST_MULI 0x100
42 #define TCG_CT_CONST_ORI 0x200
43 #define TCG_CT_CONST_XORI 0x400
44 #define TCG_CT_CONST_CMPI 0x800
45 #define TCG_CT_CONST_ADLI 0x1000
47 /* Several places within the instruction set 0 means "no register"
48 rather than TCG_REG_R0. */
49 #define TCG_REG_NONE 0
51 /* A scratch register that may be used throughout the backend. */
52 #define TCG_TMP0 TCG_REG_R14
54 #ifdef CONFIG_USE_GUEST_BASE
55 #define TCG_GUEST_BASE_REG TCG_REG_R13
56 #else
57 #define TCG_GUEST_BASE_REG TCG_REG_R0
58 #endif
60 #ifndef GUEST_BASE
61 #define GUEST_BASE 0
62 #endif
/* Opcode constants, each named <format>_<mnemonic>.  A value is the 8-bit
   or 16-bit opcode exactly as it is tabulated; for the 16-bit ones the two
   bytes are not necessarily adjacent in the encoded instruction, and the
   per-format emitters are responsible for placing them. */
typedef enum S390Opcode {
    /* RIL format */
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    /* RI format */
    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,

    /* RIE format */
    RIE_CGIJ    = 0xec7c,
    RIE_CGRJ    = 0xec64,
    RIE_CIJ     = 0xec7e,
    RIE_CLGRJ   = 0xec65,
    RIE_CLIJ    = 0xec7f,
    RIE_CLGIJ   = 0xec7d,
    RIE_CLRJ    = 0xec77,
    RIE_CRJ     = 0xec76,
    RIE_RISBG   = 0xec55,

    /* RRE format */
    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    /* RRF format */
    RRF_LOCR    = 0xb9f2,
    RRF_LOCGR   = 0xb9e2,

    /* RR format */
    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    /* RSY format */
    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SRAG    = 0xeb0a,
    RSY_SRLG    = 0xeb0c,

    /* RS format */
    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    /* RXY format */
    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,

    /* RX format */
    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,
} S390Opcode;
231 #ifndef NDEBUG
232 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
233 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
234 "%r8", "%r9", "%r10" "%r11" "%r12" "%r13" "%r14" "%r15"
236 #endif
238 /* Since R6 is a potential argument register, choose it last of the
239 call-saved registers. Likewise prefer the call-clobbered registers
240 in reverse order to maximize the chance of avoiding the arguments. */
241 static const int tcg_target_reg_alloc_order[] = {
242 /* Call saved registers. */
243 TCG_REG_R13,
244 TCG_REG_R12,
245 TCG_REG_R11,
246 TCG_REG_R10,
247 TCG_REG_R9,
248 TCG_REG_R8,
249 TCG_REG_R7,
250 TCG_REG_R6,
251 /* Call clobbered registers. */
252 TCG_REG_R14,
253 TCG_REG_R0,
254 TCG_REG_R1,
255 /* Argument registers, in reverse order of allocation. */
256 TCG_REG_R5,
257 TCG_REG_R4,
258 TCG_REG_R3,
259 TCG_REG_R2,
262 static const int tcg_target_call_iarg_regs[] = {
263 TCG_REG_R2,
264 TCG_REG_R3,
265 TCG_REG_R4,
266 TCG_REG_R5,
267 TCG_REG_R6,
270 static const int tcg_target_call_oarg_regs[] = {
271 TCG_REG_R2,
274 #define S390_CC_EQ 8
275 #define S390_CC_LT 4
276 #define S390_CC_GT 2
277 #define S390_CC_OV 1
278 #define S390_CC_NE (S390_CC_LT | S390_CC_GT)
279 #define S390_CC_LE (S390_CC_LT | S390_CC_EQ)
280 #define S390_CC_GE (S390_CC_GT | S390_CC_EQ)
281 #define S390_CC_NEVER 0
282 #define S390_CC_ALWAYS 15
284 /* Condition codes that result from a COMPARE and COMPARE LOGICAL. */
285 static const uint8_t tcg_cond_to_s390_cond[] = {
286 [TCG_COND_EQ] = S390_CC_EQ,
287 [TCG_COND_NE] = S390_CC_NE,
288 [TCG_COND_LT] = S390_CC_LT,
289 [TCG_COND_LE] = S390_CC_LE,
290 [TCG_COND_GT] = S390_CC_GT,
291 [TCG_COND_GE] = S390_CC_GE,
292 [TCG_COND_LTU] = S390_CC_LT,
293 [TCG_COND_LEU] = S390_CC_LE,
294 [TCG_COND_GTU] = S390_CC_GT,
295 [TCG_COND_GEU] = S390_CC_GE,
298 /* Condition codes that result from a LOAD AND TEST. Here, we have no
299 unsigned instruction variation, however since the test is vs zero we
300 can re-map the outcomes appropriately. */
301 static const uint8_t tcg_cond_to_ltr_cond[] = {
302 [TCG_COND_EQ] = S390_CC_EQ,
303 [TCG_COND_NE] = S390_CC_NE,
304 [TCG_COND_LT] = S390_CC_LT,
305 [TCG_COND_LE] = S390_CC_LE,
306 [TCG_COND_GT] = S390_CC_GT,
307 [TCG_COND_GE] = S390_CC_GE,
308 [TCG_COND_LTU] = S390_CC_NEVER,
309 [TCG_COND_LEU] = S390_CC_EQ,
310 [TCG_COND_GTU] = S390_CC_NE,
311 [TCG_COND_GEU] = S390_CC_ALWAYS,
#ifdef CONFIG_SOFTMMU
/* Load helper entry points, indexed by MO_* value.  Slots without a
   designated initializer are left zero-initialized (NULL). */
static void * const qemu_ld_helpers[16] = {
    /* byte accesses */
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,
    /* little-endian accesses */
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LESL] = helper_le_ldsl_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    /* big-endian accesses */
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BESL] = helper_be_ldsl_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* Store helper entry points, indexed by MO_* value.  Slots without a
   designated initializer are left zero-initialized (NULL). */
static void * const qemu_st_helpers[16] = {
    /* byte access */
    [MO_UB]   = helper_ret_stb_mmu,
    /* little-endian accesses */
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    /* big-endian accesses */
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
#endif
341 static tcg_insn_unit *tb_ret_addr;
343 /* A list of relevant facilities used by this translator. Some of these
344 are required for proper operation, and these are checked at startup. */
346 #define FACILITY_ZARCH_ACTIVE (1ULL << (63 - 2))
347 #define FACILITY_LONG_DISP (1ULL << (63 - 18))
348 #define FACILITY_EXT_IMM (1ULL << (63 - 21))
349 #define FACILITY_GEN_INST_EXT (1ULL << (63 - 34))
350 #define FACILITY_LOAD_ON_COND (1ULL << (63 - 45))
352 static uint64_t facilities;
354 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
355 intptr_t value, intptr_t addend)
357 intptr_t pcrel2 = (tcg_insn_unit *)value - (code_ptr - 1);
358 assert(addend == -2);
360 switch (type) {
361 case R_390_PC16DBL:
362 assert(pcrel2 == (int16_t)pcrel2);
363 tcg_patch16(code_ptr, pcrel2);
364 break;
365 case R_390_PC32DBL:
366 assert(pcrel2 == (int32_t)pcrel2);
367 tcg_patch32(code_ptr, pcrel2);
368 break;
369 default:
370 tcg_abort();
371 break;
375 /* parse target specific constraints */
376 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
378 const char *ct_str = *pct_str;
380 switch (ct_str[0]) {
381 case 'r': /* all registers */
382 ct->ct |= TCG_CT_REG;
383 tcg_regset_set32(ct->u.regs, 0, 0xffff);
384 break;
385 case 'R': /* not R0 */
386 ct->ct |= TCG_CT_REG;
387 tcg_regset_set32(ct->u.regs, 0, 0xffff);
388 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
389 break;
390 case 'L': /* qemu_ld/st constraint */
391 ct->ct |= TCG_CT_REG;
392 tcg_regset_set32(ct->u.regs, 0, 0xffff);
393 tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2);
394 tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3);
395 tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
396 break;
397 case 'a': /* force R2 for division */
398 ct->ct |= TCG_CT_REG;
399 tcg_regset_clear(ct->u.regs);
400 tcg_regset_set_reg(ct->u.regs, TCG_REG_R2);
401 break;
402 case 'b': /* force R3 for division */
403 ct->ct |= TCG_CT_REG;
404 tcg_regset_clear(ct->u.regs);
405 tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
406 break;
407 case 'A':
408 ct->ct |= TCG_CT_CONST_ADLI;
409 break;
410 case 'K':
411 ct->ct |= TCG_CT_CONST_MULI;
412 break;
413 case 'O':
414 ct->ct |= TCG_CT_CONST_ORI;
415 break;
416 case 'X':
417 ct->ct |= TCG_CT_CONST_XORI;
418 break;
419 case 'C':
420 ct->ct |= TCG_CT_CONST_CMPI;
421 break;
422 default:
423 return -1;
425 ct_str++;
426 *pct_str = ct_str;
428 return 0;
431 /* Immediates to be used with logical OR. This is an optimization only,
432 since a full 64-bit immediate OR can always be performed with 4 sequential
433 OI[LH][LH] instructions. What we're looking for is immediates that we
434 can load efficiently, and the immediate load plus the reg-reg OR is
435 smaller than the sequential OI's. */
437 static int tcg_match_ori(TCGType type, tcg_target_long val)
439 if (facilities & FACILITY_EXT_IMM) {
440 if (type == TCG_TYPE_I32) {
441 /* All 32-bit ORs can be performed with 1 48-bit insn. */
442 return 1;
446 /* Look for negative values. These are best to load with LGHI. */
447 if (val < 0) {
448 if (val == (int16_t)val) {
449 return 0;
451 if (facilities & FACILITY_EXT_IMM) {
452 if (val == (int32_t)val) {
453 return 0;
458 return 1;
461 /* Immediates to be used with logical XOR. This is almost, but not quite,
462 only an optimization. XOR with immediate is only supported with the
463 extended-immediate facility. That said, there are a few patterns for
464 which it is better to load the value into a register first. */
466 static int tcg_match_xori(TCGType type, tcg_target_long val)
468 if ((facilities & FACILITY_EXT_IMM) == 0) {
469 return 0;
472 if (type == TCG_TYPE_I32) {
473 /* All 32-bit XORs can be performed with 1 48-bit insn. */
474 return 1;
477 /* Look for negative values. These are best to load with LGHI. */
478 if (val < 0 && val == (int32_t)val) {
479 return 0;
482 return 1;
485 /* Imediates to be used with comparisons. */
487 static int tcg_match_cmpi(TCGType type, tcg_target_long val)
489 if (facilities & FACILITY_EXT_IMM) {
490 /* The COMPARE IMMEDIATE instruction is available. */
491 if (type == TCG_TYPE_I32) {
492 /* We have a 32-bit immediate and can compare against anything. */
493 return 1;
494 } else {
495 /* ??? We have no insight here into whether the comparison is
496 signed or unsigned. The COMPARE IMMEDIATE insn uses a 32-bit
497 signed immediate, and the COMPARE LOGICAL IMMEDIATE insn uses
498 a 32-bit unsigned immediate. If we were to use the (semi)
499 obvious "val == (int32_t)val" we would be enabling unsigned
500 comparisons vs very large numbers. The only solution is to
501 take the intersection of the ranges. */
502 /* ??? Another possible solution is to simply lie and allow all
503 constants here and force the out-of-range values into a temp
504 register in tgen_cmp when we have knowledge of the actual
505 comparison code in use. */
506 return val >= 0 && val <= 0x7fffffff;
508 } else {
509 /* Only the LOAD AND TEST instruction is available. */
510 return val == 0;
514 /* Immediates to be used with add2/sub2. */
516 static int tcg_match_add2i(TCGType type, tcg_target_long val)
518 if (facilities & FACILITY_EXT_IMM) {
519 if (type == TCG_TYPE_I32) {
520 return 1;
521 } else if (val >= -0xffffffffll && val <= 0xffffffffll) {
522 return 1;
525 return 0;
528 /* Test if a constant matches the constraint. */
529 static int tcg_target_const_match(tcg_target_long val, TCGType type,
530 const TCGArgConstraint *arg_ct)
532 int ct = arg_ct->ct;
534 if (ct & TCG_CT_CONST) {
535 return 1;
538 if (type == TCG_TYPE_I32) {
539 val = (int32_t)val;
542 /* The following are mutually exclusive. */
543 if (ct & TCG_CT_CONST_MULI) {
544 /* Immediates that may be used with multiply. If we have the
545 general-instruction-extensions, then we have MULTIPLY SINGLE
546 IMMEDIATE with a signed 32-bit, otherwise we have only
547 MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */
548 if (facilities & FACILITY_GEN_INST_EXT) {
549 return val == (int32_t)val;
550 } else {
551 return val == (int16_t)val;
553 } else if (ct & TCG_CT_CONST_ADLI) {
554 return tcg_match_add2i(type, val);
555 } else if (ct & TCG_CT_CONST_ORI) {
556 return tcg_match_ori(type, val);
557 } else if (ct & TCG_CT_CONST_XORI) {
558 return tcg_match_xori(type, val);
559 } else if (ct & TCG_CT_CONST_CMPI) {
560 return tcg_match_cmpi(type, val);
563 return 0;
566 /* Emit instructions according to the given instruction format. */
568 static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
570 tcg_out16(s, (op << 8) | (r1 << 4) | r2);
573 static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
574 TCGReg r1, TCGReg r2)
576 tcg_out32(s, (op << 16) | (r1 << 4) | r2);
579 static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
580 TCGReg r1, TCGReg r2, int m3)
582 tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
585 static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
587 tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
590 static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
592 tcg_out16(s, op | (r1 << 4));
593 tcg_out32(s, i2);
596 static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
597 TCGReg b2, TCGReg r3, int disp)
599 tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
600 | (disp & 0xfff));
603 static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
604 TCGReg b2, TCGReg r3, int disp)
606 tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
607 tcg_out32(s, (op & 0xff) | (b2 << 28)
608 | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
611 #define tcg_out_insn_RX tcg_out_insn_RS
612 #define tcg_out_insn_RXY tcg_out_insn_RSY
614 /* Emit an opcode with "type-checking" of the format. */
615 #define tcg_out_insn(S, FMT, OP, ...) \
616 glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
619 /* emit 64-bit shifts */
620 static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
621 TCGReg src, TCGReg sh_reg, int sh_imm)
623 tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
626 /* emit 32-bit shifts */
627 static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
628 TCGReg sh_reg, int sh_imm)
630 tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
633 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
635 if (src != dst) {
636 if (type == TCG_TYPE_I32) {
637 tcg_out_insn(s, RR, LR, dst, src);
638 } else {
639 tcg_out_insn(s, RRE, LGR, dst, src);
644 /* load a register with an immediate value */
645 static void tcg_out_movi(TCGContext *s, TCGType type,
646 TCGReg ret, tcg_target_long sval)
648 static const S390Opcode lli_insns[4] = {
649 RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
652 tcg_target_ulong uval = sval;
653 int i;
655 if (type == TCG_TYPE_I32) {
656 uval = (uint32_t)sval;
657 sval = (int32_t)sval;
660 /* Try all 32-bit insns that can load it in one go. */
661 if (sval >= -0x8000 && sval < 0x8000) {
662 tcg_out_insn(s, RI, LGHI, ret, sval);
663 return;
666 for (i = 0; i < 4; i++) {
667 tcg_target_long mask = 0xffffull << i*16;
668 if ((uval & mask) == uval) {
669 tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
670 return;
674 /* Try all 48-bit insns that can load it in one go. */
675 if (facilities & FACILITY_EXT_IMM) {
676 if (sval == (int32_t)sval) {
677 tcg_out_insn(s, RIL, LGFI, ret, sval);
678 return;
680 if (uval <= 0xffffffff) {
681 tcg_out_insn(s, RIL, LLILF, ret, uval);
682 return;
684 if ((uval & 0xffffffff) == 0) {
685 tcg_out_insn(s, RIL, LLIHF, ret, uval >> 31 >> 1);
686 return;
690 /* Try for PC-relative address load. */
691 if ((sval & 1) == 0) {
692 ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1;
693 if (off == (int32_t)off) {
694 tcg_out_insn(s, RIL, LARL, ret, off);
695 return;
699 /* If extended immediates are not present, then we may have to issue
700 several instructions to load the low 32 bits. */
701 if (!(facilities & FACILITY_EXT_IMM)) {
702 /* A 32-bit unsigned value can be loaded in 2 insns. And given
703 that the lli_insns loop above did not succeed, we know that
704 both insns are required. */
705 if (uval <= 0xffffffff) {
706 tcg_out_insn(s, RI, LLILL, ret, uval);
707 tcg_out_insn(s, RI, IILH, ret, uval >> 16);
708 return;
711 /* If all high bits are set, the value can be loaded in 2 or 3 insns.
712 We first want to make sure that all the high bits get set. With
713 luck the low 16-bits can be considered negative to perform that for
714 free, otherwise we load an explicit -1. */
715 if (sval >> 31 >> 1 == -1) {
716 if (uval & 0x8000) {
717 tcg_out_insn(s, RI, LGHI, ret, uval);
718 } else {
719 tcg_out_insn(s, RI, LGHI, ret, -1);
720 tcg_out_insn(s, RI, IILL, ret, uval);
722 tcg_out_insn(s, RI, IILH, ret, uval >> 16);
723 return;
727 /* If we get here, both the high and low parts have non-zero bits. */
729 /* Recurse to load the lower 32-bits. */
730 tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff);
732 /* Insert data into the high 32-bits. */
733 uval = uval >> 31 >> 1;
734 if (facilities & FACILITY_EXT_IMM) {
735 if (uval < 0x10000) {
736 tcg_out_insn(s, RI, IIHL, ret, uval);
737 } else if ((uval & 0xffff) == 0) {
738 tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
739 } else {
740 tcg_out_insn(s, RIL, IIHF, ret, uval);
742 } else {
743 if (uval & 0xffff) {
744 tcg_out_insn(s, RI, IIHL, ret, uval);
746 if (uval & 0xffff0000) {
747 tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
753 /* Emit a load/store type instruction. Inputs are:
754 DATA: The register to be loaded or stored.
755 BASE+OFS: The effective address.
756 OPC_RX: If the operation has an RX format opcode (e.g. STC), otherwise 0.
757 OPC_RXY: The RXY format opcode for the operation (e.g. STCY). */
759 static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
760 TCGReg data, TCGReg base, TCGReg index,
761 tcg_target_long ofs)
763 if (ofs < -0x80000 || ofs >= 0x80000) {
764 /* Combine the low 20 bits of the offset with the actual load insn;
765 the high 44 bits must come from an immediate load. */
766 tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
767 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
768 ofs = low;
770 /* If we were already given an index register, add it in. */
771 if (index != TCG_REG_NONE) {
772 tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
774 index = TCG_TMP0;
777 if (opc_rx && ofs >= 0 && ofs < 0x1000) {
778 tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
779 } else {
780 tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
785 /* load data without address translation or endianness conversion */
786 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
787 TCGReg base, intptr_t ofs)
789 if (type == TCG_TYPE_I32) {
790 tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
791 } else {
792 tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
796 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
797 TCGReg base, intptr_t ofs)
799 if (type == TCG_TYPE_I32) {
800 tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
801 } else {
802 tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
806 /* load data from an absolute host address */
807 static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs)
809 intptr_t addr = (intptr_t)abs;
811 if ((facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) {
812 ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
813 if (disp == (int32_t)disp) {
814 if (type == TCG_TYPE_I32) {
815 tcg_out_insn(s, RIL, LRL, dest, disp);
816 } else {
817 tcg_out_insn(s, RIL, LGRL, dest, disp);
819 return;
823 tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff);
824 tcg_out_ld(s, type, dest, dest, addr & 0xffff);
827 static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
828 int msb, int lsb, int ofs, int z)
830 /* Format RIE-f */
831 tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
832 tcg_out16(s, (msb << 8) | (z << 7) | lsb);
833 tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
836 static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
838 if (facilities & FACILITY_EXT_IMM) {
839 tcg_out_insn(s, RRE, LGBR, dest, src);
840 return;
843 if (type == TCG_TYPE_I32) {
844 if (dest == src) {
845 tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24);
846 } else {
847 tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24);
849 tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24);
850 } else {
851 tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56);
852 tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56);
856 static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
858 if (facilities & FACILITY_EXT_IMM) {
859 tcg_out_insn(s, RRE, LLGCR, dest, src);
860 return;
863 if (dest == src) {
864 tcg_out_movi(s, type, TCG_TMP0, 0xff);
865 src = TCG_TMP0;
866 } else {
867 tcg_out_movi(s, type, dest, 0xff);
869 if (type == TCG_TYPE_I32) {
870 tcg_out_insn(s, RR, NR, dest, src);
871 } else {
872 tcg_out_insn(s, RRE, NGR, dest, src);
876 static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
878 if (facilities & FACILITY_EXT_IMM) {
879 tcg_out_insn(s, RRE, LGHR, dest, src);
880 return;
883 if (type == TCG_TYPE_I32) {
884 if (dest == src) {
885 tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16);
886 } else {
887 tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16);
889 tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16);
890 } else {
891 tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48);
892 tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48);
896 static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
898 if (facilities & FACILITY_EXT_IMM) {
899 tcg_out_insn(s, RRE, LLGHR, dest, src);
900 return;
903 if (dest == src) {
904 tcg_out_movi(s, type, TCG_TMP0, 0xffff);
905 src = TCG_TMP0;
906 } else {
907 tcg_out_movi(s, type, dest, 0xffff);
909 if (type == TCG_TYPE_I32) {
910 tcg_out_insn(s, RR, NR, dest, src);
911 } else {
912 tcg_out_insn(s, RRE, NGR, dest, src);
916 static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
918 tcg_out_insn(s, RRE, LGFR, dest, src);
921 static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
923 tcg_out_insn(s, RRE, LLGFR, dest, src);
/* Return true when C is a single contiguous run of one bits, where the run
   is allowed to wrap around from bit 63 to bit 0.  Accepted shapes:
       0....01....1
       1....10....0
       1..10..01..1
       0..01..10..0
   All-zeros and all-ones are rejected.  Copied from gcc sources. */
static inline bool risbg_mask(uint64_t c)
{
    uint64_t bits = c;
    uint64_t edge;

    /* Inverting does not change the number of 0/1 transitions, so
       normalize to a value whose least significant bit is zero. */
    if (bits & 1) {
        bits = ~bits;
    }

    /* Reject all zeros or all ones. */
    if (bits == 0) {
        return false;
    }

    /* Isolate the lowest set bit: the first transition. */
    edge = bits & -bits;

    /* Invert to look for a second transition, and discard everything
       below the first one. */
    bits = ~bits & -edge;

    /* Isolate the second transition, if any. */
    edge = bits & -bits;

    /* Match if everything from the second transition upward is ones, or if
       there was no second transition at all (bits == 0). */
    return bits == -edge;
}
956 static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
958 int msb, lsb;
959 if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
960 /* Achieve wraparound by swapping msb and lsb. */
961 msb = 64 - ctz64(~val);
962 lsb = clz64(~val) - 1;
963 } else {
964 msb = clz64(val);
965 lsb = 63 - ctz64(val);
967 tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
970 static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
972 static const S390Opcode ni_insns[4] = {
973 RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
975 static const S390Opcode nif_insns[2] = {
976 RIL_NILF, RIL_NIHF
978 uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
979 int i;
981 /* Look for the zero-extensions. */
982 if ((val & valid) == 0xffffffff) {
983 tgen_ext32u(s, dest, dest);
984 return;
986 if (facilities & FACILITY_EXT_IMM) {
987 if ((val & valid) == 0xff) {
988 tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
989 return;
991 if ((val & valid) == 0xffff) {
992 tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
993 return;
997 /* Try all 32-bit insns that can perform it in one go. */
998 for (i = 0; i < 4; i++) {
999 tcg_target_ulong mask = ~(0xffffull << i*16);
1000 if (((val | ~valid) & mask) == mask) {
1001 tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
1002 return;
1006 /* Try all 48-bit insns that can perform it in one go. */
1007 if (facilities & FACILITY_EXT_IMM) {
1008 for (i = 0; i < 2; i++) {
1009 tcg_target_ulong mask = ~(0xffffffffull << i*32);
1010 if (((val | ~valid) & mask) == mask) {
1011 tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
1012 return;
1016 if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) {
1017 tgen_andi_risbg(s, dest, dest, val);
1018 return;
1021 /* Fall back to loading the constant. */
1022 tcg_out_movi(s, type, TCG_TMP0, val);
1023 if (type == TCG_TYPE_I32) {
1024 tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
1025 } else {
1026 tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
1030 static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
1032 static const S390Opcode oi_insns[4] = {
1033 RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
1035 static const S390Opcode nif_insns[2] = {
1036 RIL_OILF, RIL_OIHF
1039 int i;
1041 /* Look for no-op. */
1042 if (val == 0) {
1043 return;
1046 if (facilities & FACILITY_EXT_IMM) {
1047 /* Try all 32-bit insns that can perform it in one go. */
1048 for (i = 0; i < 4; i++) {
1049 tcg_target_ulong mask = (0xffffull << i*16);
1050 if ((val & mask) != 0 && (val & ~mask) == 0) {
1051 tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
1052 return;
1056 /* Try all 48-bit insns that can perform it in one go. */
1057 for (i = 0; i < 2; i++) {
1058 tcg_target_ulong mask = (0xffffffffull << i*32);
1059 if ((val & mask) != 0 && (val & ~mask) == 0) {
1060 tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
1061 return;
1065 /* Perform the OR via sequential modifications to the high and
1066 low parts. Do this via recursion to handle 16-bit vs 32-bit
1067 masks in each half. */
1068 tgen64_ori(s, dest, val & 0x00000000ffffffffull);
1069 tgen64_ori(s, dest, val & 0xffffffff00000000ull);
1070 } else {
1071 /* With no extended-immediate facility, we don't need to be so
1072 clever. Just iterate over the insns and mask in the constant. */
1073 for (i = 0; i < 4; i++) {
1074 tcg_target_ulong mask = (0xffffull << i*16);
1075 if ((val & mask) != 0) {
1076 tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
1082 static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
1084 /* Perform the xor by parts. */
1085 if (val & 0xffffffff) {
1086 tcg_out_insn(s, RIL, XILF, dest, val);
1088 if (val > 0xffffffff) {
1089 tcg_out_insn(s, RIL, XIHF, dest, val >> 31 >> 1);
1093 static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1094 TCGArg c2, int c2const)
1096 bool is_unsigned = is_unsigned_cond(c);
1097 if (c2const) {
1098 if (c2 == 0) {
1099 if (type == TCG_TYPE_I32) {
1100 tcg_out_insn(s, RR, LTR, r1, r1);
1101 } else {
1102 tcg_out_insn(s, RRE, LTGR, r1, r1);
1104 return tcg_cond_to_ltr_cond[c];
1105 } else {
1106 if (is_unsigned) {
1107 if (type == TCG_TYPE_I32) {
1108 tcg_out_insn(s, RIL, CLFI, r1, c2);
1109 } else {
1110 tcg_out_insn(s, RIL, CLGFI, r1, c2);
1112 } else {
1113 if (type == TCG_TYPE_I32) {
1114 tcg_out_insn(s, RIL, CFI, r1, c2);
1115 } else {
1116 tcg_out_insn(s, RIL, CGFI, r1, c2);
1120 } else {
1121 if (is_unsigned) {
1122 if (type == TCG_TYPE_I32) {
1123 tcg_out_insn(s, RR, CLR, r1, c2);
1124 } else {
1125 tcg_out_insn(s, RRE, CLGR, r1, c2);
1127 } else {
1128 if (type == TCG_TYPE_I32) {
1129 tcg_out_insn(s, RR, CR, r1, c2);
1130 } else {
1131 tcg_out_insn(s, RRE, CGR, r1, c2);
1135 return tcg_cond_to_s390_cond[c];
1138 static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1139 TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
1141 int cc;
1143 switch (cond) {
1144 case TCG_COND_GTU:
1145 case TCG_COND_GT:
1146 do_greater:
1147 /* The result of a compare has CC=2 for GT and CC=3 unused.
1148 ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */
1149 tgen_cmp(s, type, cond, c1, c2, c2const);
1150 tcg_out_movi(s, type, dest, 0);
1151 tcg_out_insn(s, RRE, ALCGR, dest, dest);
1152 return;
1154 case TCG_COND_GEU:
1155 do_geu:
1156 /* We need "real" carry semantics, so use SUBTRACT LOGICAL
1157 instead of COMPARE LOGICAL. This needs an extra move. */
1158 tcg_out_mov(s, type, TCG_TMP0, c1);
1159 if (c2const) {
1160 tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1161 if (type == TCG_TYPE_I32) {
1162 tcg_out_insn(s, RIL, SLFI, TCG_TMP0, c2);
1163 } else {
1164 tcg_out_insn(s, RIL, SLGFI, TCG_TMP0, c2);
1166 } else {
1167 if (type == TCG_TYPE_I32) {
1168 tcg_out_insn(s, RR, SLR, TCG_TMP0, c2);
1169 } else {
1170 tcg_out_insn(s, RRE, SLGR, TCG_TMP0, c2);
1172 tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1174 tcg_out_insn(s, RRE, ALCGR, dest, dest);
1175 return;
1177 case TCG_COND_LEU:
1178 case TCG_COND_LTU:
1179 case TCG_COND_LT:
1180 /* Swap operands so that we can use GEU/GTU/GT. */
1181 if (c2const) {
1182 tcg_out_movi(s, type, TCG_TMP0, c2);
1183 c2 = c1;
1184 c2const = 0;
1185 c1 = TCG_TMP0;
1186 } else {
1187 TCGReg t = c1;
1188 c1 = c2;
1189 c2 = t;
1191 if (cond == TCG_COND_LEU) {
1192 goto do_geu;
1194 cond = tcg_swap_cond(cond);
1195 goto do_greater;
1197 case TCG_COND_NE:
1198 /* X != 0 is X > 0. */
1199 if (c2const && c2 == 0) {
1200 cond = TCG_COND_GTU;
1201 goto do_greater;
1203 break;
1205 case TCG_COND_EQ:
1206 /* X == 0 is X <= 0 is 0 >= X. */
1207 if (c2const && c2 == 0) {
1208 tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0);
1209 c2 = c1;
1210 c2const = 0;
1211 c1 = TCG_TMP0;
1212 goto do_geu;
1214 break;
1216 default:
1217 break;
1220 cc = tgen_cmp(s, type, cond, c1, c2, c2const);
1221 if (facilities & FACILITY_LOAD_ON_COND) {
1222 /* Emit: d = 0, t = 1, d = (cc ? t : d). */
1223 tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1224 tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
1225 tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
1226 } else {
1227 /* Emit: d = 1; if (cc) goto over; d = 0; over: */
1228 tcg_out_movi(s, type, dest, 1);
1229 tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
1230 tcg_out_movi(s, type, dest, 0);
1234 static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
1235 TCGReg c1, TCGArg c2, int c2const, TCGReg r3)
1237 int cc;
1238 if (facilities & FACILITY_LOAD_ON_COND) {
1239 cc = tgen_cmp(s, type, c, c1, c2, c2const);
1240 tcg_out_insn(s, RRF, LOCGR, dest, r3, cc);
1241 } else {
1242 c = tcg_invert_cond(c);
1243 cc = tgen_cmp(s, type, c, c1, c2, c2const);
1245 /* Emit: if (cc) goto over; dest = r3; over: */
1246 tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
1247 tcg_out_insn(s, RRE, LGR, dest, r3);
1251 bool tcg_target_deposit_valid(int ofs, int len)
1253 return (facilities & FACILITY_GEN_INST_EXT) != 0;
1256 static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
1257 int ofs, int len)
1259 int lsb = (63 - ofs);
1260 int msb = lsb - (len - 1);
1261 tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0);
1264 static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest)
1266 ptrdiff_t off = dest - s->code_ptr;
1267 if (off == (int16_t)off) {
1268 tcg_out_insn(s, RI, BRC, cc, off);
1269 } else if (off == (int32_t)off) {
1270 tcg_out_insn(s, RIL, BRCL, cc, off);
1271 } else {
1272 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1273 tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1277 static void tgen_branch(TCGContext *s, int cc, int labelno)
1279 TCGLabel* l = &s->labels[labelno];
1280 if (l->has_value) {
1281 tgen_gotoi(s, cc, l->u.value_ptr);
1282 } else if (USE_LONG_BRANCHES) {
1283 tcg_out16(s, RIL_BRCL | (cc << 4));
1284 tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, labelno, -2);
1285 s->code_ptr += 2;
1286 } else {
1287 tcg_out16(s, RI_BRC | (cc << 4));
1288 tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, labelno, -2);
1289 s->code_ptr += 1;
1293 static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1294 TCGReg r1, TCGReg r2, int labelno)
1296 TCGLabel* l = &s->labels[labelno];
1297 intptr_t off;
1299 if (l->has_value) {
1300 off = l->u.value_ptr - s->code_ptr;
1301 } else {
1302 /* We need to keep the offset unchanged for retranslation. */
1303 off = s->code_ptr[1];
1304 tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, labelno, -2);
1307 tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1308 tcg_out16(s, off);
1309 tcg_out16(s, cc << 12 | (opc & 0xff));
1312 static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1313 TCGReg r1, int i2, int labelno)
1315 TCGLabel* l = &s->labels[labelno];
1316 tcg_target_long off;
1318 if (l->has_value) {
1319 off = l->u.value_ptr - s->code_ptr;
1320 } else {
1321 /* We need to keep the offset unchanged for retranslation. */
1322 off = s->code_ptr[1];
1323 tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, labelno, -2);
1326 tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1327 tcg_out16(s, off);
1328 tcg_out16(s, (i2 << 8) | (opc & 0xff));
1331 static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1332 TCGReg r1, TCGArg c2, int c2const, int labelno)
1334 int cc;
1336 if (facilities & FACILITY_GEN_INST_EXT) {
1337 bool is_unsigned = is_unsigned_cond(c);
1338 bool in_range;
1339 S390Opcode opc;
1341 cc = tcg_cond_to_s390_cond[c];
1343 if (!c2const) {
1344 opc = (type == TCG_TYPE_I32
1345 ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
1346 : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
1347 tgen_compare_branch(s, opc, cc, r1, c2, labelno);
1348 return;
1351 /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1352 If the immediate we've been given does not fit that range, we'll
1353 fall back to separate compare and branch instructions using the
1354 larger comparison range afforded by COMPARE IMMEDIATE. */
1355 if (type == TCG_TYPE_I32) {
1356 if (is_unsigned) {
1357 opc = RIE_CLIJ;
1358 in_range = (uint32_t)c2 == (uint8_t)c2;
1359 } else {
1360 opc = RIE_CIJ;
1361 in_range = (int32_t)c2 == (int8_t)c2;
1363 } else {
1364 if (is_unsigned) {
1365 opc = RIE_CLGIJ;
1366 in_range = (uint64_t)c2 == (uint8_t)c2;
1367 } else {
1368 opc = RIE_CGIJ;
1369 in_range = (int64_t)c2 == (int8_t)c2;
1372 if (in_range) {
1373 tgen_compare_imm_branch(s, opc, cc, r1, c2, labelno);
1374 return;
1378 cc = tgen_cmp(s, type, c, r1, c2, c2const);
1379 tgen_branch(s, cc, labelno);
1382 static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
1384 ptrdiff_t off = dest - s->code_ptr;
1385 if (off == (int32_t)off) {
1386 tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1387 } else {
1388 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1389 tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1393 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
1394 TCGReg base, TCGReg index, int disp)
1396 switch (opc) {
1397 case MO_UB:
1398 tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
1399 break;
1400 case MO_SB:
1401 tcg_out_insn(s, RXY, LGB, data, base, index, disp);
1402 break;
1404 case MO_UW | MO_BSWAP:
1405 /* swapped unsigned halfword load with upper bits zeroed */
1406 tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1407 tgen_ext16u(s, TCG_TYPE_I64, data, data);
1408 break;
1409 case MO_UW:
1410 tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
1411 break;
1413 case MO_SW | MO_BSWAP:
1414 /* swapped sign-extended halfword load */
1415 tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1416 tgen_ext16s(s, TCG_TYPE_I64, data, data);
1417 break;
1418 case MO_SW:
1419 tcg_out_insn(s, RXY, LGH, data, base, index, disp);
1420 break;
1422 case MO_UL | MO_BSWAP:
1423 /* swapped unsigned int load with upper bits zeroed */
1424 tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1425 tgen_ext32u(s, data, data);
1426 break;
1427 case MO_UL:
1428 tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
1429 break;
1431 case MO_SL | MO_BSWAP:
1432 /* swapped sign-extended int load */
1433 tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1434 tgen_ext32s(s, data, data);
1435 break;
1436 case MO_SL:
1437 tcg_out_insn(s, RXY, LGF, data, base, index, disp);
1438 break;
1440 case MO_Q | MO_BSWAP:
1441 tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
1442 break;
1443 case MO_Q:
1444 tcg_out_insn(s, RXY, LG, data, base, index, disp);
1445 break;
1447 default:
1448 tcg_abort();
1452 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
1453 TCGReg base, TCGReg index, int disp)
1455 switch (opc) {
1456 case MO_UB:
1457 if (disp >= 0 && disp < 0x1000) {
1458 tcg_out_insn(s, RX, STC, data, base, index, disp);
1459 } else {
1460 tcg_out_insn(s, RXY, STCY, data, base, index, disp);
1462 break;
1464 case MO_UW | MO_BSWAP:
1465 tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
1466 break;
1467 case MO_UW:
1468 if (disp >= 0 && disp < 0x1000) {
1469 tcg_out_insn(s, RX, STH, data, base, index, disp);
1470 } else {
1471 tcg_out_insn(s, RXY, STHY, data, base, index, disp);
1473 break;
1475 case MO_UL | MO_BSWAP:
1476 tcg_out_insn(s, RXY, STRV, data, base, index, disp);
1477 break;
1478 case MO_UL:
1479 if (disp >= 0 && disp < 0x1000) {
1480 tcg_out_insn(s, RX, ST, data, base, index, disp);
1481 } else {
1482 tcg_out_insn(s, RXY, STY, data, base, index, disp);
1484 break;
1486 case MO_Q | MO_BSWAP:
1487 tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
1488 break;
1489 case MO_Q:
1490 tcg_out_insn(s, RXY, STG, data, base, index, disp);
1491 break;
1493 default:
1494 tcg_abort();
1498 #if defined(CONFIG_SOFTMMU)
1499 /* We're expecting to use a 20-bit signed offset on the tlb memory ops.
1500 Using the offset of the second entry in the last tlb table ensures
1501 that we can index all of the elements of the first entry. */
1502 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
1503 > 0x7ffff);
1505 /* Load and compare a TLB entry, leaving the flags set. Loads the TLB
1506 addend into R2. Returns a register with the santitized guest address. */
1507 static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
1508 int mem_index, bool is_ld)
1510 TCGMemOp s_bits = opc & MO_SIZE;
1511 uint64_t tlb_mask = TARGET_PAGE_MASK | ((1 << s_bits) - 1);
1512 int ofs;
1514 if (facilities & FACILITY_GEN_INST_EXT) {
1515 tcg_out_risbg(s, TCG_REG_R2, addr_reg,
1516 64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS,
1517 63 - CPU_TLB_ENTRY_BITS,
1518 64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1);
1519 tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1520 } else {
1521 tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1522 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1523 tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_R3, addr_reg);
1524 tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2,
1525 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
1526 tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1529 if (is_ld) {
1530 ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
1531 } else {
1532 ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1534 if (TARGET_LONG_BITS == 32) {
1535 tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs);
1536 } else {
1537 tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs);
1540 ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
1541 tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs);
1543 if (TARGET_LONG_BITS == 32) {
1544 tgen_ext32u(s, TCG_REG_R3, addr_reg);
1545 return TCG_REG_R3;
1547 return addr_reg;
1550 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
1551 TCGReg data, TCGReg addr, int mem_index,
1552 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1554 TCGLabelQemuLdst *label = new_ldst_label(s);
1556 label->is_ld = is_ld;
1557 label->opc = opc;
1558 label->datalo_reg = data;
1559 label->addrlo_reg = addr;
1560 label->mem_index = mem_index;
1561 label->raddr = raddr;
1562 label->label_ptr[0] = label_ptr;
1565 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1567 TCGReg addr_reg = lb->addrlo_reg;
1568 TCGReg data_reg = lb->datalo_reg;
1569 TCGMemOp opc = lb->opc;
1571 patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2);
1573 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1574 if (TARGET_LONG_BITS == 64) {
1575 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1577 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, lb->mem_index);
1578 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1579 tcg_out_call(s, qemu_ld_helpers[opc]);
1580 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1582 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1585 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1587 TCGReg addr_reg = lb->addrlo_reg;
1588 TCGReg data_reg = lb->datalo_reg;
1589 TCGMemOp opc = lb->opc;
1591 patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2);
1593 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1594 if (TARGET_LONG_BITS == 64) {
1595 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1597 switch (opc & MO_SIZE) {
1598 case MO_UB:
1599 tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1600 break;
1601 case MO_UW:
1602 tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1603 break;
1604 case MO_UL:
1605 tgen_ext32u(s, TCG_REG_R4, data_reg);
1606 break;
1607 case MO_Q:
1608 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1609 break;
1610 default:
1611 tcg_abort();
1613 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index);
1614 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1615 tcg_out_call(s, qemu_st_helpers[opc]);
1617 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1619 #else
1620 static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1621 TCGReg *index_reg, tcg_target_long *disp)
1623 if (TARGET_LONG_BITS == 32) {
1624 tgen_ext32u(s, TCG_TMP0, *addr_reg);
1625 *addr_reg = TCG_TMP0;
1627 if (GUEST_BASE < 0x80000) {
1628 *index_reg = TCG_REG_NONE;
1629 *disp = GUEST_BASE;
1630 } else {
1631 *index_reg = TCG_GUEST_BASE_REG;
1632 *disp = 0;
1635 #endif /* CONFIG_SOFTMMU */
1637 static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1638 TCGMemOp opc, int mem_index)
1640 #ifdef CONFIG_SOFTMMU
1641 tcg_insn_unit *label_ptr;
1642 TCGReg base_reg;
1644 base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
1646 label_ptr = s->code_ptr + 1;
1647 tcg_out_insn(s, RI, BRC, S390_CC_NE, 0);
1649 tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1651 add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg, mem_index,
1652 s->code_ptr, label_ptr);
1653 #else
1654 TCGReg index_reg;
1655 tcg_target_long disp;
1657 tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1658 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1659 #endif
1662 static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1663 TCGMemOp opc, int mem_index)
1665 #ifdef CONFIG_SOFTMMU
1666 tcg_insn_unit *label_ptr;
1667 TCGReg base_reg;
1669 base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
1671 label_ptr = s->code_ptr + 1;
1672 tcg_out_insn(s, RI, BRC, S390_CC_NE, 0);
1674 tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1676 add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg, mem_index,
1677 s->code_ptr, label_ptr);
1678 #else
1679 TCGReg index_reg;
1680 tcg_target_long disp;
1682 tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1683 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1684 #endif
/* Expands to the pair of case labels for the _i32 and _i64 forms of
   opcode X; the trailing colon is supplied at the point of use.  */
# define OP_32_64(x) \
        case glue(glue(INDEX_op_,x),_i32): \
        case glue(glue(INDEX_op_,x),_i64)

1691 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1692 const TCGArg *args, const int *const_args)
1694 S390Opcode op;
1695 TCGArg a0, a1, a2;
1697 switch (opc) {
1698 case INDEX_op_exit_tb:
1699 /* return value */
1700 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]);
1701 tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
1702 break;
1704 case INDEX_op_goto_tb:
1705 if (s->tb_jmp_offset) {
1706 tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
1707 s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
1708 s->code_ptr += 2;
1709 } else {
1710 /* load address stored at s->tb_next + args[0] */
1711 tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_TMP0, s->tb_next + args[0]);
1712 /* and go there */
1713 tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_TMP0);
1715 s->tb_next_offset[args[0]] = tcg_current_code_size(s);
1716 break;
1718 OP_32_64(ld8u):
1719 /* ??? LLC (RXY format) is only present with the extended-immediate
1720 facility, whereas LLGC is always present. */
1721 tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
1722 break;
1724 OP_32_64(ld8s):
1725 /* ??? LB is no smaller than LGB, so no point to using it. */
1726 tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
1727 break;
1729 OP_32_64(ld16u):
1730 /* ??? LLH (RXY format) is only present with the extended-immediate
1731 facility, whereas LLGH is always present. */
1732 tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
1733 break;
1735 case INDEX_op_ld16s_i32:
1736 tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
1737 break;
1739 case INDEX_op_ld_i32:
1740 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1741 break;
1743 OP_32_64(st8):
1744 tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
1745 TCG_REG_NONE, args[2]);
1746 break;
1748 OP_32_64(st16):
1749 tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
1750 TCG_REG_NONE, args[2]);
1751 break;
1753 case INDEX_op_st_i32:
1754 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1755 break;
1757 case INDEX_op_add_i32:
1758 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
1759 if (const_args[2]) {
1760 do_addi_32:
1761 if (a0 == a1) {
1762 if (a2 == (int16_t)a2) {
1763 tcg_out_insn(s, RI, AHI, a0, a2);
1764 break;
1766 if (facilities & FACILITY_EXT_IMM) {
1767 tcg_out_insn(s, RIL, AFI, a0, a2);
1768 break;
1771 tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
1772 } else if (a0 == a1) {
1773 tcg_out_insn(s, RR, AR, a0, a2);
1774 } else {
1775 tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
1777 break;
1778 case INDEX_op_sub_i32:
1779 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
1780 if (const_args[2]) {
1781 a2 = -a2;
1782 goto do_addi_32;
1784 tcg_out_insn(s, RR, SR, args[0], args[2]);
1785 break;
1787 case INDEX_op_and_i32:
1788 if (const_args[2]) {
1789 tgen_andi(s, TCG_TYPE_I32, args[0], args[2]);
1790 } else {
1791 tcg_out_insn(s, RR, NR, args[0], args[2]);
1793 break;
1794 case INDEX_op_or_i32:
1795 if (const_args[2]) {
1796 tgen64_ori(s, args[0], args[2] & 0xffffffff);
1797 } else {
1798 tcg_out_insn(s, RR, OR, args[0], args[2]);
1800 break;
1801 case INDEX_op_xor_i32:
1802 if (const_args[2]) {
1803 tgen64_xori(s, args[0], args[2] & 0xffffffff);
1804 } else {
1805 tcg_out_insn(s, RR, XR, args[0], args[2]);
1807 break;
1809 case INDEX_op_neg_i32:
1810 tcg_out_insn(s, RR, LCR, args[0], args[1]);
1811 break;
1813 case INDEX_op_mul_i32:
1814 if (const_args[2]) {
1815 if ((int32_t)args[2] == (int16_t)args[2]) {
1816 tcg_out_insn(s, RI, MHI, args[0], args[2]);
1817 } else {
1818 tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
1820 } else {
1821 tcg_out_insn(s, RRE, MSR, args[0], args[2]);
1823 break;
1825 case INDEX_op_div2_i32:
1826 tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
1827 break;
1828 case INDEX_op_divu2_i32:
1829 tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
1830 break;
1832 case INDEX_op_shl_i32:
1833 op = RS_SLL;
1834 do_shift32:
1835 if (const_args[2]) {
1836 tcg_out_sh32(s, op, args[0], TCG_REG_NONE, args[2]);
1837 } else {
1838 tcg_out_sh32(s, op, args[0], args[2], 0);
1840 break;
1841 case INDEX_op_shr_i32:
1842 op = RS_SRL;
1843 goto do_shift32;
1844 case INDEX_op_sar_i32:
1845 op = RS_SRA;
1846 goto do_shift32;
1848 case INDEX_op_rotl_i32:
1849 /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */
1850 if (const_args[2]) {
1851 tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
1852 } else {
1853 tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
1855 break;
1856 case INDEX_op_rotr_i32:
1857 if (const_args[2]) {
1858 tcg_out_sh64(s, RSY_RLL, args[0], args[1],
1859 TCG_REG_NONE, (32 - args[2]) & 31);
1860 } else {
1861 tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
1862 tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
1864 break;
1866 case INDEX_op_ext8s_i32:
1867 tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
1868 break;
1869 case INDEX_op_ext16s_i32:
1870 tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
1871 break;
1872 case INDEX_op_ext8u_i32:
1873 tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
1874 break;
1875 case INDEX_op_ext16u_i32:
1876 tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
1877 break;
1879 OP_32_64(bswap16):
1880 /* The TCG bswap definition requires bits 0-47 already be zero.
1881 Thus we don't need the G-type insns to implement bswap16_i64. */
1882 tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
1883 tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16);
1884 break;
1885 OP_32_64(bswap32):
1886 tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
1887 break;
1889 case INDEX_op_add2_i32:
1890 if (const_args[4]) {
1891 tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
1892 } else {
1893 tcg_out_insn(s, RR, ALR, args[0], args[4]);
1895 tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
1896 break;
1897 case INDEX_op_sub2_i32:
1898 if (const_args[4]) {
1899 tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
1900 } else {
1901 tcg_out_insn(s, RR, SLR, args[0], args[4]);
1903 tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
1904 break;
1906 case INDEX_op_br:
1907 tgen_branch(s, S390_CC_ALWAYS, args[0]);
1908 break;
1910 case INDEX_op_brcond_i32:
1911 tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
1912 args[1], const_args[1], args[3]);
1913 break;
1914 case INDEX_op_setcond_i32:
1915 tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
1916 args[2], const_args[2]);
1917 break;
1918 case INDEX_op_movcond_i32:
1919 tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
1920 args[2], const_args[2], args[3]);
1921 break;
1923 case INDEX_op_qemu_ld_i32:
1924 /* ??? Technically we can use a non-extending instruction. */
1925 case INDEX_op_qemu_ld_i64:
1926 tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3]);
1927 break;
1928 case INDEX_op_qemu_st_i32:
1929 case INDEX_op_qemu_st_i64:
1930 tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]);
1931 break;
1933 case INDEX_op_ld16s_i64:
1934 tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
1935 break;
1936 case INDEX_op_ld32u_i64:
1937 tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
1938 break;
1939 case INDEX_op_ld32s_i64:
1940 tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
1941 break;
1942 case INDEX_op_ld_i64:
1943 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1944 break;
1946 case INDEX_op_st32_i64:
1947 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1948 break;
1949 case INDEX_op_st_i64:
1950 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1951 break;
1953 case INDEX_op_add_i64:
1954 a0 = args[0], a1 = args[1], a2 = args[2];
1955 if (const_args[2]) {
1956 do_addi_64:
1957 if (a0 == a1) {
1958 if (a2 == (int16_t)a2) {
1959 tcg_out_insn(s, RI, AGHI, a0, a2);
1960 break;
1962 if (facilities & FACILITY_EXT_IMM) {
1963 if (a2 == (int32_t)a2) {
1964 tcg_out_insn(s, RIL, AGFI, a0, a2);
1965 break;
1966 } else if (a2 == (uint32_t)a2) {
1967 tcg_out_insn(s, RIL, ALGFI, a0, a2);
1968 break;
1969 } else if (-a2 == (uint32_t)-a2) {
1970 tcg_out_insn(s, RIL, SLGFI, a0, -a2);
1971 break;
1975 tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
1976 } else if (a0 == a1) {
1977 tcg_out_insn(s, RRE, AGR, a0, a2);
1978 } else {
1979 tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
1981 break;
1982 case INDEX_op_sub_i64:
1983 a0 = args[0], a1 = args[1], a2 = args[2];
1984 if (const_args[2]) {
1985 a2 = -a2;
1986 goto do_addi_64;
1987 } else {
1988 tcg_out_insn(s, RRE, SGR, args[0], args[2]);
1990 break;
1992 case INDEX_op_and_i64:
1993 if (const_args[2]) {
1994 tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
1995 } else {
1996 tcg_out_insn(s, RRE, NGR, args[0], args[2]);
1998 break;
1999 case INDEX_op_or_i64:
2000 if (const_args[2]) {
2001 tgen64_ori(s, args[0], args[2]);
2002 } else {
2003 tcg_out_insn(s, RRE, OGR, args[0], args[2]);
2005 break;
2006 case INDEX_op_xor_i64:
2007 if (const_args[2]) {
2008 tgen64_xori(s, args[0], args[2]);
2009 } else {
2010 tcg_out_insn(s, RRE, XGR, args[0], args[2]);
2012 break;
2014 case INDEX_op_neg_i64:
2015 tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2016 break;
2017 case INDEX_op_bswap64_i64:
2018 tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2019 break;
2021 case INDEX_op_mul_i64:
2022 if (const_args[2]) {
2023 if (args[2] == (int16_t)args[2]) {
2024 tcg_out_insn(s, RI, MGHI, args[0], args[2]);
2025 } else {
2026 tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
2028 } else {
2029 tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
2031 break;
2033 case INDEX_op_div2_i64:
2034 /* ??? We get an unnecessary sign-extension of the dividend
2035 into R3 with this definition, but as we do in fact always
2036 produce both quotient and remainder using INDEX_op_div_i64
2037 instead requires jumping through even more hoops. */
2038 tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
2039 break;
2040 case INDEX_op_divu2_i64:
2041 tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
2042 break;
2043 case INDEX_op_mulu2_i64:
2044 tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
2045 break;
2047 case INDEX_op_shl_i64:
2048 op = RSY_SLLG;
2049 do_shift64:
2050 if (const_args[2]) {
2051 tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2052 } else {
2053 tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2055 break;
2056 case INDEX_op_shr_i64:
2057 op = RSY_SRLG;
2058 goto do_shift64;
2059 case INDEX_op_sar_i64:
2060 op = RSY_SRAG;
2061 goto do_shift64;
2063 case INDEX_op_rotl_i64:
2064 if (const_args[2]) {
2065 tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2066 TCG_REG_NONE, args[2]);
2067 } else {
2068 tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2070 break;
2071 case INDEX_op_rotr_i64:
2072 if (const_args[2]) {
2073 tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2074 TCG_REG_NONE, (64 - args[2]) & 63);
2075 } else {
2076 /* We can use the smaller 32-bit negate because only the
2077 low 6 bits are examined for the rotate. */
2078 tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2079 tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2081 break;
2083 case INDEX_op_ext8s_i64:
2084 tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
2085 break;
2086 case INDEX_op_ext16s_i64:
2087 tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
2088 break;
2089 case INDEX_op_ext32s_i64:
2090 tgen_ext32s(s, args[0], args[1]);
2091 break;
2092 case INDEX_op_ext8u_i64:
2093 tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
2094 break;
2095 case INDEX_op_ext16u_i64:
2096 tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
2097 break;
2098 case INDEX_op_ext32u_i64:
2099 tgen_ext32u(s, args[0], args[1]);
2100 break;
2102 case INDEX_op_add2_i64:
2103 if (const_args[4]) {
2104 if ((int64_t)args[4] >= 0) {
2105 tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2106 } else {
2107 tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2109 } else {
2110 tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2112 tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2113 break;
2114 case INDEX_op_sub2_i64:
2115 if (const_args[4]) {
2116 if ((int64_t)args[4] >= 0) {
2117 tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2118 } else {
2119 tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2121 } else {
2122 tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2124 tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2125 break;
2127 case INDEX_op_brcond_i64:
2128 tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2129 args[1], const_args[1], args[3]);
2130 break;
2131 case INDEX_op_setcond_i64:
2132 tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2133 args[2], const_args[2]);
2134 break;
2135 case INDEX_op_movcond_i64:
2136 tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2137 args[2], const_args[2], args[3]);
2138 break;
2140 OP_32_64(deposit):
2141 tgen_deposit(s, args[0], args[2], args[3], args[4]);
2142 break;
2144 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2145 case INDEX_op_mov_i64:
2146 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2147 case INDEX_op_movi_i64:
2148 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2149 default:
2150 tcg_abort();
2154 static const TCGTargetOpDef s390_op_defs[] = {
2155 { INDEX_op_exit_tb, { } },
2156 { INDEX_op_goto_tb, { } },
2157 { INDEX_op_br, { } },
2159 { INDEX_op_ld8u_i32, { "r", "r" } },
2160 { INDEX_op_ld8s_i32, { "r", "r" } },
2161 { INDEX_op_ld16u_i32, { "r", "r" } },
2162 { INDEX_op_ld16s_i32, { "r", "r" } },
2163 { INDEX_op_ld_i32, { "r", "r" } },
2164 { INDEX_op_st8_i32, { "r", "r" } },
2165 { INDEX_op_st16_i32, { "r", "r" } },
2166 { INDEX_op_st_i32, { "r", "r" } },
2168 { INDEX_op_add_i32, { "r", "r", "ri" } },
2169 { INDEX_op_sub_i32, { "r", "0", "ri" } },
2170 { INDEX_op_mul_i32, { "r", "0", "rK" } },
2172 { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
2173 { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
2175 { INDEX_op_and_i32, { "r", "0", "ri" } },
2176 { INDEX_op_or_i32, { "r", "0", "rO" } },
2177 { INDEX_op_xor_i32, { "r", "0", "rX" } },
2179 { INDEX_op_neg_i32, { "r", "r" } },
2181 { INDEX_op_shl_i32, { "r", "0", "Ri" } },
2182 { INDEX_op_shr_i32, { "r", "0", "Ri" } },
2183 { INDEX_op_sar_i32, { "r", "0", "Ri" } },
2185 { INDEX_op_rotl_i32, { "r", "r", "Ri" } },
2186 { INDEX_op_rotr_i32, { "r", "r", "Ri" } },
2188 { INDEX_op_ext8s_i32, { "r", "r" } },
2189 { INDEX_op_ext8u_i32, { "r", "r" } },
2190 { INDEX_op_ext16s_i32, { "r", "r" } },
2191 { INDEX_op_ext16u_i32, { "r", "r" } },
2193 { INDEX_op_bswap16_i32, { "r", "r" } },
2194 { INDEX_op_bswap32_i32, { "r", "r" } },
2196 { INDEX_op_add2_i32, { "r", "r", "0", "1", "rA", "r" } },
2197 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "rA", "r" } },
2199 { INDEX_op_brcond_i32, { "r", "rC" } },
2200 { INDEX_op_setcond_i32, { "r", "r", "rC" } },
2201 { INDEX_op_movcond_i32, { "r", "r", "rC", "r", "0" } },
2202 { INDEX_op_deposit_i32, { "r", "0", "r" } },
2204 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2205 { INDEX_op_qemu_ld_i64, { "r", "L" } },
2206 { INDEX_op_qemu_st_i32, { "L", "L" } },
2207 { INDEX_op_qemu_st_i64, { "L", "L" } },
2209 { INDEX_op_ld8u_i64, { "r", "r" } },
2210 { INDEX_op_ld8s_i64, { "r", "r" } },
2211 { INDEX_op_ld16u_i64, { "r", "r" } },
2212 { INDEX_op_ld16s_i64, { "r", "r" } },
2213 { INDEX_op_ld32u_i64, { "r", "r" } },
2214 { INDEX_op_ld32s_i64, { "r", "r" } },
2215 { INDEX_op_ld_i64, { "r", "r" } },
2217 { INDEX_op_st8_i64, { "r", "r" } },
2218 { INDEX_op_st16_i64, { "r", "r" } },
2219 { INDEX_op_st32_i64, { "r", "r" } },
2220 { INDEX_op_st_i64, { "r", "r" } },
2222 { INDEX_op_add_i64, { "r", "r", "ri" } },
2223 { INDEX_op_sub_i64, { "r", "0", "ri" } },
2224 { INDEX_op_mul_i64, { "r", "0", "rK" } },
2226 { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
2227 { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
2228 { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } },
2230 { INDEX_op_and_i64, { "r", "0", "ri" } },
2231 { INDEX_op_or_i64, { "r", "0", "rO" } },
2232 { INDEX_op_xor_i64, { "r", "0", "rX" } },
2234 { INDEX_op_neg_i64, { "r", "r" } },
2236 { INDEX_op_shl_i64, { "r", "r", "Ri" } },
2237 { INDEX_op_shr_i64, { "r", "r", "Ri" } },
2238 { INDEX_op_sar_i64, { "r", "r", "Ri" } },
2240 { INDEX_op_rotl_i64, { "r", "r", "Ri" } },
2241 { INDEX_op_rotr_i64, { "r", "r", "Ri" } },
2243 { INDEX_op_ext8s_i64, { "r", "r" } },
2244 { INDEX_op_ext8u_i64, { "r", "r" } },
2245 { INDEX_op_ext16s_i64, { "r", "r" } },
2246 { INDEX_op_ext16u_i64, { "r", "r" } },
2247 { INDEX_op_ext32s_i64, { "r", "r" } },
2248 { INDEX_op_ext32u_i64, { "r", "r" } },
2250 { INDEX_op_bswap16_i64, { "r", "r" } },
2251 { INDEX_op_bswap32_i64, { "r", "r" } },
2252 { INDEX_op_bswap64_i64, { "r", "r" } },
2254 { INDEX_op_add2_i64, { "r", "r", "0", "1", "rA", "r" } },
2255 { INDEX_op_sub2_i64, { "r", "r", "0", "1", "rA", "r" } },
2257 { INDEX_op_brcond_i64, { "r", "rC" } },
2258 { INDEX_op_setcond_i64, { "r", "r", "rC" } },
2259 { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } },
2260 { INDEX_op_deposit_i64, { "r", "0", "r" } },
2262 { -1 },
2265 static void query_facilities(void)
2267 unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2269 /* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this
2270 is present on all 64-bit systems, but let's check for it anyway. */
2271 if (hwcap & HWCAP_S390_STFLE) {
2272 register int r0 __asm__("0");
2273 register void *r1 __asm__("1");
2275 /* stfle 0(%r1) */
2276 r1 = &facilities;
2277 asm volatile(".word 0xb2b0,0x1000"
2278 : "=r"(r0) : "0"(0), "r"(r1) : "memory", "cc");
2282 static void tcg_target_init(TCGContext *s)
2284 query_facilities();
2286 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2287 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2289 tcg_regset_clear(tcg_target_call_clobber_regs);
2290 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
2291 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
2292 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
2293 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
2294 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
2295 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
2296 /* The r6 register is technically call-saved, but it's also a parameter
2297 register, so it can get killed by setup for the qemu_st helper. */
2298 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
2299 /* The return register can be considered call-clobbered. */
2300 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
2302 tcg_regset_clear(s->reserved_regs);
2303 tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
2304 /* XXX many insns can't be used with R0, so we better avoid it for now */
2305 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
2306 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2308 tcg_add_target_add_op_defs(s390_op_defs);
/* Size in bytes of the stack frame allocated by the prologue: the fixed
   call-stack offset, the static call-argument area, and a temporary buffer
   of CPU_TEMP_BUF_NLONGS longs.  The sum is cast to int so that the macro
   can be negated and used as a signed immediate.  */
#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
                           + TCG_STATIC_CALL_ARGS_SIZE           \
                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
2315 static void tcg_target_qemu_prologue(TCGContext *s)
2317 /* stmg %r6,%r15,48(%r15) (save registers) */
2318 tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
2320 /* aghi %r15,-frame_size */
2321 tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
2323 tcg_set_frame(s, TCG_REG_CALL_STACK,
2324 TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
2325 CPU_TEMP_BUF_NLONGS * sizeof(long));
2327 if (GUEST_BASE >= 0x80000) {
2328 tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
2329 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2332 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2333 /* br %r3 (go to TB) */
2334 tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
2336 tb_ret_addr = s->code_ptr;
2338 /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
2339 tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
2340 FRAME_SIZE + 48);
2342 /* br %r14 (return) */
2343 tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
2346 typedef struct {
2347 DebugFrameCIE cie;
2348 DebugFrameFDEHeader fde;
2349 uint8_t fde_def_cfa[4];
2350 uint8_t fde_reg_ofs[18];
2351 } DebugFrame;
2353 /* We're expecting a 2 byte uleb128 encoded value. */
2354 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2356 #define ELF_HOST_MACHINE EM_S390
2358 static DebugFrame debug_frame = {
2359 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2360 .cie.id = -1,
2361 .cie.version = 1,
2362 .cie.code_align = 1,
2363 .cie.data_align = 8, /* sleb128 8 */
2364 .cie.return_column = TCG_REG_R14,
2366 /* Total FDE size does not include the "len" member. */
2367 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2369 .fde_def_cfa = {
2370 12, TCG_REG_CALL_STACK, /* DW_CFA_def_cfa %r15, ... */
2371 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2372 (FRAME_SIZE >> 7)
2374 .fde_reg_ofs = {
2375 0x86, 6, /* DW_CFA_offset, %r6, 48 */
2376 0x87, 7, /* DW_CFA_offset, %r7, 56 */
2377 0x88, 8, /* DW_CFA_offset, %r8, 64 */
2378 0x89, 9, /* DW_CFA_offset, %r92, 72 */
2379 0x8a, 10, /* DW_CFA_offset, %r10, 80 */
2380 0x8b, 11, /* DW_CFA_offset, %r11, 88 */
2381 0x8c, 12, /* DW_CFA_offset, %r12, 96 */
2382 0x8d, 13, /* DW_CFA_offset, %r13, 104 */
2383 0x8e, 14, /* DW_CFA_offset, %r14, 112 */
2387 void tcg_register_jit(void *buf, size_t buf_size)
2389 debug_frame.fde.func_start = (uintptr_t)buf;
2390 debug_frame.fde.func_len = buf_size;
2392 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));