tcg/ppc/tcg-target.inc.c

/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "tcg-pool.inc.c"

#if defined _CALL_DARWIN || defined __APPLE__
#define TCG_TARGET_CALL_DARWIN
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ALIGN_ARGS   1
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif

#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_U16  0x200
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000

static tcg_insn_unit *tb_ret_addr;

bool have_isa_2_06;
bool have_isa_3_00;

#define HAVE_ISA_2_06  have_isa_2_06
#define HAVE_ISEL      have_isa_2_06

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG 30
#endif

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "r0",
    "r1",
    "r2",
    "r3",
    "r4",
    "r5",
    "r6",
    "r7",
    "r8",
    "r9",
    "r10",
    "r11",
    "r12",
    "r13",
    "r14",
    "r15",
    "r16",
    "r17",
    "r18",
    "r19",
    "r20",
    "r21",
    "r22",
    "r23",
    "r24",
    "r25",
    "r26",
    "r27",
    "r28",
    "r29",
    "r30",
    "r31"
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4
};

static const int tcg_target_callee_save_regs[] = {
#ifdef TCG_TARGET_CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,  /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}

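/* For reference: the displacement checked here is the I-form LI field,
   a 26-bit signed value (24 bits encoded, with the low two bits always
   zero), so a direct branch reaches +/-32MB of the branch itself.  */
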
static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    if (in_range_b(disp)) {
        *pc = (*pc & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    if (disp == (int16_t) disp) {
        *pc = (*pc & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

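/* For example, patching a "b" insn with reloc_pc24 rewrites only the
   displacement bits of the word (mask 0x3fffffc); the major opcode and
   the AA/LK bits are preserved, so a patched "bl" stays a "bl".  */
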
/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'A': case 'B': case 'C': case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffff;
        break;
    case 'L':                   /* qemu_ld constraint */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffff;
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#ifdef CONFIG_SOFTMMU
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
#endif
        break;
    case 'S':                   /* qemu_st constraint */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffff;
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#ifdef CONFIG_SOFTMMU
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
#endif
        break;
    case 'I':
        ct->ct |= TCG_CT_CONST_S16;
        break;
    case 'J':
        ct->ct |= TCG_CT_CONST_U16;
        break;
    case 'M':
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'T':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'U':
        ct->ct |= TCG_CT_CONST_U32;
        break;
    case 'W':
        ct->ct |= TCG_CT_CONST_WSZ;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

/* test if a constant matches the constraint */
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* The only 32-bit constraint we use aside from
       TCG_CT_CONST is TCG_CT_CONST_S16.  */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    } else if ((ct & TCG_CT_CONST_WSZ)
               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))

#define B      OPCD( 18)
#define BC     OPCD( 16)
#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)

#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)

#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD(  8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define NOP    ORI  /* ori 0,0,0 */

#define RT(r)  ((r)<<21)
#define RS(r)  ((r)<<21)
#define RA(r)  ((r)<<16)
#define RB(r)  ((r)<<11)
#define TO(t)  ((t)<<21)
#define SH(s)  ((s)<<11)
#define MB(b)  ((b)<<6)
#define ME(e)  ((e)<<1)
#define BO(o)  ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};

static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};

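/* Example: tcg_to_bc[TCG_COND_EQ] assembles as "bc 12,30,<target>".
   BI(7, CR_EQ) selects CR bit 4*7 + 2 = 30 (the EQ bit of CR7), and
   BO(12) means "branch if that CR bit is set".  The 14-bit displacement
   is filled in later via tcg_out_bc/reloc_pc14.  */
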
/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_insn_unit *target;

    value += addend;
    target = (tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
    if (ret != arg) {
        tcg_out32(s, OR | SAB(arg, ret, arg));
    }
}

static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}

static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb, int me)
{
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}

static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

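/* These wrappers emit the canonical rotate-and-mask encodings of the
   fixed shifts; e.g. tcg_out_shri32(s, d, r, 8) produces
   "rlwinm d,r,24,8,31", which is how the assembler spells "srwi d,r,8".  */
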
/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = arg - (intptr_t)s->code_gen_ptr;
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       -(intptr_t)s->code_gen_ptr);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}

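/* Worked example: when the TB-relative and constant-pool forms are not
   available (e.g. in_prologue), a 64-bit constant such as
   0x12345678deadbeef that matches none of the short forms above lands
   in the final fragment and costs five insns:
       lis   ret, 0x1234
       ori   ret, ret, 0x5678
       sldi  ret, ret, 32
       oris  ret, ret, 0xdead
       ori   ret, ret, 0xbeef  */
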
static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                                tcg_target_long arg)
{
    tcg_out_movi_int(s, type, ret, arg, false);
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}

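/* Examples: 0xffffffff00000000 (1..10..0) gives mb=0, me=31, matching
   RLDICR; 0x00000000ffffffff (0..01..1) gives mb=32, me=63, matching
   RLDICL.  Unlike the 32-bit case above, a mask clear at both ends,
   e.g. 0x0000ffff0000, is rejected here, since the MD-form insns used
   below cannot produce it in a single step.  */
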
static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt != TCG_REG_R0) {
            rs = rt;
            break;
        }
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_store = true;
        break;
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt, base, l0));
    }
}

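/* Worked example: a doubleword load at offset 0x12348000 (too large for
   a 16-bit displacement, but 4-byte aligned) is split as
       addis tmp, base, 0x1235
       ld    rt, -0x8000(tmp)
   since (0x1235 << 16) plus the sign-extended -0x8000 is 0x12348000.  */
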
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opi, opx;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
    if (type == TCG_TYPE_I32) {
        opi = LWZ, opx = LWZX;
    } else {
        opi = LD, opx = LDX;
    }
    tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opi, opx;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
    if (type == TCG_TYPE_I32) {
        opi = STW, opx = STWX;
    } else {
        opi = STD, opx = STDX;
    }
    tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
{
    int imm;
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Simplify the comparisons below wrt CMPI.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            } else if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            }
        }
        op = CMP;
        imm = 0;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    default:
        tcg_abort();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);

    if (imm) {
        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
    } else {
        if (const_arg2) {
            tcg_out_movi(s, type, TCG_REG_R0, arg2);
            arg2 = TCG_REG_R0;
        }
        tcg_out32(s, op | RA(arg1) | RB(arg2));
    }
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
}

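/* The cntlzw/cntlzd result equals the word size (32 or 64) iff src == 0,
   and is strictly smaller otherwise, so shifting right by log2 of the
   word size leaves exactly the "src == 0" boolean in dst.  */
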
static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
{
    /* X != 0 implies X + -1 generates a carry.  Extra addition
       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
    if (dst != src) {
        tcg_out32(s, ADDIC | TAI(dst, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, dst, src));
    } else {
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
    }
}

static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}

static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
                            int const_arg2)
{
    int crop, sh;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1);
            return;
        case TCG_COND_NE:
            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
                tcg_out_ext32u(s, TCG_REG_R0, arg1);
                arg1 = TCG_REG_R0;
            }
            tcg_out_setcond_ne0(s, arg0, arg1);
            return;
        case TCG_COND_GE:
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                tcg_out_shri32(s, arg0, arg1, 31);
            } else {
                tcg_out_shri64(s, arg0, arg1, 63);
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
    if (HAVE_ISEL) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1);
        return;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        /* Discard the high bits only once, rather than both inputs.  */
        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
            tcg_out_ext32u(s, TCG_REG_R0, arg1);
            arg1 = TCG_REG_R0;
        }
        tcg_out_setcond_ne0(s, arg0, arg1);
        return;

    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30;
        crop = 0;
        goto crtest;

    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29;
        crop = 0;
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
        goto crtest;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
    crtest:
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        if (crop) {
            tcg_out32(s, crop);
        }
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        break;

    default:
        tcg_abort();
    }
}

static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
{
    if (l->has_value) {
        bc |= reloc_pc14_val(s->code_ptr, l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    }
    tcg_out32(s, bc);
}

static void tcg_out_brcond(TCGContext *s, TCGCond cond,
                           TCGArg arg1, TCGArg arg2, int const_arg2,
                           TCGLabel *l, TCGType type)
{
    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
    tcg_out_bc(s, tcg_to_bc[cond], l);
}

static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
                            TCGArg v2, bool const_c2)
{
    /* If for some reason both inputs are zero, don't produce bad code.  */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);

    if (HAVE_ISEL) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion.  */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
        if (v2 == 0) {
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        if (dest == v2) {
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out32(s, tcg_to_bc[cond] | 8);
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}

static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
        /* Note that the only other valid constant for a2 is 0.  */
        if (HAVE_ISEL) {
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}

static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
                         const int *const_args)
{
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },
    };

    TCGCond cond = args[4], cond2;
    TCGArg al, ah, bl, bh;
    int blconst, bhconst;
    int op, bit1, bit2;

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];
    blconst = const_args[2];
    bhconst = const_args[3];

    switch (cond) {
    case TCG_COND_EQ:
        op = CRAND;
        goto do_equality;
    case TCG_COND_NE:
        op = CRNAND;
    do_equality:
        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
        break;

    case TCG_COND_LT:
    case TCG_COND_LE:
    case TCG_COND_GT:
    case TCG_COND_GE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        op = (bit1 != bit2 ? CRANDC : CRAND);
        cond2 = tcg_unsigned_cond(cond);

        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
        break;

    default:
        tcg_abort();
    }
}

static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    tcg_out_cmp2(s, args + 1, const_args + 1);
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
}

static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    tcg_out_cmp2(s, args, const_args);
    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
}

static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    uint32_t insn = HWSYNC;
    a0 &= TCG_MO_ALL;
    if (a0 == TCG_MO_LD_LD) {
        insn = LWSYNC;
    } else if (a0 == TCG_MO_ST_ST) {
        insn = EIEIO;
    }
    tcg_out32(s, insn);
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_insn_unit i1, i2;
        intptr_t tb_diff = addr - tc_ptr;
        intptr_t br_diff = addr - (jmp_addr + 4);
        uint64_t pair;

        /* This does not exercise the range of the branch, but we do
           still need to be able to load the new value of TCG_REG_TB.
           But this does still happen quite often.  */
        if (tb_diff == (int16_t)tb_diff) {
            i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
            i2 = B | (br_diff & 0x3fffffc);
        } else {
            intptr_t lo = (int16_t)tb_diff;
            intptr_t hi = (int32_t)(tb_diff - lo);
            assert(tb_diff == hi + lo);
            i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
            i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
        }
#ifdef HOST_WORDS_BIGENDIAN
        pair = (uint64_t)i1 << 32 | i2;
#else
        pair = (uint64_t)i2 << 32 | i1;
#endif

        /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
           within atomic_set that would fail to build a ppc32 host.  */
        atomic_set__nocheck((uint64_t *)jmp_addr, pair);
        flush_icache_range(jmp_addr, jmp_addr + 8);
    } else {
        intptr_t diff = addr - jmp_addr;
        tcg_debug_assert(in_range_b(diff));
        atomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
        flush_icache_range(jmp_addr, jmp_addr + 4);
    }
}

static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
#ifdef _CALL_AIX
    /* Look through the descriptor.  If the branch is in range, and we
       don't have to spend too much effort on building the toc.  */
    void *tgt = ((void **)target)[0];
    uintptr_t toc = ((uintptr_t *)target)[1];
    intptr_t diff = tcg_pcrel_diff(s, tgt);

    if (in_range_b(diff) && toc == (uint32_t)toc) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
        tcg_out_b(s, LK, tgt);
    } else {
        /* Fold the low bits of the constant into the addresses below.  */
        intptr_t arg = (intptr_t)target;
        int ofs = (int16_t)arg;

        if (ofs + 8 < 0x8000) {
            arg -= ofs;
        } else {
            ofs = 0;
        }
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
    }
#elif defined(_CALL_ELF) && _CALL_ELF == 2
    intptr_t diff;

    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
       address, which the callee uses to compute its TOC address.  */
    /* FIXME: when the branch is in range, we could avoid r12 load if we
       knew that the destination uses the same TOC, and what its local
       entry point offset is.  */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);

    diff = tcg_pcrel_diff(s, target);
    if (in_range_b(diff)) {
        tcg_out_b(s, LK, target);
    } else {
        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
    }
#else
    tcg_out_b(s, LK, target);
#endif
}

static const uint32_t qemu_ldx_opc[16] = {
    [MO_UB] = LBZX,
    [MO_UW] = LHZX,
    [MO_UL] = LWZX,
    [MO_Q]  = LDX,
    [MO_SW] = LHAX,
    [MO_SL] = LWAX,
    [MO_BSWAP | MO_UB] = LBZX,
    [MO_BSWAP | MO_UW] = LHBRX,
    [MO_BSWAP | MO_UL] = LWBRX,
    [MO_BSWAP | MO_Q]  = LDBRX,
};

static const uint32_t qemu_stx_opc[16] = {
    [MO_UB] = STBX,
    [MO_UW] = STHX,
    [MO_UL] = STWX,
    [MO_Q]  = STDX,
    [MO_BSWAP | MO_UB] = STBX,
    [MO_BSWAP | MO_UW] = STHBRX,
    [MO_BSWAP | MO_UL] = STWBRX,
    [MO_BSWAP | MO_Q]  = STDBRX,
};

static const uint32_t qemu_exts_opc[4] = {
    EXTSB, EXTSH, EXTSW, 0
};

#if defined (CONFIG_SOFTMMU)
#include "tcg-ldst.inc.c"

/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
 *                                 int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
 *                                 uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* We expect tlb_mask to be before tlb_table.  */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
                  offsetof(CPUArchState, tlb_mask));

/* Perform the TLB load and compare.  Places the result of the comparison
   in CR7, loads the addend of the TLB into R3, and returns the register
   containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */

static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
                               TCGReg addrlo, TCGReg addrhi,
                               int mem_index, bool is_read)
{
    int cmp_off
        = (is_read
           ? offsetof(CPUTLBEntry, addr_read)
           : offsetof(CPUTLBEntry, addr_write));
    int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
    int table_off = offsetof(CPUArchState, tlb_table[mem_index]);
    TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_bits = get_alignment_bits(opc);

    if (table_off > 0x7fff) {
        int mask_hi = mask_off - (int16_t)mask_off;
        int table_hi = table_off - (int16_t)table_off;

        table_base = TCG_REG_R4;
        if (mask_hi == table_hi) {
            mask_base = table_base;
        } else if (mask_hi) {
            mask_base = TCG_REG_R3;
            tcg_out32(s, ADDIS | TAI(mask_base, TCG_AREG0, mask_hi >> 16));
        }
        tcg_out32(s, ADDIS | TAI(table_base, TCG_AREG0, table_hi >> 16));
        mask_off -= mask_hi;
        table_off -= table_hi;
    }

    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, mask_base, mask_off);
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, table_base, table_off);

    /* Extract the page index, shifted into place for tlb index.  */
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
    } else {
        tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
    }
    tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));

    /* Load the TLB comparator.  */
    if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
        uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
                        ? LWZUX : LDUX);
        tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
    } else {
        tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
        } else {
            tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
        }
    }

    /* Load the TLB addend for use on the fast path.  Do this asap
       to minimize any load use delay.  */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
               offsetof(CPUTLBEntry, addend));

    /* Clear the non-page, non-alignment bits from the address */
    if (TCG_TARGET_REG_BITS == 32) {
        /* We don't support unaligned accesses on 32-bits.
         * Preserve the bottom bits and thus trigger a comparison
         * failure on unaligned accesses.
         */
        if (a_bits < s_bits) {
            a_bits = s_bits;
        }
        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
                    (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
    } else {
        TCGReg t = addrlo;

        /* If the access is unaligned, we need to make sure we fail if we
         * cross a page boundary.  The trick is to add the access size-1
         * to the address before masking the low bits.  That will make the
         * address overflow to the next page if we cross a page boundary,
         * which will then force a mismatch of the TLB compare.
         */
        if (a_bits < s_bits) {
            unsigned a_mask = (1 << a_bits) - 1;
            unsigned s_mask = (1 << s_bits) - 1;
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
            t = TCG_REG_R0;
        }

        /* Mask the address for the requested alignment.  */
        if (TARGET_LONG_BITS == 32) {
            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
                        (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
            /* Zero-extend the address for use in the final address.  */
            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
            addrlo = TCG_REG_R4;
        } else if (a_bits == 0) {
            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
        } else {
            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
        }
    }

    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
                    0, 7, TCG_TYPE_I32);
        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
                    0, 7, TCG_TYPE_TL);
    }

    return addrlo;
}

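/* Example of the fast path on a 64-bit host with a 64-bit guest and an
   aligned access (the cmp_off == 0 case), roughly:
       ld     r3, tlb_mask[mmu_idx](env)
       ld     r4, tlb_table[mmu_idx](env)
       srdi   tmp1, addrlo, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS
       and    r3, r3, tmp1             # masked index into the tlb
       ldux   tmp1, r3, r4             # comparator; r3 += r4
       ld     r3, addend(r3)           # host-address addend
       rldicr r0, addrlo, 0, 63 - TARGET_PAGE_BITS
       cmpd   cr7, r0, tmp1
   leaving the hit/miss verdict in CR7 for the caller's branch.  */
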
/* Record the context of a call to the out of line helper code for the slow
   path for a load or store, so that we can later generate the correct
   helper code.  */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo_reg, TCGReg datahi_reg,
                                TCGReg addrlo_reg, TCGReg addrhi_reg,
                                tcg_insn_unit *raddr, tcg_insn_unit *lptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo_reg;
    label->datahi_reg = datahi_reg;
    label->addrlo_reg = addrlo_reg;
    label->addrhi_reg = addrhi_reg;
    label->raddr = raddr;
    label->label_ptr[0] = lptr;
}

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGReg hi, lo, arg = TCG_REG_R3;

    if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) {
        return false;
    }

    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);

    lo = lb->addrlo_reg;
    hi = lb->addrhi_reg;
    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
        arg |= 1;
#endif
        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
    } else {
        /* If the address needed to be zero-extended, we'll have already
           placed it in R4.  The only remaining case is 64-bit guest.  */
        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
    }

    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
    tcg_out32(s, MFSPR | RT(arg) | LR);

    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);

    lo = lb->datalo_reg;
    hi = lb->datahi_reg;
    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
    } else if (opc & MO_SIGN) {
        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
    }

    tcg_out_b(s, 0, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp s_bits = opc & MO_SIZE;
    TCGReg hi, lo, arg = TCG_REG_R3;

    if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) {
        return false;
    }

    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);

    lo = lb->addrlo_reg;
    hi = lb->addrhi_reg;
    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
        arg |= 1;
#endif
        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
    } else {
        /* If the address needed to be zero-extended, we'll have already
           placed it in R4.  The only remaining case is 64-bit guest.  */
        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
    }

    lo = lb->datalo_reg;
    hi = lb->datahi_reg;
    if (TCG_TARGET_REG_BITS == 32) {
        switch (s_bits) {
        case MO_64:
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            arg |= 1;
#endif
            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
            /* FALLTHRU */
        case MO_32:
            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
            break;
        default:
            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
            break;
        }
    } else {
        if (s_bits == MO_64) {
            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
        } else {
            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
        }
    }

    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
    tcg_out32(s, MFSPR | RT(arg) | LR);

    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}
#endif /* SOFTMMU */

static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
{
    TCGReg datalo, datahi, addrlo, rbase;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc, s_bits;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    tcg_insn_unit *label_ptr;
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);
    s_bits = opc & MO_SIZE;

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);

    /* Load a pointer into the current opcode w/conditional branch-link. */
    label_ptr = s->code_ptr;
    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);

    rbase = TCG_REG_R3;
#else  /* !CONFIG_SOFTMMU */
    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
        addrlo = TCG_REG_TMP1;
    }
#endif

    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
        if (opc & MO_BSWAP) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
        } else if (rbase != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
        } else if (addrlo == datahi) {
            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
        } else {
            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
        }
    } else {
        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
        if (!HAVE_ISA_2_06 && insn == LDBRX) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
        } else if (insn) {
            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
        } else {
            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
            insn = qemu_exts_opc[s_bits];
            tcg_out32(s, insn | RA(datalo) | RS(datalo));
        }
    }

#ifdef CONFIG_SOFTMMU
    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#endif
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
{
    TCGReg datalo, datahi, addrlo, rbase;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc, s_bits;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    tcg_insn_unit *label_ptr;
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);
    s_bits = opc & MO_SIZE;

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);

    /* Load a pointer into the current opcode w/conditional branch-link. */
    label_ptr = s->code_ptr;
    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);

    rbase = TCG_REG_R3;
#else  /* !CONFIG_SOFTMMU */
    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
        addrlo = TCG_REG_TMP1;
    }
#endif

    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
        if (opc & MO_BSWAP) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
        } else if (rbase != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
        } else {
            tcg_out32(s, STW | TAI(datahi, addrlo, 0));
            tcg_out32(s, STW | TAI(datalo, addrlo, 4));
        }
    } else {
        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
        if (!HAVE_ISA_2_06 && insn == STDBRX) {
            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
        } else {
            tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
        }
    }

#ifdef CONFIG_SOFTMMU
    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#endif
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

/* Parameters for function call generation, used in tcg.c.  */
#define TCG_TARGET_STACK_ALIGN       16
#define TCG_TARGET_EXTEND_ARGS       1

#ifdef _CALL_AIX
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (1 * SZR)
# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
#elif defined(TCG_TARGET_CALL_DARWIN)
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (2 * SZR)
#elif TCG_TARGET_REG_BITS == 64
# if defined(_CALL_ELF) && _CALL_ELF == 2
#  define LINK_AREA_SIZE               (4 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#else /* TCG_TARGET_REG_BITS == 32 */
# if defined(_CALL_SYSV)
#  define LINK_AREA_SIZE               (2 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#endif
#ifndef LR_OFFSET
# error "Unhandled abi"
#endif
#ifndef TCG_TARGET_CALL_STACK_OFFSET
# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
#endif

#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)

#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
                     + TCG_STATIC_CALL_ARGS_SIZE    \
                     + CPU_TEMP_BUF_SIZE            \
                     + REG_SAVE_SIZE                \
                     + TCG_TARGET_STACK_ALIGN - 1)  \
                    & -TCG_TARGET_STACK_ALIGN)

#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)

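/* Example layout for 64-bit ELFv2: the 32-byte link area doubles as the
   call-stack offset, followed by TCG_STATIC_CALL_ARGS_SIZE bytes of
   outgoing arguments, the temp buffer, and 18 * 8 = 144 bytes for the
   r14-r31 save area, all rounded up to the 16-byte stack alignment.  */
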
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i;

#ifdef _CALL_AIX
    void **desc = (void **)s->code_ptr;
    desc[0] = desc + 2;                   /* entry point */
    desc[1] = 0;                          /* environment pointer */
    s->code_ptr = (void *)(desc + 2);     /* skip over descriptor */
#endif

    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
                  CPU_TEMP_BUF_SIZE);

    /* Prologue */
    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
    tcg_out32(s, (SZR == 8 ? STDU : STWU)
              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));

    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);

#ifndef CONFIG_SOFTMMU
    if (guest_base) {
        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
    if (USE_REG_TB) {
        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
    }
    tcg_out32(s, BCCTR | BO_ALWAYS);

    /* Epilogue */
    s->code_gen_epilogue = tb_ret_addr = s->code_ptr;

    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
    tcg_out32(s, BCLR | BO_ALWAYS);
}

2000 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
2001 const int *const_args)
2003 TCGArg a0, a1, a2;
2004 int c;
2006 switch (opc) {
2007 case INDEX_op_exit_tb:
2008 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
2009 tcg_out_b(s, 0, tb_ret_addr);
2010 break;
2011 case INDEX_op_goto_tb:
2012 if (s->tb_jmp_insn_offset) {
2013 /* Direct jump. */
2014 if (TCG_TARGET_REG_BITS == 64) {
2015 /* Ensure the next insns are 8-byte aligned. */
2016 if ((uintptr_t)s->code_ptr & 7) {
2017 tcg_out32(s, NOP);
2019 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2020 tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2021 tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2022 } else {
2023 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2024 tcg_out32(s, B);
2025 s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
2026 break;
2028 } else {
2029 /* Indirect jump. */
2030 tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
2031 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
2032 (intptr_t)(s->tb_jmp_insn_offset + args[0]));
2034 tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
2035 tcg_out32(s, BCCTR | BO_ALWAYS);
2036 set_jmp_reset_offset(s, args[0]);
2037 if (USE_REG_TB) {
2038 /* For the unlinked case, need to reset TCG_REG_TB. */
2039 c = -tcg_current_code_size(s);
2040 assert(c == (int16_t)c);
2041 tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, c));
2043 break;
    case INDEX_op_goto_ptr:
        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
        if (USE_REG_TB) {
            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
        }
        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
        tcg_out32(s, BCCTR | BO_ALWAYS);
        break;
    case INDEX_op_br:
        {
            TCGLabel *l = arg_label(args[0]);
            uint32_t insn = B;

            if (l->has_value) {
                insn |= reloc_pc24_val(s->code_ptr, l->u.value_ptr);
            } else {
                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
            }
            tcg_out32(s, insn);
        }
        break;
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
        tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
        break;
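
    /* Note: tcg_out_mem_long (defined earlier in this file) takes both a
       D-form and an X-form opcode, using the immediate-displacement form
       when the offset fits and otherwise materializing the address and
       using the register-indexed form.  */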

    case INDEX_op_add_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
        do_addi_32:
            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
        } else {
            tcg_out32(s, ADD | TAB(a0, a1, a2));
        }
        break;
    case INDEX_op_sub_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[1]) {
            if (const_args[2]) {
                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
            } else {
                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
            }
        } else if (const_args[2]) {
            a2 = -a2;
            goto do_addi_32;
        } else {
            tcg_out32(s, SUBF | TAB(a0, a2, a1));
        }
        break;

    case INDEX_op_and_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_andi32(s, a0, a1, a2);
        } else {
            tcg_out32(s, AND | SAB(a1, a0, a2));
        }
        break;
    case INDEX_op_and_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_andi64(s, a0, a1, a2);
        } else {
            tcg_out32(s, AND | SAB(a1, a0, a2));
        }
        break;
    case INDEX_op_or_i64:
    case INDEX_op_or_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_ori32(s, a0, a1, a2);
        } else {
            tcg_out32(s, OR | SAB(a1, a0, a2));
        }
        break;
    case INDEX_op_xor_i64:
    case INDEX_op_xor_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_xori32(s, a0, a1, a2);
        } else {
            tcg_out32(s, XOR | SAB(a1, a0, a2));
        }
        break;
    case INDEX_op_andc_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_andi32(s, a0, a1, ~a2);
        } else {
            tcg_out32(s, ANDC | SAB(a1, a0, a2));
        }
        break;
    case INDEX_op_andc_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_andi64(s, a0, a1, ~a2);
        } else {
            tcg_out32(s, ANDC | SAB(a1, a0, a2));
        }
        break;
    case INDEX_op_orc_i32:
        if (const_args[2]) {
            tcg_out_ori32(s, args[0], args[1], ~args[2]);
            break;
        }
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
        break;
    case INDEX_op_eqv_i32:
        if (const_args[2]) {
            tcg_out_xori32(s, args[0], args[1], ~args[2]);
            break;
        }
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
        break;
    case INDEX_op_nand_i32:
    case INDEX_op_nand_i64:
        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
        break;
    case INDEX_op_nor_i32:
    case INDEX_op_nor_i64:
        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
        break;

    case INDEX_op_clz_i32:
        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
                      args[2], const_args[2]);
        break;
    case INDEX_op_ctz_i32:
        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
                      args[2], const_args[2]);
        break;
    case INDEX_op_ctpop_i32:
        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
        break;

    case INDEX_op_clz_i64:
        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
                      args[2], const_args[2]);
        break;
    case INDEX_op_ctz_i64:
        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
                      args[2], const_args[2]);
        break;
    case INDEX_op_ctpop_i64:
        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
        break;
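
    /* cnttzw/cnttzd are ISA 3.0 and popcntw/popcntd are ISA 2.06
       instructions; TCG should only hand us these opcodes when
       tcg-target.h has advertised them via the have_isa_* flags
       probed in tcg_target_init below.  */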

    case INDEX_op_mul_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out32(s, MULLI | TAI(a0, a1, a2));
        } else {
            tcg_out32(s, MULLW | TAB(a0, a1, a2));
        }
        break;

    case INDEX_op_div_i32:
        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
        break;

    case INDEX_op_divu_i32:
        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
        break;

    case INDEX_op_shl_i32:
        if (const_args[2]) {
            tcg_out_shli32(s, args[0], args[1], args[2]);
        } else {
            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
        }
        break;
    case INDEX_op_shr_i32:
        if (const_args[2]) {
            tcg_out_shri32(s, args[0], args[1], args[2]);
        } else {
            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
        }
        break;
    case INDEX_op_sar_i32:
        if (const_args[2]) {
            tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2]));
        } else {
            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
        }
        break;
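
    /* PPC has no right-rotate instruction: both cases below rotate left,
       with rotr implemented as a left rotate by 32 - n (the variable
       count is computed into R0 first).  */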
    case INDEX_op_rotl_i32:
        if (const_args[2]) {
            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
        } else {
            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
                      | MB(0) | ME(31));
        }
        break;
    case INDEX_op_rotr_i32:
        if (const_args[2]) {
            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
        } else {
            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
                      | MB(0) | ME(31));
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
                       arg_label(args[3]), TCG_TYPE_I32);
        break;
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
                       arg_label(args[3]), TCG_TYPE_I64);
        break;
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args);
        break;

    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
        break;

    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
        break;

    case INDEX_op_add_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
        do_addi_64:
            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
        } else {
            tcg_out32(s, ADD | TAB(a0, a1, a2));
        }
        break;
    case INDEX_op_sub_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[1]) {
            if (const_args[2]) {
                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
            } else {
                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
            }
        } else if (const_args[2]) {
            a2 = -a2;
            goto do_addi_64;
        } else {
            tcg_out32(s, SUBF | TAB(a0, a2, a1));
        }
        break;

    case INDEX_op_shl_i64:
        if (const_args[2]) {
            tcg_out_shli64(s, args[0], args[1], args[2]);
        } else {
            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
        }
        break;
    case INDEX_op_shr_i64:
        if (const_args[2]) {
            tcg_out_shri64(s, args[0], args[1], args[2]);
        } else {
            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
        }
        break;
    case INDEX_op_sar_i64:
        if (const_args[2]) {
            int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
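            /* sradi splits its 6-bit shift count: the low five bits go
               in the usual SH field and the sixth bit is encoded
               separately (here ORed in at bit 1).  */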
            tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh);
        } else {
            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
        }
        break;
    case INDEX_op_rotl_i64:
        if (const_args[2]) {
            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
        } else {
            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
        }
        break;
    case INDEX_op_rotr_i64:
        if (const_args[2]) {
            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
        } else {
            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
        }
        break;

    case INDEX_op_mul_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out32(s, MULLI | TAI(a0, a1, a2));
        } else {
            tcg_out32(s, MULLD | TAB(a0, a1, a2));
        }
        break;
    case INDEX_op_div_i64:
        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
        break;
    case INDEX_op_divu_i64:
        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, false);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, true);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, false);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, true);
        break;

    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
        c = EXTSB;
        goto gen_ext;
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
        c = EXTSH;
        goto gen_ext;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        c = EXTSW;
        goto gen_ext;
    gen_ext:
        tcg_out32(s, c | RS(args[1]) | RA(args[0]));
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;

    case INDEX_op_setcond_i32:
        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
                        const_args[2]);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
                        const_args[2]);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;

    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
        a0 = args[0], a1 = args[1];
        /* a1 = abcd */
        if (a0 != a1) {
            /* a0 = (a1 r<< 24) & 0xff # 000c */
            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
            /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
            tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
        } else {
            /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
            tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23);
            /* a0 = (a1 r<< 24) & 0xff # 000c */
            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
            /* a0 = a0 | r0 # 00dc */
            tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0));
        }
        break;

    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
        /* Stolen from gcc's builtin_bswap32 */
        a1 = args[1];
        a0 = args[0] == a1 ? TCG_REG_R0 : args[0];

        /* a1 = args[1] # abcd */
        /* a0 = rotate_left (a1, 8) # bcda */
        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
        /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */
        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
        /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */
        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);

        if (a0 == TCG_REG_R0) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

    case INDEX_op_bswap64_i64:
        a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
        if (a0 == a1) {
            a0 = TCG_REG_R0;
            a2 = a1;
        }

        /* a1 = # abcd efgh */
        /* a0 = rl32(a1, 8) # 0000 fghe */
        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
        /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */
        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
        /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */
        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);

        /* a0 = rl64(a0, 32) # hgfe 0000 */
        /* a2 = rl64(a1, 32) # efgh abcd */
        tcg_out_rld(s, RLDICL, a0, a0, 32, 0);
        tcg_out_rld(s, RLDICL, a2, a1, 32, 0);

        /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */
        tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31);
        /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */
        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7);
        /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */
        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23);

        if (a0 == TCG_REG_R0) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

    case INDEX_op_deposit_i32:
        if (const_args[2]) {
            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
            tcg_out_andi32(s, args[0], args[0], ~mask);
        } else {
            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
                        32 - args[3] - args[4], 31 - args[3]);
        }
        break;
    case INDEX_op_deposit_i64:
        if (const_args[2]) {
            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
            tcg_out_andi64(s, args[0], args[0], ~mask);
        } else {
            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
                        64 - args[3] - args[4]);
        }
        break;
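
    /* The constraint for the deposited value is "rZ" (see the dep
       definition in tcg_target_op_def below), so the constant case can
       only be zero and clearing the field suffices.  For example,
       ofs = 8, len = 16 gives mask = ((2u << 15) - 1) << 8 = 0x00ffff00,
       and we and with ~mask.  */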

    case INDEX_op_extract_i32:
        tcg_out_rlw(s, RLWINM, args[0], args[1],
                    32 - args[2], 32 - args[3], 31);
        break;
    case INDEX_op_extract_i64:
        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
        break;

    case INDEX_op_movcond_i32:
        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
                        args[3], args[4], const_args[2]);
        break;
    case INDEX_op_movcond_i64:
        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
                        args[3], args[4], const_args[2]);
        break;

#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_add2_i64:
#else
    case INDEX_op_add2_i32:
#endif
        /* Note that the CA bit is defined based on the word size of the
           environment.  So in 64-bit mode it's always carry-out of bit 63.
           The fallback code using deposit works just as well for 32-bit.  */
        a0 = args[0], a1 = args[1];
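        /* If the low-part output would clobber an input still needed for
           the high part, compute the low part into R0 and move it into
           place afterwards.  */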
        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
            a0 = TCG_REG_R0;
        }
        if (const_args[4]) {
            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
        } else {
            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
        }
        if (const_args[5]) {
            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
        } else {
            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
        }
        if (a0 != args[0]) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_sub2_i64:
#else
    case INDEX_op_sub2_i32:
#endif
        a0 = args[0], a1 = args[1];
        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
            a0 = TCG_REG_R0;
        }
        if (const_args[2]) {
            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
        } else {
            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
        }
        if (const_args[3]) {
            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
        } else {
            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
        }
        if (a0 != args[0]) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

    case INDEX_op_muluh_i32:
        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
        break;
    case INDEX_op_mulsh_i32:
        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
        break;
    case INDEX_op_muluh_i64:
        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
        break;
    case INDEX_op_mulsh_i64:
        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, args[0]);
        break;

    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i32:  /* Always emitted via tcg_out_movi.  */
    case INDEX_op_movi_i64:
    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }
}

static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
    static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
    static const TCGTargetOpDef S_S = { .args_ct_str = { "S", "S" } };
    static const TCGTargetOpDef r_ri = { .args_ct_str = { "r", "ri" } };
    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
    static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
    static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
    static const TCGTargetOpDef S_S_S = { .args_ct_str = { "S", "S", "S" } };
    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
    static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
    static const TCGTargetOpDef r_r_rT = { .args_ct_str = { "r", "r", "rT" } };
    static const TCGTargetOpDef r_r_rU = { .args_ct_str = { "r", "r", "rU" } };
    static const TCGTargetOpDef r_rI_ri
        = { .args_ct_str = { "r", "rI", "ri" } };
    static const TCGTargetOpDef r_rI_rT
        = { .args_ct_str = { "r", "rI", "rT" } };
    static const TCGTargetOpDef r_r_rZW
        = { .args_ct_str = { "r", "r", "rZW" } };
    static const TCGTargetOpDef L_L_L_L
        = { .args_ct_str = { "L", "L", "L", "L" } };
    static const TCGTargetOpDef S_S_S_S
        = { .args_ct_str = { "S", "S", "S", "S" } };
    static const TCGTargetOpDef movc
        = { .args_ct_str = { "r", "r", "ri", "rZ", "rZ" } };
    static const TCGTargetOpDef dep
        = { .args_ct_str = { "r", "0", "rZ" } };
    static const TCGTargetOpDef br2
        = { .args_ct_str = { "r", "r", "ri", "ri" } };
    static const TCGTargetOpDef setc2
        = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
    static const TCGTargetOpDef add2
        = { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
    static const TCGTargetOpDef sub2
        = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
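
    /* Constraint letters, as parsed by target_parse_constraint earlier
       in this file: "r" is any GPR; "L" and "S" are GPRs usable for
       qemu_ld and qemu_st arguments; "i" is any constant, "I" signed
       16-bit, "T" signed 32-bit, "U" unsigned 32-bit, "Z" zero, "M"
       minus one, "W" the word size; "0" aliases the output register.  */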

    switch (op) {
    case INDEX_op_goto_ptr:
        return &r;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_ctpop_i32:
    case INDEX_op_neg_i32:
    case INDEX_op_not_i32:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_ctpop_i64:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i64:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_extract_i64:
        return &r_r;

    case INDEX_op_add_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_andc_i32:
    case INDEX_op_orc_i32:
    case INDEX_op_eqv_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_and_i64:
    case INDEX_op_andc_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
    case INDEX_op_setcond_i64:
        return &r_r_ri;
    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
        return &r_r_rI;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
    case INDEX_op_nand_i32:
    case INDEX_op_nor_i32:
    case INDEX_op_muluh_i32:
    case INDEX_op_mulsh_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i64:
    case INDEX_op_nand_i64:
    case INDEX_op_nor_i64:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
    case INDEX_op_mulsh_i64:
    case INDEX_op_muluh_i64:
        return &r_r_r;
    case INDEX_op_sub_i32:
        return &r_rI_ri;
    case INDEX_op_add_i64:
        return &r_r_rT;
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
        return &r_r_rU;
    case INDEX_op_sub_i64:
        return &r_rI_rT;
    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return &r_r_rZW;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return &r_ri;

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return &movc;
    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return &dep;
    case INDEX_op_brcond2_i32:
        return &br2;
    case INDEX_op_setcond2_i32:
        return &setc2;
    case INDEX_op_add2_i64:
    case INDEX_op_add2_i32:
        return &add2;
    case INDEX_op_sub2_i64:
    case INDEX_op_sub2_i32:
        return &sub2;

    case INDEX_op_qemu_ld_i32:
        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
                ? &r_L : &r_L_L);
    case INDEX_op_qemu_st_i32:
        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
                ? &S_S : &S_S_S);
    case INDEX_op_qemu_ld_i64:
        return (TCG_TARGET_REG_BITS == 64 ? &r_L
                : TARGET_LONG_BITS == 32 ? &L_L_L : &L_L_L_L);
    case INDEX_op_qemu_st_i64:
        return (TCG_TARGET_REG_BITS == 64 ? &S_S
                : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
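
    /* On a 32-bit host, 64-bit values and (when TARGET_LONG_BITS == 64)
       guest addresses each occupy a register pair, hence the longer
       constraint lists above.  */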

    default:
        return NULL;
    }
}

static void tcg_target_init(TCGContext *s)
{
    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);

    if (hwcap & PPC_FEATURE_ARCH_2_06) {
        have_isa_2_06 = true;
    }
#ifdef PPC_FEATURE2_ARCH_3_00
    if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
        have_isa_3_00 = true;
    }
#endif

    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;

    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
#if defined(_CALL_SYSV)
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
#endif
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
    if (USE_REG_TB) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */
    }
}

#ifdef __ELF__
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_PPC64
#else
# define ELF_HOST_MACHINE EM_PPC
#endif

static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
    .cie.return_column = 65,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ...  */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    uint8_t *p = &debug_frame.fde_reg_ofs[3];
    int i;

    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
        p[0] = 0x80 + tcg_target_callee_save_regs[i];
        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
    }
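    /* Each pair emitted above is DW_CFA_offset: opcode 0x80 | regno,
       followed by a one-byte uleb128 offset from the CFA measured in
       data-alignment (-SZR) units, matching the prologue's save loop.  */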

    debug_frame.fde.func_start = (uintptr_t)buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif /* __ELF__ */
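
/* The standard PowerPC sequence for exposing newly written code: flush
   the data cache lines to memory (dcbst), wait for that to complete
   (sync), invalidate the corresponding instruction cache lines (icbi),
   and discard anything already fetched (sync + isync).  */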
void flush_icache_range(uintptr_t start, uintptr_t stop)
{
    uintptr_t p, start1, stop1;
    size_t dsize = qemu_dcache_linesize;
    size_t isize = qemu_icache_linesize;

    start1 = start & ~(dsize - 1);
    stop1 = (stop + dsize - 1) & ~(dsize - 1);
    for (p = start1; p < stop1; p += dsize) {
        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");

    start1 = start & ~(isize - 1);
    stop1 = (stop + isize - 1) & ~(isize - 1);
    for (p = start1; p < stop1; p += isize) {
        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");
    asm volatile ("isync" : : : "memory");
}