/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "../tcg-pool.c.inc"
#include "../tcg-ldst.c.inc"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
 */
#if TCG_TARGET_REG_BITS == 64
# elif defined(_CALL_ELF) && _CALL_ELF == 1
# elif defined(_CALL_ELF) && _CALL_ELF == 2
# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
# elif defined(__APPLE__)
# elif defined(__ELF__)

#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register. */
# define TCG_REG_TMP1  TCG_REG_R2
# define TCG_REG_TMP1  TCG_REG_R12
#define TCG_REG_TMP2   TCG_REG_R11

#define TCG_VEC_TMP1   TCG_REG_V0
#define TCG_VEC_TMP2   TCG_REG_V1

#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned. */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register. */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16   0x100
#define TCG_CT_CONST_U16   0x200
#define TCG_CT_CONST_S32   0x400
#define TCG_CT_CONST_U32   0x800
#define TCG_CT_CONST_ZERO  0x1000
#define TCG_CT_CONST_MONE  0x2000
#define TCG_CT_CONST_WSZ   0x4000
#define TCG_CT_CONST_CMP   0x8000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifndef R_PPC64_PCREL34
#define R_PPC64_PCREL34  132

#define have_isel  (cpuinfo & CPUINFO_ISEL)

#define TCG_GUEST_BASE_REG  TCG_REG_R30

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R10,  /* call clobbered, arguments */
    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */

static const int tcg_target_call_iarg_regs[] = {

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_R3 + slot;

static const int tcg_target_callee_save_regs[] = {
    TCG_REG_R27, /* currently used for the global env */

/* For PPC, we use TB+4 instead of TB as the base. */
static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
    return tcg_tbrel_diff(s, target) - 4;

static inline bool in_range_b(tcg_target_long target)
    return target == sextract64(target, 0, 26);
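
/*
 * For illustration (not part of the original source): a direct branch
 * encodes a signed, word-aligned 26-bit displacement, so the check above
 * accepts exactly the values that survive sextract64 unchanged.  E.g.
 * in_range_b(0x1fffffc) is true (the largest forward displacement), while
 * in_range_b(0x2000000) is false because bit 25 is the sign bit and the
 * value sign-extends to a different, negative, number.
 */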
static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);

static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == sextract64(disp, 0, 34)) {
        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);

static bool mask_operand(uint32_t c, int *mb, int *me);
static bool mask64_operand(uint64_t c, int *mb, int *me);

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t sval, int ct,
                                   TCGType type, TCGCond cond, int vece)
    uint64_t uval = sval;

    if (ct & TCG_CT_CONST) {
    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    if (ct & TCG_CT_CONST_CMP) {
            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
            ct |= TCG_CT_CONST_S16;
            ct |= TCG_CT_CONST_U16;
            if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
            if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) {
            if (TCG_TARGET_REG_BITS == 64 &&
                mask64_operand(uval << clz64(uval), &mb, &me)) {
            g_assert_not_reached();

    if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
    if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
    if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
    if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
    if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
    if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
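
/*
 * For illustration (added note): with type == TCG_TYPE_I32 and sval == -1,
 * uval is first truncated to 0xffffffff and sval re-sign-extended, so the
 * constant satisfies TCG_CT_CONST_MONE as well as TCG_CT_CONST_U32; a
 * constraint requesting either bit will therefore match.
 */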
#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc) (OPCD(4)|(opc))

#define LBZ OPCD( 34)
#define LHZ OPCD( 40)
#define LHA OPCD( 42)
#define LWZ OPCD( 32)
#define LWZUX XO31( 55)
#define LDX XO31( 21)
#define LDUX XO31( 53)
#define LWAX XO31(341)

#define STB OPCD( 38)
#define STH OPCD( 44)
#define STW OPCD( 36)
#define STDU XO62( 1)
#define STDX XO31(149)

#define PLWA OPCD( 41)
#define PLD OPCD( 57)
#define PLXSD OPCD( 42)
#define PLXV OPCD(25 * 2 + 1) /* force tx=1 */

#define PSTD OPCD( 61)
#define PSTXSD OPCD( 46)
#define PSTXV OPCD(27 * 2 + 1) /* force sx=1 */

#define ADDIC OPCD( 12)
#define ADDI OPCD( 14)
#define ADDIS OPCD( 15)
#define ORI OPCD( 24)
#define ORIS OPCD( 25)
#define XORI OPCD( 26)
#define XORIS OPCD( 27)
#define ANDI OPCD( 28)
#define ANDIS OPCD( 29)
#define MULLI OPCD( 7)
#define CMPLI OPCD( 10)
#define CMPI OPCD( 11)
#define SUBFIC OPCD( 8)

#define LWZU OPCD( 33)
#define STWU OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM OPCD( 23)

#define RLDICL MD30( 0)
#define RLDICR MD30( 1)
#define RLDIMI MD30( 3)
#define RLDCL MDS30( 8)

#define BCLR XO19( 16)
#define BCCTR XO19(528)
#define CRAND XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR XO19(449)
#define CRNOR XO19( 33)
#define ADDPCIS XO19( 2)

#define EXTSB XO31(954)
#define EXTSH XO31(922)
#define EXTSW XO31(986)
#define ADD XO31(266)
#define ADDE XO31(138)
#define ADDME XO31(234)
#define ADDZE XO31(202)
#define ADDC XO31( 10)
#define AND XO31( 28)
#define SUBF XO31( 40)
#define SUBFC XO31( 8)
#define SUBFE XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define XOR XO31(316)
#define MULLW XO31(235)
#define MULHW XO31( 75)
#define MULHWU XO31( 11)
#define DIVW XO31(491)
#define DIVWU XO31(459)
#define MODSW XO31(779)
#define MODUW XO31(267)
#define CMPL XO31( 32)
#define LHBRX XO31(790)
#define LWBRX XO31(534)
#define LDBRX XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR XO31(339)
#define MTSPR XO31(467)
#define SRAWI XO31(824)
#define NEG XO31(104)
#define MFCR XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC XO31( 60)
#define ORC XO31(412)
#define EQV XO31(284)
#define NAND XO31(476)
#define ISEL XO31( 15)

#define MULLD XO31(233)
#define MULHD XO31( 73)
#define MULHDU XO31( 9)
#define DIVD XO31(489)
#define DIVDU XO31(457)
#define MODSD XO31(777)
#define MODUD XO31(265)

#define LBZX XO31( 87)
#define LHZX XO31(279)
#define LHAX XO31(343)
#define LWZX XO31( 23)
#define STBX XO31(215)
#define STHX XO31(407)
#define STWX XO31(151)

#define EIEIO XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define CTR SPR(9, 0)

#define SLW XO31( 24)
#define SRW XO31(536)
#define SRAW XO31(792)

#define SLD XO31( 27)
#define SRD XO31(539)
#define SRAD XO31(794)
#define SRADI XO31(413<<1)

#define BRH XO31(219)
#define BRW XO31(155)
#define BRD XO31(187)

#define TRAP (TW | TO(31))

#define SETBC XO31(384)   /* v3.10 */
#define SETBCR XO31(416)  /* v3.10 */
#define SETNBC XO31(448)  /* v3.10 */
#define SETNBCR XO31(480) /* v3.10 */

#define NOP ORI /* ori 0,0,0 */

#define LVX XO31(103)
#define LVEBX XO31(7)
#define LVEHX XO31(39)
#define LVEWX XO31(71)
#define LXSDX (XO31(588) | 1)   /* v2.06, force tx=1 */
#define LXVDSX (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX (XO31(12) | 1)  /* v2.07, force tx=1 */
#define LXV (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD (OPCD(57) | 2)     /* v3.00 */
#define LXVWSX (XO31(364) | 1)  /* v3.00, force tx=1 */

#define STVX XO31(231)
#define STVEWX XO31(199)
#define STXSDX (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */
#define STXV (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD (OPCD(61) | 2)    /* v3.00 */

#define VADDSBS VX4(768)
#define VADDUBS VX4(512)
#define VADDUBM VX4(0)
#define VADDSHS VX4(832)
#define VADDUHS VX4(576)
#define VADDUHM VX4(64)
#define VADDSWS VX4(896)
#define VADDUWS VX4(640)
#define VADDUWM VX4(128)
#define VADDUDM VX4(192)  /* v2.07 */

#define VSUBSBS VX4(1792)
#define VSUBUBS VX4(1536)
#define VSUBUBM VX4(1024)
#define VSUBSHS VX4(1856)
#define VSUBUHS VX4(1600)
#define VSUBUHM VX4(1088)
#define VSUBSWS VX4(1920)
#define VSUBUWS VX4(1664)
#define VSUBUWM VX4(1152)
#define VSUBUDM VX4(1216) /* v2.07 */

#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */
#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */

#define VMAXSB VX4(258)
#define VMAXSH VX4(322)
#define VMAXSW VX4(386)
#define VMAXSD VX4(450)   /* v2.07 */
#define VMAXUB VX4(2)
#define VMAXUH VX4(66)
#define VMAXUW VX4(130)
#define VMAXUD VX4(194)   /* v2.07 */
#define VMINSB VX4(770)
#define VMINSH VX4(834)
#define VMINSW VX4(898)
#define VMINSD VX4(962)   /* v2.07 */
#define VMINUB VX4(514)
#define VMINUH VX4(578)
#define VMINUW VX4(642)
#define VMINUD VX4(706)   /* v2.07 */

#define VCMPEQUB VX4(6)
#define VCMPEQUH VX4(70)
#define VCMPEQUW VX4(134)
#define VCMPEQUD VX4(199) /* v2.07 */
#define VCMPGTSB VX4(774)
#define VCMPGTSH VX4(838)
#define VCMPGTSW VX4(902)
#define VCMPGTSD VX4(967) /* v2.07 */
#define VCMPGTUB VX4(518)
#define VCMPGTUH VX4(582)
#define VCMPGTUW VX4(646)
#define VCMPGTUD VX4(711) /* v2.07 */
#define VCMPNEB VX4(7)    /* v3.00 */
#define VCMPNEH VX4(71)   /* v3.00 */
#define VCMPNEW VX4(135)  /* v3.00 */

#define VSLB VX4(260)
#define VSLH VX4(324)
#define VSLW VX4(388)
#define VSLD VX4(1476)    /* v2.07 */
#define VSRB VX4(516)
#define VSRH VX4(580)
#define VSRW VX4(644)
#define VSRD VX4(1732)    /* v2.07 */
#define VSRAB VX4(772)
#define VSRAH VX4(836)
#define VSRAW VX4(900)
#define VSRAD VX4(964)    /* v2.07 */

#define VRLW VX4(132)
#define VRLD VX4(196)     /* v2.07 */

#define VMULEUB VX4(520)
#define VMULEUH VX4(584)
#define VMULEUW VX4(648)  /* v2.07 */
#define VMULOUB VX4(8)
#define VMULOUH VX4(72)
#define VMULOUW VX4(136)  /* v2.07 */
#define VMULUWM VX4(137)  /* v2.07 */
#define VMULLD VX4(457)   /* v3.10 */
#define VMSUMUHM VX4(38)

#define VMRGHB VX4(12)
#define VMRGHH VX4(76)
#define VMRGHW VX4(140)
#define VMRGLB VX4(268)
#define VMRGLH VX4(332)
#define VMRGLW VX4(396)

#define VPKUHUM VX4(14)
#define VPKUWUM VX4(78)

#define VAND VX4(1028)
#define VANDC VX4(1092)
#define VNOR VX4(1284)
#define VOR VX4(1156)
#define VXOR VX4(1220)
#define VEQV VX4(1668)    /* v2.07 */
#define VNAND VX4(1412)   /* v2.07 */
#define VORC VX4(1348)    /* v2.07 */

#define VSPLTB VX4(524)
#define VSPLTH VX4(588)
#define VSPLTW VX4(652)
#define VSPLTISB VX4(780)
#define VSPLTISH VX4(844)
#define VSPLTISW VX4(908)

#define VSLDOI VX4(44)

#define XXPERMDI (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL (OPCD(60) | (3 << 4) | 0xf)    /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */
#define MTVSRD (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */
#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */
#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define VRT(r) (((r) & 31) << 21)
#define VRA(r) (((r) & 31) << 16)
#define VRB(r) (((r) & 31) << 11)
#define VRC(r) (((r) & 31) << 6)

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n) ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS BO(20)

static const uint32_t tcg_to_bc[16] = {
    [TCG_COND_EQ] = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE] = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_TSTEQ] = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_TSTNE] = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT] = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE] = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE] = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT] = BC | BI(0, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE,

/* The low bit here is set if the RA and RB fields must be inverted. */
static const uint32_t tcg_to_isel[16] = {
    [TCG_COND_EQ] = ISEL | BC_(0, CR_EQ),
    [TCG_COND_NE] = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
    [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_LT] = ISEL | BC_(0, CR_LT),
    [TCG_COND_GE] = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LE] = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GT] = ISEL | BC_(0, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(0, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(0, CR_GT),

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
    const tcg_insn_unit *target;

    target = (const tcg_insn_unit *)value;

        return reloc_pc14(code_ptr, target);
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        return reloc_pc34(code_ptr, target);
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        if (hi + lo != value) {
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        g_assert_not_reached();

/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
static bool tcg_out_need_prefix_align(TCGContext *s)
    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
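
/*
 * For illustration (added note): a prefixed insn is 8 bytes and must not
 * cross a 64-byte boundary.  Since insns are 4-byte aligned, the only
 * problematic start offset within a block is 60 (0x3c), as 60 + 8 > 64;
 * every earlier offset leaves room for all 8 bytes.  When the test fires,
 * tcg_out_prefix_align() below pads so the prefix begins the next block.
 */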
static void tcg_out_prefix_align(TCGContext *s)
    if (tcg_out_need_prefix_align(s)) {

static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);

/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);

/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                /* Altivec does not support vector->integer moves.  */
        } else if (arg < TCG_REG_V0) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                /* Altivec does not support integer->vector moves.  */
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        g_assert_not_reached();

static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, bool rc)
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
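
/*
 * For illustration (added note), the split-field encoding above with
 * sh = 34 and mb = 40:
 *   sh: SH(34 & 0x1f) = SH(2) holds the low five bits of the count, and
 *       ((34 >> 5) & 1) << 1 places the sixth bit at insn bit 1;
 *   mb: the 6-bit field is stored rotated, (40 >> 5) | ((40 << 1) & 0x3f)
 *       = 1 | 16 = 17, i.e. the sixth bit of the count lands in the low
 *       bit of the MB64 field, per the MD-form layout.
 */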
static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
    tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);

static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, int me, bool rc)
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me) | rc);

static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                        int sh, int mb, int me)
    tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
    tcg_out32(s, EXTSB | RA(dst) | RS(src));

static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
    tcg_out32(s, EXTSH | RA(dst) | RS(src));

static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));

static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));

static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
    tcg_out_ext32s(s, dst, src);

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
    tcg_out_ext32u(s, dst, src);

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
    /* Limit immediate shift count lest we create an illegal insn. */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);

static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
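
/*
 * For illustration (added note): SRADI splits its 6-bit shift count the
 * same way.  E.g. c = 63 emits SH(63 & 0x1f) = SH(31) for the low five
 * bits, and ((63 >> 4) & 2) = 2 sets insn bit 1 with the sixth bit.
 */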
static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
    uint32_t d0, d1, d2;

    tcg_debug_assert((imm & 0xffff) == 0);
    tcg_debug_assert(imm == (int32_t)imm);

    d2 = extract32(imm, 16, 1);
    d1 = extract32(imm, 17, 5);
    d0 = extract32(imm, 22, 10);
    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
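
/*
 * For illustration (added note): ADDPCIS scatters its 16-bit displacement
 * d = imm >> 16 across three fields.  E.g. imm = 0x30000 gives d = 3:
 *   d2 = extract32(imm, 16, 1)  = 1   (bit 0 of d)
 *   d1 = extract32(imm, 17, 5)  = 1   (bits 5..1 of d)
 *   d0 = extract32(imm, 22, 10) = 0   (bits 15..6 of d)
 * which the tcg_out32() above reassembles into the insn encoding.
 */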
static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);

    /*
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with: src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);

static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);

    /*
     * Stolen from gcc's builtin_bswap32.
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with: src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);

static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));

    /*
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with: src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32) = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32) = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff) = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);

/* Emit a move into ret of arg, if it can be done in one insn. */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
    tcg_target_long tmp;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {

    /* Load 16-bit immediates with one insn. */
    if (tcg_out_movi_one(s, ret, arg)) {

    /* Load addresses within the TB with one insn. */
    tb_diff = ppc_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));

    /*
     * Load values up to 34 bits, and pc-relative addresses,
     * with one prefixed insn.
     */
    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,value = paddi ret,0,value,0 */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);

        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,value = paddi ret,0,value,1 */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required. */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));

    /* Load masked 16-bit value. */
    if (arg > 0 && (arg & 0x8000)) {
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
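
    /*
     * For illustration (added note): arg = 0xcafe takes the path above.
     * It has bit 15 set, so the ADDI materializes the sign-extended value
     * 0xffffffffffffcafe; the power-of-two test succeeds with mb = 48,
     * and the RLDICL clears the 48 replicated sign bits, leaving 0xcafe.
     */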
    /* Load common masks with 2 insns. */
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);

    /* Load addresses within 2GB with 2 insns. */
    if (have_isa_3_00) {
        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;

        if (hi == (int32_t)hi) {
            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);

    /* Use the constant pool, if possible. */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       ppc_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));

    if (have_isa_3_10) {
        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);

    if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
        tcg_out_shli64(s, ret, ret, 32);
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));

    /*
     * Otherwise we must load the value from the constant pool.
     */
        add = ppc_tbrel_diff(s, NULL);
    } else if (have_isa_3_10) {
        if (type == TCG_TYPE_V64) {
            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
    } else if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP1, 0);

        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);

        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else if (have_isa_3_00) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    tcg_out32(s, load_insn);

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);

        g_assert_not_reached();

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();

static bool mask_operand(uint32_t c, int *mb, int *me)
    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions. */
    if (c == 0 || c == -1) {
    if (test & (test - 1)) {
    *mb = test ? clz32(test & -test) + 1 : 0;
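
    /*
     * For illustration (added note): c = 0x00ffff00 is a valid mask.
     * Adding the lowest set bit (0x100) gives 0x01000000, which has a
     * single bit set, so there is exactly one run of ones; the result is
     * mb = 8, me = 23, the big-endian bit range that RLWINM/RLWIMI expect.
     */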
static bool mask64_operand(uint64_t c, int *mb, int *me)

    /* Accept 1..10..0. */

    /* Accept 0..01..1. */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        tcg_out32(s, op_lo | SAI(src, dst, c));

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
    ptrdiff_t disp = tcg_pcrel_diff(s, target);

    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
    case STB: case STH: case STW:
        is_int_store = true;

    /* For unaligned or large offsets, use the prefixed form. */
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /*
         * Note that the MLS:D insns retain their un-prefixed opcode,
         * while the 8LS:D insns use a different opcode space.
         */
            tcg_out_mls_d(s, opi, rt, base, offset, 0);
            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);

    /* For unaligned, or very large offsets, use the indexed form. */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        l1 = (int16_t)(offset - 0x4000);
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
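
    /*
     * For illustration (added note): offset = 0x12345678 splits into
     * l0 = 0x5678 and l1 = 0x1234, producing "addis rs, base, 0x1234"
     * followed by the D-form op with displacement 0x5678.  When l0 comes
     * out negative, e.g. offset = 0x8000 gives l0 = -0x8000, the borrow
     * folds into l1 = 1, so the pair still sums to the original offset.
     */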
static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        g_assert_not_reached();

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                         STXSDX, arg, base, offset);
        tcg_debug_assert((offset & 7) == 0);
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        g_assert_not_reached();

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)

/*
 * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
 * If RC, then also set RC0.
 */
static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
                         bool const_arg2, TCGType type, bool rc)
        tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc);

    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;

    if ((arg2 & ~0xffff) == 0) {
        tcg_out32(s, ANDI | SAI(arg1, dest, arg2));
    if ((arg2 & ~0xffff0000ull) == 0) {
        tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
    if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) {
        tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
    if (TCG_TARGET_REG_BITS == 64) {
        int sh = clz64(arg2);
        if (mask64_operand(arg2 << sh, &mb, &me)) {
            tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);

    /* Constraints should satisfy this. */
    g_assert_not_reached();

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /*
     * Simplify the comparisons below wrt CMPI.
     * All of the tests are 16-bit, so a 32-bit sign extend always works.
     */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
            if ((int16_t) arg2 == arg2) {
            } else if ((uint16_t) arg2 == arg2) {
    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        tcg_debug_assert(cr == 0);
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true);
            if ((int16_t) arg2 == arg2) {
            if ((uint16_t) arg2 == arg2) {
        g_assert_not_reached();

    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
        tcg_out_movi(s, type, TCG_REG_R0, arg2);
        tcg_out32(s, op | RA(arg1) | RB(arg2));

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.
         * RT = (~X + X) + CA
         *    = -1 + CA
         *    = CA ? 0 : -1
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, src, src));
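
        /*
         * For illustration (added note): src = 5 gives "addic r0, 5, -1"
         * -> r0 = 4 with CA = 1, then "subfe dst, src, src" computes
         * ~5 + 5 + 1 = 0; src = 0 gives CA = 0 and ~0 + 0 + 0 = -1.
         * So dst = -(src == 0), exactly the negated setcond result.
         */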
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
        tcg_out32(s, NEG | RT(dst) | RA(dst));

static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.  Extra addition
         * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
    tcg_out_setcond_eq0(s, type, dst, src, false);
        tcg_out32(s, ADDI | TAI(dst, dst, -1));
        tcg_out_xori32(s, dst, dst, 1);

static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));

static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
                            int const_arg2, bool neg)
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2. */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;

    /* With SETBC/SETBCR, we can always implement with 2 insns. */
    if (have_isa_3_10) {
        tcg_insn_unit bi, opc;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
        bi = tcg_to_bc[cond] & (0x1f << 16);
        if (tcg_to_bc[cond] & BO(8)) {
            opc = neg ? SETNBC : SETBC;
            opc = neg ? SETNBCR : SETBCR;
        tcg_out32(s, opc | RT(arg0) | bi);

    /* Handle common and trivial cases before handling anything else. */
        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
        tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
        /* Extract the sign bit. */
        if (type == TCG_TYPE_I32) {
            tcg_out_sari32(s, arg0, arg1, 31);
            tcg_out_shri32(s, arg0, arg1, 31);
            tcg_out_sari64(s, arg0, arg1, 63);
            tcg_out_shri64(s, arg0, arg1, 63);

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL. */
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        tcg_out32(s, isel | tab);

        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);

        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);

    case TCG_COND_TSTEQ:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg);

    case TCG_COND_TSTNE:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg);

        sh = 30; /* CR7 CR_GT */
        sh = 29; /* CR7 CR_LT */

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
            tcg_out_xori32(s, arg0, arg0, 1);
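
        /*
         * For illustration (added note): after MFOCRF with FXM(7), the
         * four CR7 bits occupy bits 28..31 of R0 (LT = 28, GT = 29,
         * EQ = 30).  RLWINM with mask 31..31 keeps only the bit rotated
         * into position 31, i.e. source bit (31 + sh) mod 32, so sh = 29
         * extracts CR7.LT and sh = 30 extracts CR7.GT, matching the
         * comments on the sh assignments above.
         */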
        g_assert_not_reached();

static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
    tcg_out32(s, tcg_to_bc[cond] | bd);

static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
        bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    tcg_out_bc(s, cond, bd);

static void tcg_out_brcond(TCGContext *s, TCGCond cond,
                           TCGArg arg1, TCGArg arg2, int const_arg2,
                           TCGLabel *l, TCGType type)
    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);
    tcg_out_bc_lab(s, cond, l);

static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
                            TCGArg v2, bool const_c2)
    /* If for some reason both inputs are zero, don't produce bad code. */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);

    tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);

        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion. */

        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        tcg_out32(s, isel | TAB(dest, v1, v2));
            cond = tcg_invert_cond(cond);
        } else if (dest != v1) {
                tcg_out_movi(s, type, dest, 0);
                tcg_out_mov(s, type, dest, v1);
        /* Branch forward over one insn */
        tcg_out_bc(s, cond, 8);
            tcg_out_movi(s, type, dest, 0);
            tcg_out_mov(s, type, dest, v2);

static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
        /* Note that the only other valid constant for a2 is 0. */
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            tcg_out_bc(s, TCG_COND_EQ, 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out_bc(s, TCG_COND_NE, 8);
                tcg_out_movi(s, type, a0, 0);
                tcg_out_mov(s, type, a0, a2);

static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
                         const int *const_args)
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },

    TCGCond cond = args[4], cond2;
    TCGArg al, ah, bl, bh;
    int blconst, bhconst;

    blconst = const_args[2];
    bhconst = const_args[3];

        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
            tcg_out_andi32(s, TCG_REG_R0, al, bl);
            tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl));
            tcg_out_andi32(s, TCG_REG_TMP1, ah, bh);
            tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh));
        tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1);

        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        op = (bit1 != bit2 ? CRANDC : CRAND);
        cond2 = tcg_unsigned_cond(cond);

        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ));

        g_assert_not_reached();
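
/*
 * For illustration (added note), a 64-bit LT on a 32-bit host decomposes
 * per the bits[] table above as:
 *   cr6 = cmp  ah, bh            (signed compare of high halves)
 *   cr7 = cmpl al, bl            (unsigned compare of low halves)
 *   cr0.eq = cr6.eq & cr7.lt     (lows decide when the highs are equal)
 *   cr0.eq |= cr6.lt             (a strictly smaller high half wins)
 * CRANDC replaces CRAND for conditions like LE whose two bits differ.
 */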
2250 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
2251 const int *const_args)
2253 tcg_out_cmp2(s, args + 1, const_args + 1);
2254 tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0));
2255 tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31);
2258 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
2259 const int *const_args)
2261 tcg_out_cmp2(s, args, const_args);
2262 tcg_out_bc_lab(s, TCG_COND_EQ, arg_label(args[5]));
2265 static void tcg_out_mb(TCGContext *s, TCGArg a0)
2269 if (a0 & TCG_MO_ST_LD) {
2278 static void tcg_out_call_int(TCGContext *s, int lk,
2279 const tcg_insn_unit *target)
2282 /* Look through the descriptor. If the branch is in range, and we
2283 don't have to spend too much effort on building the toc. */
2284 const void *tgt = ((const void * const *)target)[0];
2285 uintptr_t toc = ((const uintptr_t *)target)[1];
2286 intptr_t diff = tcg_pcrel_diff(s, tgt);
2288 if (in_range_b(diff) && toc == (uint32_t)toc) {
2289 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
2290 tcg_out_b(s, lk, tgt);
2292 /* Fold the low bits of the constant into the addresses below. */
2293 intptr_t arg = (intptr_t)target;
2294 int ofs = (int16_t)arg;
2296 if (ofs + 8 < 0x8000) {
2301 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
2302 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
2303 tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
2304 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
2305 tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2307 #elif defined(_CALL_ELF) && _CALL_ELF == 2
2310 /* In the ELFv2 ABI, we have to set up r12 to contain the destination
2311 address, which the callee uses to compute its TOC address. */
2312 /* FIXME: when the branch is in range, we could avoid r12 load if we
2313 knew that the destination uses the same TOC, and what its local
2314 entry point offset is. */
2315 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
2317 diff = tcg_pcrel_diff(s, target);
2318 if (in_range_b(diff)) {
2319 tcg_out_b(s, lk, target);
2321 tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
2322 tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2325 tcg_out_b(s, lk, target);
2329 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
2330 const TCGHelperInfo *info)
2332 tcg_out_call_int(s, LK, target);
2335 static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
2342 [MO_BSWAP | MO_UB] = LBZX,
2343 [MO_BSWAP | MO_UW] = LHBRX,
2344 [MO_BSWAP | MO_UL] = LWBRX,
2345 [MO_BSWAP | MO_UQ] = LDBRX,
2348 static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
2353 [MO_BSWAP | MO_UB] = STBX,
2354 [MO_BSWAP | MO_UW] = STHBRX,
2355 [MO_BSWAP | MO_UL] = STWBRX,
2356 [MO_BSWAP | MO_UQ] = STDBRX,
2359 static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
2364 tcg_out32(s, MFSPR | RT(arg) | LR);
2369 * For the purposes of ppc32 sorting 4 input registers into 4 argument
2370 * registers, there is an outside chance we would require 3 temps.
2372 static const TCGLdstHelperParam ldst_helper_param = {
2373 .ra_gen = ldst_ra_gen,
2375 .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
2378 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2380 MemOp opc = get_memop(lb->oi);
2382 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2386 tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
2387 tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
2388 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
2390 tcg_out_b(s, 0, lb->raddr);
2394 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2396 MemOp opc = get_memop(lb->oi);
2398 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2402 tcg_out_st_helper_args(s, lb, &ldst_helper_param);
2403 tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
2405 tcg_out_b(s, 0, lb->raddr);
2415 bool tcg_target_has_memory_bswap(MemOp memop)
2419 if ((memop & MO_SIZE) <= MO_64) {
2424 * Reject 16-byte memop with 16-byte atomicity,
2425 * but do allow a pair of 64-bit operations.
2427 aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2428 return aa.atom <= MO_64;
2431 /* We expect to use a 16-bit negative offset from ENV. */
2432 #define MIN_TLB_MASK_TABLE_OFS -32768
2435 * For system-mode, perform the TLB load and compare.
2436 * For user-mode, perform any required alignment tests.
2437 * In both cases, return a TCGLabelQemuLdst structure if the slow path
2438 * is required and fill in @h with the host address for the fast path.
2440 static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
2441 TCGReg addrlo, TCGReg addrhi,
2442 MemOpIdx oi, bool is_ld)
2444 TCGType addr_type = s->addr_type;
2445 TCGLabelQemuLdst *ldst = NULL;
2446 MemOp opc = get_memop(oi);
2447 MemOp a_bits, s_bits;
2450 * Book II, Section 1.4, Single-Copy Atomicity, specifies:
2452 * Before 3.0, "An access that is not atomic is performed as a set of
2453 * smaller disjoint atomic accesses. In general, the number and alignment
2454 * of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN.
2456 * As of 3.0, "the non-atomic access is performed as described in
2457 * the corresponding list", which matches MO_ATOM_SUBALIGN.
2459 s_bits = opc & MO_SIZE;
2460 h->aa = atom_and_align_for_opc(s, opc,
2461 have_isa_3_00 ? MO_ATOM_SUBALIGN
2464 a_bits = h->aa.align;
2466 if (tcg_use_softmmu) {
2467 int mem_index = get_mmuidx(oi);
2468 int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
2469 : offsetof(CPUTLBEntry, addr_write);
2470 int fast_off = tlb_mask_table_ofs(s, mem_index);
2471 int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
2472 int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2474 ldst = new_ldst_label(s);
2475 ldst->is_ld = is_ld;
2477 ldst->addrlo_reg = addrlo;
2478 ldst->addrhi_reg = addrhi;
2480 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
2481 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
2482 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
2484 /* Extract the page index, shifted into place for tlb index. */
2485 if (TCG_TARGET_REG_BITS == 32) {
2486 tcg_out_shri32(s, TCG_REG_R0, addrlo,
2487 s->page_bits - CPU_TLB_ENTRY_BITS);
2489 tcg_out_shri64(s, TCG_REG_R0, addrlo,
2490 s->page_bits - CPU_TLB_ENTRY_BITS);
2492 tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
2495 * Load the (low part) TLB comparator into TMP2.
2496 * For 64-bit host, always load the entire 64-bit slot for simplicity.
2497 * We will ignore the high bits with tcg_out_cmp(..., addr_type).
2499 if (TCG_TARGET_REG_BITS == 64) {
2501 tcg_out32(s, LDUX | TAB(TCG_REG_TMP2,
2502 TCG_REG_TMP1, TCG_REG_TMP2));
2504 tcg_out32(s, ADD | TAB(TCG_REG_TMP1,
2505 TCG_REG_TMP1, TCG_REG_TMP2));
2506 tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2,
2507 TCG_REG_TMP1, cmp_off);
2509 } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
2510 tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2,
2511 TCG_REG_TMP1, TCG_REG_TMP2));
2513 tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
2514 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2515 cmp_off + 4 * HOST_BIG_ENDIAN);
2519 * Load the TLB addend for use on the fast path.
2520 * Do this asap to minimize any load use delay.
2522 if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
2523 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2524 offsetof(CPUTLBEntry, addend));
2527 /* Clear the non-page, non-alignment bits from the address in R0. */
2528 if (TCG_TARGET_REG_BITS == 32) {
 * We don't support unaligned accesses on 32-bit hosts.
 * Preserve the bottom bits and thus trigger a comparison
 * failure on unaligned accesses.
 */
if (a_bits < s_bits) {
    a_bits = s_bits;
}
2537 tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
2538 (32 - a_bits) & 31, 31 - s->page_bits);
} else {
    TCGReg t = addrlo;

    /*
     * If the access is unaligned, we need to make sure we fail if we
2544 * cross a page boundary. The trick is to add the access size-1
2545 * to the address before masking the low bits. That will make the
2546 * address overflow to the next page if we cross a page boundary,
2547 * which will then force a mismatch of the TLB compare.
2549 if (a_bits < s_bits) {
2550 unsigned a_mask = (1 << a_bits) - 1;
2551 unsigned s_mask = (1 << s_bits) - 1;
tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
t = TCG_REG_R0;
}
2556 /* Mask the address for the requested alignment. */
2557 if (addr_type == TCG_TYPE_I32) {
2558 tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2559 (32 - a_bits) & 31, 31 - s->page_bits);
2560 } else if (a_bits == 0) {
2561 tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
2563 tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2564 64 - s->page_bits, s->page_bits - a_bits);
2565 tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
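/*
 * Worked example (illustrative): with 4KiB pages (page_bits == 12),
 * an unaligned 8-byte access at page offset 0xffd adds s_mask - a_mask
 * (7 when a_bits == 0), giving 0x1004; the carry into bit 12 changes
 * the page number, so the TLB compare below fails and the slow path
 * handles the page-crossing access.  An unaligned access that stays
 * within the page (e.g. 0x008..0x00f) is unaffected by the addition.
 */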
2569 if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
2570 /* Low part comparison into cr7. */
2571 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
2572 0, 7, TCG_TYPE_I32);
2574 /* Load the high part TLB comparator into TMP2. */
2575 tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2576 cmp_off + 4 * !HOST_BIG_ENDIAN);
2578 /* Load addend, deferred for this case. */
2579 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2580 offsetof(CPUTLBEntry, addend));
2582 /* High part comparison into cr6. */
2583 tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2,
2584 0, 6, TCG_TYPE_I32);
2586 /* Combine comparisons into cr0. */
2587 tcg_out32(s, CRAND | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
} else {
    /* Full comparison into cr0. */
2590 tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
/* Load LR with a pointer into the current opcode, via a conditional branch-and-link. */
2595 ldst->label_ptr[0] = s->code_ptr;
2596 tcg_out_bc(s, TCG_COND_NE, LK);
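/*
 * Rough summary of the fast path emitted above (illustrative sketch,
 * register names and mnemonics approximate):
 *   ld    tmp1, mask_off(env);  ld tmp2, table_off(env)
 *   shri  r0, addr, page_bits - CPU_TLB_ENTRY_BITS;  and tmp1, tmp1, r0
 *   ldux  tmp2, tmp1, tmp2          # comparator; tmp1 = &tlb_entry
 *   ld    tmp1, addend(tmp1);  <mask addr into r0>;  cmp;  bcl on miss
 */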
2598 h->base = TCG_REG_TMP1;
2601 ldst = new_ldst_label(s);
2602 ldst->is_ld = is_ld;
ldst->oi = oi;
ldst->addrlo_reg = addrlo;
2605 ldst->addrhi_reg = addrhi;
/* We expect a_bits to max out at 7, well below ANDI's 16-bit immediate. */
tcg_debug_assert(a_bits < 16);
2609 tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
2611 ldst->label_ptr[0] = s->code_ptr;
2612 tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2615 h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
2618 if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
2619 /* Zero-extend the guest address for use in the host address. */
2620 tcg_out_ext32u(s, TCG_REG_R0, addrlo);
2621 h->index = TCG_REG_R0;
2629 static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
2630 TCGReg addrlo, TCGReg addrhi,
2631 MemOpIdx oi, TCGType data_type)
2633 MemOp opc = get_memop(oi);
2634 TCGLabelQemuLdst *ldst;
2637 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
2639 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2640 if (opc & MO_BSWAP) {
2641 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2642 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2643 tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
2644 } else if (h.base != 0) {
2645 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2646 tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
2647 tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
2648 } else if (h.index == datahi) {
2649 tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2650 tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
} else {
    tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2653 tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2656 uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2657 if (!have_isa_2_06 && insn == LDBRX) {
2658 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2659 tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2660 tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
2661 tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
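/*
 * Illustrative: lacking ISA 2.06's LDBRX, the byte-reversed 64-bit
 * load is composed from two LWBRX word loads; the RLDIMI above
 * deposits the word loaded from addr+4 into the high 32 bits of datalo.
 */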
} else if (insn) {
    tcg_out32(s, insn | TAB(datalo, h.base, h.index));
} else {
2665 insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2666 tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2667 tcg_out_movext(s, TCG_TYPE_REG, datalo,
2668 TCG_TYPE_REG, opc & MO_SSIZE, datalo);
if (ldst) {
    ldst->type = data_type;
2674 ldst->datalo_reg = datalo;
2675 ldst->datahi_reg = datahi;
2676 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2680 static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
2681 TCGReg addrlo, TCGReg addrhi,
2682 MemOpIdx oi, TCGType data_type)
2684 MemOp opc = get_memop(oi);
2685 TCGLabelQemuLdst *ldst;
2688 ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
2690 if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2691 if (opc & MO_BSWAP) {
2692 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2693 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2694 tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
2695 } else if (h.base != 0) {
2696 tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2697 tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
2698 tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
} else {
    tcg_out32(s, STW | TAI(datahi, h.index, 0));
2701 tcg_out32(s, STW | TAI(datalo, h.index, 4));
2704 uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2705 if (!have_isa_2_06 && insn == STDBRX) {
2706 tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2707 tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, h.index, 4));
2708 tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2709 tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP1));
} else {
    tcg_out32(s, insn | SAB(datalo, h.base, h.index));
}
if (ldst) {
    ldst->type = data_type;
2717 ldst->datalo_reg = datalo;
2718 ldst->datahi_reg = datahi;
2719 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2723 static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
2724 TCGReg addr_reg, MemOpIdx oi, bool is_ld)
2726 TCGLabelQemuLdst *ldst;
2732 ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);
2734 /* Compose the final address, as LQ/STQ have no indexing. */
index = h.index;
if (h.base != 0) {
    index = TCG_REG_TMP1;
    tcg_out32(s, ADD | TAB(index, h.base, h.index));
}
2740 need_bswap = get_memop(oi) & MO_BSWAP;
2742 if (h.aa.atom == MO_128) {
2743 tcg_debug_assert(!need_bswap);
2744 tcg_debug_assert(datalo & 1);
2745 tcg_debug_assert(datahi == datalo - 1);
2746 tcg_debug_assert(!is_ld || datahi != index);
2747 insn = is_ld ? LQ : STQ;
2748 tcg_out32(s, insn | TAI(datahi, index, 0));
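/*
 * Illustrative: LQ/STQ operate on an even/odd register pair, encoding
 * only the even register; the asserts above check that datahi (even)
 * and datalo (odd, datahi + 1) form such a pair.
 */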
} else {
    TCGReg d1, d2;

    if (HOST_BIG_ENDIAN ^ need_bswap) {
2753 d1 = datahi, d2 = datalo;
} else {
    d1 = datalo, d2 = datahi;
}

if (need_bswap) {
2759 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
2760 insn = is_ld ? LDBRX : STDBRX;
2761 tcg_out32(s, insn | TAB(d1, 0, index));
2762 tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
} else {
    insn = is_ld ? LD : STD;
2765 tcg_out32(s, insn | TAI(d1, index, 0));
2766 tcg_out32(s, insn | TAI(d2, index, 8));
if (ldst) {
    ldst->type = TCG_TYPE_I128;
2772 ldst->datalo_reg = datalo;
2773 ldst->datahi_reg = datahi;
2774 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2778 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
for (i = 0; i < count; ++i) {
    p[i] = NOP;
}
2786 /* Parameters for function call generation, used in tcg.c. */
2787 #define TCG_TARGET_STACK_ALIGN 16
#if defined(_CALL_AIX) || (defined(_CALL_ELF) && _CALL_ELF == 1)
# define LINK_AREA_SIZE (6 * SZR)
2791 # define LR_OFFSET (1 * SZR)
2792 # define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR)
2793 #elif defined(_CALL_DARWIN)
2794 # define LINK_AREA_SIZE (6 * SZR)
2795 # define LR_OFFSET (2 * SZR)
2796 #elif TCG_TARGET_REG_BITS == 64
2797 # if defined(_CALL_ELF) && _CALL_ELF == 2
2798 # define LINK_AREA_SIZE (4 * SZR)
2799 # define LR_OFFSET (1 * SZR)
# endif
#else /* TCG_TARGET_REG_BITS == 32 */
2802 # if defined(_CALL_SYSV)
2803 # define LINK_AREA_SIZE (2 * SZR)
2804 # define LR_OFFSET (1 * SZR)
# endif
#endif
#ifndef LR_OFFSET
# error "Unhandled ABI"
#endif
2810 #ifndef TCG_TARGET_CALL_STACK_OFFSET
2811 # define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE
2814 #define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2815 #define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2817 #define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \
2818 + TCG_STATIC_CALL_ARGS_SIZE \
+ CPU_TEMP_BUF_SIZE \
+ REG_SAVE_SIZE \
2821 + TCG_TARGET_STACK_ALIGN - 1) \
2822 & -TCG_TARGET_STACK_ALIGN)
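/*
 * Illustrative arithmetic: the trailing "+ ALIGN - 1 ... & -ALIGN"
 * rounds the frame up to the 16-byte stack alignment, e.g. a raw
 * total of 0x1f4 becomes (0x1f4 + 15) & -16 = 0x200.
 */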
2824 #define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2826 static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i;

#ifdef _CALL_AIX
    const void **desc = (const void **)s->code_ptr;
2832 desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */
2833 desc[1] = 0; /* environment pointer */
    s->code_ptr = (void *)(desc + 2); /* skip over descriptor */
#endif
tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
              CPU_TEMP_BUF_SIZE);
2841 tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2842 tcg_out32(s, (SZR == 8 ? STDU : STWU)
2843 | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2845 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2846 tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2847 TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2849 tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2851 if (!tcg_use_softmmu && guest_base) {
2852 tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2853 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2856 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2857 tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2858 tcg_out32(s, BCCTR | BO_ALWAYS);
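/*
 * Illustrative calling contract: on all supported PPC ABIs the first
 * two integer argument registers are r3 and r4, so the env pointer
 * arrives in r3 (moved to AREG0 above) and the entry point of the
 * translated block in r4 (reached via CTR above).
 */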
2861 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2863 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2864 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2865 tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2866 TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2868 tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2869 tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2870 tcg_out32(s, BCLR | BO_ALWAYS);
2873 static void tcg_out_tb_start(TCGContext *s)
2875 /* Load TCG_REG_TB. */
2877 if (have_isa_3_00) {
2879 tcg_out_addpcis(s, TCG_REG_TB, 0);
} else {
    /* bcl 20,31,$+4 (preferred form for getting nia) */
2882 tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
2883 tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
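/*
 * Illustrative: "bcl 20,31,$+4" is the architecturally preferred form
 * for reading the next instruction address: it always branches to the
 * next insn with LK=1, so LR receives that address (nia) without
 * disturbing the processor's link-stack branch predictor.
 */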
2888 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
2890 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
2891 tcg_out_b(s, 0, tcg_code_gen_epilogue);
2894 static void tcg_out_goto_tb(TCGContext *s, int which)
2896 uintptr_t ptr = get_jmp_target_addr(s, which);
2899 /* Direct branch will be patched by tb_target_set_jmp_target. */
2900 set_jmp_insn_offset(s, which);
/* When the branch is out of range, fall through to the indirect load. */
if (USE_REG_TB) {
    ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
2906 tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
2907 } else if (have_isa_3_10) {
2908 ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
2909 tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
2910 } else if (have_isa_3_00) {
2911 ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
int16_t lo = offset;
tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
2914 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
} else {
    int16_t lo = ptr;
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
2918 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2921 tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
2922 tcg_out32(s, BCCTR | BO_ALWAYS);
2923 set_jmp_reset_offset(s, which);
2926 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2927 uintptr_t jmp_rx, uintptr_t jmp_rw)
2929 uintptr_t addr = tb->jmp_target_addr[n];
2930 intptr_t diff = addr - jmp_rx;
tcg_insn_unit insn;

if (in_range_b(diff)) {
2934 insn = B | (diff & 0x3fffffc);
} else {
    insn = NOP;
}

qatomic_set((uint32_t *)jmp_rw, insn);
2940 flush_idcache_range(jmp_rx, jmp_rw, 4);
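/*
 * Illustrative: the patched slot is a single aligned 4-byte insn, so a
 * plain atomic store suffices; e.g. retargeting 0x1000 bytes forward
 * writes B | 0x1000, while an out-of-range target writes NOP and
 * execution falls through to the indirect jump sequence above.
 */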
2943 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2944 const TCGArg args[TCG_MAX_OP_ARGS],
2945 const int const_args[TCG_MAX_OP_ARGS])
{
    TCGArg a0, a1, a2;

    switch (opc) {
    case INDEX_op_goto_ptr:
2951 tcg_out32(s, MTSPR | RS(args[0]) | CTR);
tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); /* li r3,0: zero return value if we land in the epilogue */
2953 tcg_out32(s, BCCTR | BO_ALWAYS);
TCGLabel *l = arg_label(args[0]);
uint32_t insn = B;

if (l->has_value) {
    insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
                           l->u.value_ptr);
} else {
    tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
}
tcg_out32(s, insn);
2969 case INDEX_op_ld8u_i32:
2970 case INDEX_op_ld8u_i64:
2971 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2973 case INDEX_op_ld8s_i32:
2974 case INDEX_op_ld8s_i64:
2975 tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2976 tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]);
2978 case INDEX_op_ld16u_i32:
2979 case INDEX_op_ld16u_i64:
2980 tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2982 case INDEX_op_ld16s_i32:
2983 case INDEX_op_ld16s_i64:
2984 tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2986 case INDEX_op_ld_i32:
2987 case INDEX_op_ld32u_i64:
2988 tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2990 case INDEX_op_ld32s_i64:
2991 tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2993 case INDEX_op_ld_i64:
2994 tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2996 case INDEX_op_st8_i32:
2997 case INDEX_op_st8_i64:
2998 tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
3000 case INDEX_op_st16_i32:
3001 case INDEX_op_st16_i64:
3002 tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
3004 case INDEX_op_st_i32:
3005 case INDEX_op_st32_i64:
3006 tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
3008 case INDEX_op_st_i64:
3009 tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
3012 case INDEX_op_add_i32:
3013 a0 = args[0], a1 = args[1], a2 = args[2];
3014 if (const_args[2]) {
do_addi_32:
    tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
3018 tcg_out32(s, ADD | TAB(a0, a1, a2));
3021 case INDEX_op_sub_i32:
3022 a0 = args[0], a1 = args[1], a2 = args[2];
3023 if (const_args[1]) {
3024 if (const_args[2]) {
3025 tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
3027 tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
3029 } else if (const_args[2]) {
a2 = -a2;
goto do_addi_32;
} else {
    tcg_out32(s, SUBF | TAB(a0, a2, a1));
3037 case INDEX_op_and_i32:
3038 a0 = args[0], a1 = args[1], a2 = args[2];
3039 if (const_args[2]) {
3040 tcg_out_andi32(s, a0, a1, a2);
3042 tcg_out32(s, AND | SAB(a1, a0, a2));
3045 case INDEX_op_and_i64:
3046 a0 = args[0], a1 = args[1], a2 = args[2];
3047 if (const_args[2]) {
3048 tcg_out_andi64(s, a0, a1, a2);
3050 tcg_out32(s, AND | SAB(a1, a0, a2));
3053 case INDEX_op_or_i64:
3054 case INDEX_op_or_i32:
3055 a0 = args[0], a1 = args[1], a2 = args[2];
3056 if (const_args[2]) {
3057 tcg_out_ori32(s, a0, a1, a2);
3059 tcg_out32(s, OR | SAB(a1, a0, a2));
3062 case INDEX_op_xor_i64:
3063 case INDEX_op_xor_i32:
3064 a0 = args[0], a1 = args[1], a2 = args[2];
3065 if (const_args[2]) {
3066 tcg_out_xori32(s, a0, a1, a2);
3068 tcg_out32(s, XOR | SAB(a1, a0, a2));
3071 case INDEX_op_andc_i32:
3072 a0 = args[0], a1 = args[1], a2 = args[2];
3073 if (const_args[2]) {
3074 tcg_out_andi32(s, a0, a1, ~a2);
3076 tcg_out32(s, ANDC | SAB(a1, a0, a2));
3079 case INDEX_op_andc_i64:
3080 a0 = args[0], a1 = args[1], a2 = args[2];
3081 if (const_args[2]) {
3082 tcg_out_andi64(s, a0, a1, ~a2);
3084 tcg_out32(s, ANDC | SAB(a1, a0, a2));
3087 case INDEX_op_orc_i32:
3088 if (const_args[2]) {
3089 tcg_out_ori32(s, args[0], args[1], ~args[2]);
3093 case INDEX_op_orc_i64:
3094 tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
3096 case INDEX_op_eqv_i32:
3097 if (const_args[2]) {
3098 tcg_out_xori32(s, args[0], args[1], ~args[2]);
3102 case INDEX_op_eqv_i64:
3103 tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
3105 case INDEX_op_nand_i32:
3106 case INDEX_op_nand_i64:
3107 tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
3109 case INDEX_op_nor_i32:
3110 case INDEX_op_nor_i64:
3111 tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
3114 case INDEX_op_clz_i32:
3115 tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
3116 args[2], const_args[2]);
3118 case INDEX_op_ctz_i32:
3119 tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
3120 args[2], const_args[2]);
3122 case INDEX_op_ctpop_i32:
3123 tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
3126 case INDEX_op_clz_i64:
3127 tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
3128 args[2], const_args[2]);
3130 case INDEX_op_ctz_i64:
3131 tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
3132 args[2], const_args[2]);
3134 case INDEX_op_ctpop_i64:
3135 tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
3138 case INDEX_op_mul_i32:
3139 a0 = args[0], a1 = args[1], a2 = args[2];
3140 if (const_args[2]) {
3141 tcg_out32(s, MULLI | TAI(a0, a1, a2));
3143 tcg_out32(s, MULLW | TAB(a0, a1, a2));
3147 case INDEX_op_div_i32:
3148 tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
3151 case INDEX_op_divu_i32:
3152 tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
3155 case INDEX_op_rem_i32:
3156 tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
3159 case INDEX_op_remu_i32:
3160 tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
3163 case INDEX_op_shl_i32:
3164 if (const_args[2]) {
3165 /* Limit immediate shift count lest we create an illegal insn. */
3166 tcg_out_shli32(s, args[0], args[1], args[2] & 31);
3168 tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
3171 case INDEX_op_shr_i32:
3172 if (const_args[2]) {
3173 /* Limit immediate shift count lest we create an illegal insn. */
3174 tcg_out_shri32(s, args[0], args[1], args[2] & 31);
3176 tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
3179 case INDEX_op_sar_i32:
3180 if (const_args[2]) {
3181 tcg_out_sari32(s, args[0], args[1], args[2]);
3183 tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
3186 case INDEX_op_rotl_i32:
3187 if (const_args[2]) {
3188 tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
             | MB(0) | ME(31));
3194 case INDEX_op_rotr_i32:
3195 if (const_args[2]) {
3196 tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
3198 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
             | MB(0) | ME(31));
3204 case INDEX_op_brcond_i32:
3205 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
3206 arg_label(args[3]), TCG_TYPE_I32);
3208 case INDEX_op_brcond_i64:
3209 tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
3210 arg_label(args[3]), TCG_TYPE_I64);
3212 case INDEX_op_brcond2_i32:
3213 tcg_out_brcond2(s, args, const_args);
3216 case INDEX_op_neg_i32:
3217 case INDEX_op_neg_i64:
3218 tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
3221 case INDEX_op_not_i32:
3222 case INDEX_op_not_i64:
3223 tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
3226 case INDEX_op_add_i64:
3227 a0 = args[0], a1 = args[1], a2 = args[2];
3228 if (const_args[2]) {
do_addi_64:
    tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
3232 tcg_out32(s, ADD | TAB(a0, a1, a2));
3235 case INDEX_op_sub_i64:
3236 a0 = args[0], a1 = args[1], a2 = args[2];
3237 if (const_args[1]) {
3238 if (const_args[2]) {
3239 tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
3241 tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
3243 } else if (const_args[2]) {
a2 = -a2;
goto do_addi_64;
} else {
    tcg_out32(s, SUBF | TAB(a0, a2, a1));
3251 case INDEX_op_shl_i64:
3252 if (const_args[2]) {
3253 /* Limit immediate shift count lest we create an illegal insn. */
3254 tcg_out_shli64(s, args[0], args[1], args[2] & 63);
3256 tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
3259 case INDEX_op_shr_i64:
3260 if (const_args[2]) {
3261 /* Limit immediate shift count lest we create an illegal insn. */
3262 tcg_out_shri64(s, args[0], args[1], args[2] & 63);
3264 tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
3267 case INDEX_op_sar_i64:
3268 if (const_args[2]) {
3269 tcg_out_sari64(s, args[0], args[1], args[2]);
3271 tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
3274 case INDEX_op_rotl_i64:
3275 if (const_args[2]) {
3276 tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
3278 tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
3281 case INDEX_op_rotr_i64:
3282 if (const_args[2]) {
3283 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
3285 tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
3286 tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
3290 case INDEX_op_mul_i64:
3291 a0 = args[0], a1 = args[1], a2 = args[2];
3292 if (const_args[2]) {
3293 tcg_out32(s, MULLI | TAI(a0, a1, a2));
3295 tcg_out32(s, MULLD | TAB(a0, a1, a2));
3298 case INDEX_op_div_i64:
3299 tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
3301 case INDEX_op_divu_i64:
3302 tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
3304 case INDEX_op_rem_i64:
3305 tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
3307 case INDEX_op_remu_i64:
3308 tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
3311 case INDEX_op_qemu_ld_a64_i32:
3312 if (TCG_TARGET_REG_BITS == 32) {
3313 tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
3314 args[3], TCG_TYPE_I32);
3318 case INDEX_op_qemu_ld_a32_i32:
3319 tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
3321 case INDEX_op_qemu_ld_a32_i64:
3322 if (TCG_TARGET_REG_BITS == 64) {
3323 tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
3324 args[2], TCG_TYPE_I64);
3326 tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
3327 args[3], TCG_TYPE_I64);
3330 case INDEX_op_qemu_ld_a64_i64:
3331 if (TCG_TARGET_REG_BITS == 64) {
3332 tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
3333 args[2], TCG_TYPE_I64);
3335 tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
3336 args[4], TCG_TYPE_I64);
3339 case INDEX_op_qemu_ld_a32_i128:
3340 case INDEX_op_qemu_ld_a64_i128:
3341 tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3342 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
3345 case INDEX_op_qemu_st_a64_i32:
3346 if (TCG_TARGET_REG_BITS == 32) {
3347 tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
3348 args[3], TCG_TYPE_I32);
3352 case INDEX_op_qemu_st_a32_i32:
3353 tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
3355 case INDEX_op_qemu_st_a32_i64:
3356 if (TCG_TARGET_REG_BITS == 64) {
3357 tcg_out_qemu_st(s, args[0], -1, args[1], -1,
3358 args[2], TCG_TYPE_I64);
3360 tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
3361 args[3], TCG_TYPE_I64);
3364 case INDEX_op_qemu_st_a64_i64:
3365 if (TCG_TARGET_REG_BITS == 64) {
3366 tcg_out_qemu_st(s, args[0], -1, args[1], -1,
3367 args[2], TCG_TYPE_I64);
3369 tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
3370 args[4], TCG_TYPE_I64);
3373 case INDEX_op_qemu_st_a32_i128:
3374 case INDEX_op_qemu_st_a64_i128:
3375 tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3376 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
3379 case INDEX_op_setcond_i32:
3380 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
3381 const_args[2], false);
3383 case INDEX_op_setcond_i64:
3384 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
3385 const_args[2], false);
3387 case INDEX_op_negsetcond_i32:
3388 tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
3389 const_args[2], true);
3391 case INDEX_op_negsetcond_i64:
3392 tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
3393 const_args[2], true);
3395 case INDEX_op_setcond2_i32:
3396 tcg_out_setcond2(s, args, const_args);
3399 case INDEX_op_bswap16_i32:
3400 case INDEX_op_bswap16_i64:
3401 tcg_out_bswap16(s, args[0], args[1], args[2]);
3403 case INDEX_op_bswap32_i32:
3404 tcg_out_bswap32(s, args[0], args[1], 0);
3406 case INDEX_op_bswap32_i64:
3407 tcg_out_bswap32(s, args[0], args[1], args[2]);
3409 case INDEX_op_bswap64_i64:
3410 tcg_out_bswap64(s, args[0], args[1]);
3413 case INDEX_op_deposit_i32:
3414 if (const_args[2]) {
3415 uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
3416 tcg_out_andi32(s, args[0], args[0], ~mask);
3418 tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
3419 32 - args[3] - args[4], 31 - args[3]);
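/*
 * Illustrative: for ofs = 16, len = 8 the mask is
 * ((2u << 7) - 1) << 16 = 0x00ff0000.  Writing (2u << (len - 1)) - 1
 * rather than (1u << len) - 1 stays well-defined when len == 32, where
 * unsigned wraparound yields 0 - 1 = 0xffffffff (a 32-bit shift of 1u
 * would be undefined behaviour).
 */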
3422 case INDEX_op_deposit_i64:
3423 if (const_args[2]) {
3424 uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
3425 tcg_out_andi64(s, args[0], args[0], ~mask);
3427 tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
3428 64 - args[3] - args[4]);
3432 case INDEX_op_extract_i32:
3433 tcg_out_rlw(s, RLWINM, args[0], args[1],
3434 32 - args[2], 32 - args[3], 31);
3436 case INDEX_op_extract_i64:
3437 tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
3440 case INDEX_op_movcond_i32:
3441 tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
3442 args[3], args[4], const_args[2]);
3444 case INDEX_op_movcond_i64:
3445 tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
3446 args[3], args[4], const_args[2]);
3449 #if TCG_TARGET_REG_BITS == 64
3450 case INDEX_op_add2_i64:
3452 case INDEX_op_add2_i32:
3454 /* Note that the CA bit is defined based on the word size of the
3455 environment. So in 64-bit mode it's always carry-out of bit 63.
3456 The fallback code using deposit works just as well for 32-bit. */
3457 a0 = args[0], a1 = args[1];
3458 if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
    a0 = TCG_REG_R0;
}
if (const_args[4]) {
3462 tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
3464 tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
3466 if (const_args[5]) {
3467 tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
3469 tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
3471 if (a0 != args[0]) {
3472 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3476 #if TCG_TARGET_REG_BITS == 64
3477 case INDEX_op_sub2_i64:
3479 case INDEX_op_sub2_i32:
3481 a0 = args[0], a1 = args[1];
3482 if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
    a0 = TCG_REG_R0;
}
if (const_args[2]) {
3486 tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
3488 tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
3490 if (const_args[3]) {
3491 tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
3493 tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
3495 if (a0 != args[0]) {
3496 tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3500 case INDEX_op_muluh_i32:
3501 tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
3503 case INDEX_op_mulsh_i32:
3504 tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
3506 case INDEX_op_muluh_i64:
3507 tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
3509 case INDEX_op_mulsh_i64:
3510 tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
case INDEX_op_mb:
    tcg_out_mb(s, args[0]);
    break;
3517 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
3518 case INDEX_op_mov_i64:
3519 case INDEX_op_call: /* Always emitted via tcg_out_call. */
3520 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */
3521 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */
3522 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */
3523 case INDEX_op_ext8s_i64:
3524 case INDEX_op_ext8u_i32:
3525 case INDEX_op_ext8u_i64:
3526 case INDEX_op_ext16s_i32:
3527 case INDEX_op_ext16s_i64:
3528 case INDEX_op_ext16u_i32:
3529 case INDEX_op_ext16u_i64:
3530 case INDEX_op_ext32s_i64:
3531 case INDEX_op_ext32u_i64:
3532 case INDEX_op_ext_i32_i64:
3533 case INDEX_op_extu_i32_i64:
3534 case INDEX_op_extrl_i64_i32:
default:
    g_assert_not_reached();
}
}
3540 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_and_vec:
3544 case INDEX_op_or_vec:
3545 case INDEX_op_xor_vec:
3546 case INDEX_op_andc_vec:
3547 case INDEX_op_not_vec:
3548 case INDEX_op_nor_vec:
3549 case INDEX_op_eqv_vec:
3550 case INDEX_op_nand_vec:
    return 1;
case INDEX_op_orc_vec:
3553 return have_isa_2_07;
3554 case INDEX_op_add_vec:
3555 case INDEX_op_sub_vec:
3556 case INDEX_op_smax_vec:
3557 case INDEX_op_smin_vec:
3558 case INDEX_op_umax_vec:
3559 case INDEX_op_umin_vec:
3560 case INDEX_op_shlv_vec:
3561 case INDEX_op_shrv_vec:
3562 case INDEX_op_sarv_vec:
3563 case INDEX_op_rotlv_vec:
3564 return vece <= MO_32 || have_isa_2_07;
3565 case INDEX_op_ssadd_vec:
3566 case INDEX_op_sssub_vec:
3567 case INDEX_op_usadd_vec:
3568 case INDEX_op_ussub_vec:
3569 return vece <= MO_32;
3570 case INDEX_op_cmp_vec:
3571 case INDEX_op_shli_vec:
3572 case INDEX_op_shri_vec:
3573 case INDEX_op_sari_vec:
3574 case INDEX_op_rotli_vec:
3575 return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3576 case INDEX_op_neg_vec:
3577 return vece >= MO_32 && have_isa_3_00;
3578 case INDEX_op_mul_vec:
3584 return have_isa_2_07 ? 1 : -1;
3586 return have_isa_3_10;
3589 case INDEX_op_bitsel_vec:
3591 case INDEX_op_rotrv_vec:
3598 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3599 TCGReg dst, TCGReg src)
3601 tcg_debug_assert(dst >= TCG_REG_V0);
3603 /* Splat from integer reg allowed via constraints for v3.00. */
3604 if (src < TCG_REG_V0) {
3605 tcg_debug_assert(have_isa_3_00);
switch (vece) {
case MO_64:
    tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
    return true;
case MO_32:
    tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
    return true;
default:
    /* Fail, so that we fall back on either dupm or mov+dup. */
    return false;
}
3620 * Recall we use (or emulate) VSX integer loads, so the integer is
3621 * right justified within the left (zero-index) double-word.
3625 tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3628 tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3631 tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
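/*
 * Illustrative: VSPLT* element numbers use big-endian ordering, and the
 * integer sits right-justified in doubleword 0 of the vector, so the
 * wanted element is the last byte/halfword/word of that doubleword:
 * index 7, 3 or 1 respectively, hence the (7|3|1) << 16 encodings above.
 */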
3635 tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3638 tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3639 tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
    break;
default:
    g_assert_not_reached();
}
return true;
3647 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3648 TCGReg out, TCGReg base, intptr_t offset)
3652 tcg_debug_assert(out >= TCG_REG_V0);
3655 if (have_isa_3_00) {
3656 tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3658 tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3660 elt = extract32(offset, 0, 4);
#if !HOST_BIG_ENDIAN
elt ^= 15;
#endif
3664 tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3667 tcg_debug_assert((offset & 1) == 0);
3668 if (have_isa_3_00) {
3669 tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3671 tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3673 elt = extract32(offset, 1, 3);
#if !HOST_BIG_ENDIAN
elt ^= 7;
#endif
3677 tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3680 if (have_isa_3_00) {
3681 tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3684 tcg_debug_assert((offset & 3) == 0);
3685 tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3686 elt = extract32(offset, 2, 2);
#if !HOST_BIG_ENDIAN
elt ^= 3;
#endif
3690 tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3694 tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3697 tcg_debug_assert((offset & 7) == 0);
3698 tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3699 tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3700 elt = extract32(offset, 3, 1);
#if !HOST_BIG_ENDIAN
elt = !elt;
#endif
if (elt) {
3705 tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
} else {
    tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
}
break;
default:
    g_assert_not_reached();
}
return true;
3716 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3717 unsigned vecl, unsigned vece,
3718 const TCGArg args[TCG_MAX_OP_ARGS],
3719 const int const_args[TCG_MAX_OP_ARGS])
3721 static const uint32_t
3722 add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3723 sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3724 mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3725 neg_op[4] = { 0, 0, VNEGW, VNEGD },
3726 eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3727 ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3728 gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3729 gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3730 ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3731 usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3732 sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3733 ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3734 umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3735 smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3736 umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3737 smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3738 shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3739 shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3740 sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3741 mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3742 mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3743 muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3744 mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3745 pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3746 rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3748 TCGType type = vecl + TCG_TYPE_V64;
3749 TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3753 case INDEX_op_ld_vec:
3754 tcg_out_ld(s, type, a0, a1, a2);
3756 case INDEX_op_st_vec:
3757 tcg_out_st(s, type, a0, a1, a2);
3759 case INDEX_op_dupm_vec:
3760 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3763 case INDEX_op_add_vec:
3764 insn = add_op[vece];
3766 case INDEX_op_sub_vec:
3767 insn = sub_op[vece];
3769 case INDEX_op_neg_vec:
3770 insn = neg_op[vece];
3774 case INDEX_op_mul_vec:
3775 insn = mul_op[vece];
3777 case INDEX_op_ssadd_vec:
3778 insn = ssadd_op[vece];
3780 case INDEX_op_sssub_vec:
3781 insn = sssub_op[vece];
3783 case INDEX_op_usadd_vec:
3784 insn = usadd_op[vece];
3786 case INDEX_op_ussub_vec:
3787 insn = ussub_op[vece];
3789 case INDEX_op_smin_vec:
3790 insn = smin_op[vece];
3792 case INDEX_op_umin_vec:
3793 insn = umin_op[vece];
3795 case INDEX_op_smax_vec:
3796 insn = smax_op[vece];
3798 case INDEX_op_umax_vec:
3799 insn = umax_op[vece];
3801 case INDEX_op_shlv_vec:
3802 insn = shlv_op[vece];
3804 case INDEX_op_shrv_vec:
3805 insn = shrv_op[vece];
3807 case INDEX_op_sarv_vec:
3808 insn = sarv_op[vece];
3810 case INDEX_op_and_vec:
3813 case INDEX_op_or_vec:
3816 case INDEX_op_xor_vec:
3819 case INDEX_op_andc_vec:
3822 case INDEX_op_not_vec:
3826 case INDEX_op_orc_vec:
3829 case INDEX_op_nand_vec:
3832 case INDEX_op_nor_vec:
3835 case INDEX_op_eqv_vec:
3839 case INDEX_op_cmp_vec:
3848 insn = gts_op[vece];
3851 insn = gtu_op[vece];
default:
    g_assert_not_reached();
}
break;
3858 case INDEX_op_bitsel_vec:
3859 tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3862 case INDEX_op_dup2_vec:
3863 assert(TCG_TARGET_REG_BITS == 32);
3864 /* With inputs a1 = xLxx, a2 = xHxx */
3865 tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */
3866 tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */
3867 tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */
3870 case INDEX_op_ppc_mrgh_vec:
3871 insn = mrgh_op[vece];
3873 case INDEX_op_ppc_mrgl_vec:
3874 insn = mrgl_op[vece];
3876 case INDEX_op_ppc_muleu_vec:
3877 insn = muleu_op[vece];
3879 case INDEX_op_ppc_mulou_vec:
3880 insn = mulou_op[vece];
3882 case INDEX_op_ppc_pkum_vec:
3883 insn = pkum_op[vece];
3885 case INDEX_op_rotlv_vec:
3886 insn = rotl_op[vece];
3888 case INDEX_op_ppc_msum_vec:
3889 tcg_debug_assert(vece == MO_16);
3890 tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3893 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
3894 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
    g_assert_not_reached();
}
3899 tcg_debug_assert(insn != 0);
3900 tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3903 static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3904 TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3908 if (vece == MO_32) {
3910 * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3911 * So using negative numbers gets us the 4th bit easily.
3913 imm = sextract32(imm, 0, 5);
} else {
    imm &= (8 << vece) - 1;
}
3918 /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
3919 t1 = tcg_constant_vec(type, MO_8, imm);
3920 vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3921 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
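/*
 * Worked example (illustrative): for vece == MO_32 and imm == 24,
 * sextract32(24, 0, 5) == -8, which VSPLTISB can encode; the byte
 * splat 0xf8 yields 0xf8f8f8f8 per word, whose low five bits are
 * 0b11000 == 24, exactly what the variable-shift insn consumes.
 */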
3924 static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3925 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3927 bool need_swap = false, need_inv = false;
3929 tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3937 if (have_isa_3_00 && vece <= MO_32) {
3951 need_swap = need_inv = true;
3954 g_assert_not_reached();
if (need_inv) {
    cond = tcg_invert_cond(cond);
}
if (need_swap) {
    TCGv_vec t1;
    t1 = v1, v1 = v2, v2 = t1;
    cond = tcg_swap_cond(cond);
}
3966 vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3967 tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
if (need_inv) {
    tcg_gen_not_vec(vece, v0, v0);
}
3974 static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3975 TCGv_vec v1, TCGv_vec v2)
3977 TCGv_vec t1 = tcg_temp_new_vec(type);
3978 TCGv_vec t2 = tcg_temp_new_vec(type);
TCGv_vec c0, c16;

switch (vece) {
case MO_8:
case MO_16:
    vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3985 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3986 vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3987 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3988 vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3989 tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3990 vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3991 tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3992 vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3993 tcgv_vec_arg(v0), tcgv_vec_arg(t1));
    break;

case MO_32:
    tcg_debug_assert(!have_isa_2_07);
3999 * Only 5 bits are significant, and VSPLTISB can represent -16..15.
4000 * So using -16 is a quick way to represent 16.
4002 c16 = tcg_constant_vec(type, MO_8, -16);
4003 c0 = tcg_constant_vec(type, MO_8, 0);
4005 vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
4006 tcgv_vec_arg(v2), tcgv_vec_arg(c16));
4007 vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
4008 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
4009 vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
4010 tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
4011 vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
4012 tcgv_vec_arg(t1), tcgv_vec_arg(c16));
4013 tcg_gen_add_vec(MO_32, v0, t1, t2);
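/*
 * Illustrative: the byte splat of -16 is 0xf0, whose low five bits are
 * 0b10000 == 16, so the MO_32 rotate above swaps the two halfwords of
 * each word of v2 before the multiply/merge steps that follow it.
 */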
    break;

default:
    g_assert_not_reached();
}
4019 tcg_temp_free_vec(t1);
4020 tcg_temp_free_vec(t2);
4023 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
4027 TCGv_vec v0, v1, v2, t0;
4031 v0 = temp_tcgv_vec(arg_temp(a0));
4032 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
4033 a2 = va_arg(va, TCGArg);
4036 case INDEX_op_shli_vec:
4037 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
4039 case INDEX_op_shri_vec:
4040 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
4042 case INDEX_op_sari_vec:
4043 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
4045 case INDEX_op_rotli_vec:
4046 expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
4048 case INDEX_op_cmp_vec:
4049 v2 = temp_tcgv_vec(arg_temp(a2));
4050 expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
4052 case INDEX_op_mul_vec:
4053 v2 = temp_tcgv_vec(arg_temp(a2));
4054 expand_vec_mul(type, vece, v0, v1, v2);
4056 case INDEX_op_rotlv_vec:
4057 v2 = temp_tcgv_vec(arg_temp(a2));
4058 t0 = tcg_temp_new_vec(type);
4059 tcg_gen_neg_vec(vece, t0, v2);
4060 tcg_gen_rotlv_vec(vece, v0, v1, t0);
4061 tcg_temp_free_vec(t0);
default:
    g_assert_not_reached();
}
va_end(va);
4069 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);
4075 case INDEX_op_ld8u_i32:
4076 case INDEX_op_ld8s_i32:
4077 case INDEX_op_ld16u_i32:
4078 case INDEX_op_ld16s_i32:
4079 case INDEX_op_ld_i32:
4080 case INDEX_op_ctpop_i32:
4081 case INDEX_op_neg_i32:
4082 case INDEX_op_not_i32:
4083 case INDEX_op_ext8s_i32:
4084 case INDEX_op_ext16s_i32:
4085 case INDEX_op_bswap16_i32:
4086 case INDEX_op_bswap32_i32:
4087 case INDEX_op_extract_i32:
4088 case INDEX_op_ld8u_i64:
4089 case INDEX_op_ld8s_i64:
4090 case INDEX_op_ld16u_i64:
4091 case INDEX_op_ld16s_i64:
4092 case INDEX_op_ld32u_i64:
4093 case INDEX_op_ld32s_i64:
4094 case INDEX_op_ld_i64:
4095 case INDEX_op_ctpop_i64:
4096 case INDEX_op_neg_i64:
4097 case INDEX_op_not_i64:
4098 case INDEX_op_ext8s_i64:
4099 case INDEX_op_ext16s_i64:
4100 case INDEX_op_ext32s_i64:
4101 case INDEX_op_ext_i32_i64:
4102 case INDEX_op_extu_i32_i64:
4103 case INDEX_op_bswap16_i64:
4104 case INDEX_op_bswap32_i64:
4105 case INDEX_op_bswap64_i64:
4106 case INDEX_op_extract_i64:
4107 return C_O1_I1(r, r);
4109 case INDEX_op_st8_i32:
4110 case INDEX_op_st16_i32:
4111 case INDEX_op_st_i32:
4112 case INDEX_op_st8_i64:
4113 case INDEX_op_st16_i64:
4114 case INDEX_op_st32_i64:
4115 case INDEX_op_st_i64:
4116 return C_O0_I2(r, r);
4118 case INDEX_op_add_i32:
4119 case INDEX_op_and_i32:
4120 case INDEX_op_or_i32:
4121 case INDEX_op_xor_i32:
4122 case INDEX_op_andc_i32:
4123 case INDEX_op_orc_i32:
4124 case INDEX_op_eqv_i32:
4125 case INDEX_op_shl_i32:
4126 case INDEX_op_shr_i32:
4127 case INDEX_op_sar_i32:
4128 case INDEX_op_rotl_i32:
4129 case INDEX_op_rotr_i32:
4130 case INDEX_op_and_i64:
4131 case INDEX_op_andc_i64:
4132 case INDEX_op_shl_i64:
4133 case INDEX_op_shr_i64:
4134 case INDEX_op_sar_i64:
4135 case INDEX_op_rotl_i64:
4136 case INDEX_op_rotr_i64:
4137 return C_O1_I2(r, r, ri);
4139 case INDEX_op_mul_i32:
4140 case INDEX_op_mul_i64:
4141 return C_O1_I2(r, r, rI);
4143 case INDEX_op_div_i32:
4144 case INDEX_op_divu_i32:
4145 case INDEX_op_rem_i32:
4146 case INDEX_op_remu_i32:
4147 case INDEX_op_nand_i32:
4148 case INDEX_op_nor_i32:
4149 case INDEX_op_muluh_i32:
4150 case INDEX_op_mulsh_i32:
4151 case INDEX_op_orc_i64:
4152 case INDEX_op_eqv_i64:
4153 case INDEX_op_nand_i64:
4154 case INDEX_op_nor_i64:
4155 case INDEX_op_div_i64:
4156 case INDEX_op_divu_i64:
4157 case INDEX_op_rem_i64:
4158 case INDEX_op_remu_i64:
4159 case INDEX_op_mulsh_i64:
4160 case INDEX_op_muluh_i64:
4161 return C_O1_I2(r, r, r);
4163 case INDEX_op_sub_i32:
4164 return C_O1_I2(r, rI, ri);
4165 case INDEX_op_add_i64:
4166 return C_O1_I2(r, r, rT);
4167 case INDEX_op_or_i64:
4168 case INDEX_op_xor_i64:
4169 return C_O1_I2(r, r, rU);
4170 case INDEX_op_sub_i64:
4171 return C_O1_I2(r, rI, rT);
4172 case INDEX_op_clz_i32:
4173 case INDEX_op_ctz_i32:
4174 case INDEX_op_clz_i64:
4175 case INDEX_op_ctz_i64:
4176 return C_O1_I2(r, r, rZW);
4178 case INDEX_op_brcond_i32:
4179 case INDEX_op_brcond_i64:
4180 return C_O0_I2(r, rC);
4181 case INDEX_op_setcond_i32:
4182 case INDEX_op_setcond_i64:
4183 case INDEX_op_negsetcond_i32:
4184 case INDEX_op_negsetcond_i64:
4185 return C_O1_I2(r, r, rC);
4186 case INDEX_op_movcond_i32:
4187 case INDEX_op_movcond_i64:
4188 return C_O1_I4(r, r, rC, rZ, rZ);
4190 case INDEX_op_deposit_i32:
4191 case INDEX_op_deposit_i64:
4192 return C_O1_I2(r, 0, rZ);
4193 case INDEX_op_brcond2_i32:
4194 return C_O0_I4(r, r, ri, ri);
4195 case INDEX_op_setcond2_i32:
4196 return C_O1_I4(r, r, r, ri, ri);
4197 case INDEX_op_add2_i64:
4198 case INDEX_op_add2_i32:
4199 return C_O2_I4(r, r, r, r, rI, rZM);
4200 case INDEX_op_sub2_i64:
4201 case INDEX_op_sub2_i32:
4202 return C_O2_I4(r, r, rI, rZM, r, r);
4204 case INDEX_op_qemu_ld_a32_i32:
4205 return C_O1_I1(r, r);
4206 case INDEX_op_qemu_ld_a64_i32:
4207 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
4208 case INDEX_op_qemu_ld_a32_i64:
4209 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
4210 case INDEX_op_qemu_ld_a64_i64:
4211 return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
4213 case INDEX_op_qemu_st_a32_i32:
4214 return C_O0_I2(r, r);
4215 case INDEX_op_qemu_st_a64_i32:
4216 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
4217 case INDEX_op_qemu_st_a32_i64:
4218 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
4219 case INDEX_op_qemu_st_a64_i64:
4220 return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
4222 case INDEX_op_qemu_ld_a32_i128:
4223 case INDEX_op_qemu_ld_a64_i128:
4224 return C_N1O1_I1(o, m, r);
4225 case INDEX_op_qemu_st_a32_i128:
4226 case INDEX_op_qemu_st_a64_i128:
4227 return C_O0_I3(o, m, r);
4229 case INDEX_op_add_vec:
4230 case INDEX_op_sub_vec:
4231 case INDEX_op_mul_vec:
4232 case INDEX_op_and_vec:
4233 case INDEX_op_or_vec:
4234 case INDEX_op_xor_vec:
4235 case INDEX_op_andc_vec:
4236 case INDEX_op_orc_vec:
4237 case INDEX_op_nor_vec:
4238 case INDEX_op_eqv_vec:
4239 case INDEX_op_nand_vec:
4240 case INDEX_op_cmp_vec:
4241 case INDEX_op_ssadd_vec:
4242 case INDEX_op_sssub_vec:
4243 case INDEX_op_usadd_vec:
4244 case INDEX_op_ussub_vec:
4245 case INDEX_op_smax_vec:
4246 case INDEX_op_smin_vec:
4247 case INDEX_op_umax_vec:
4248 case INDEX_op_umin_vec:
4249 case INDEX_op_shlv_vec:
4250 case INDEX_op_shrv_vec:
4251 case INDEX_op_sarv_vec:
4252 case INDEX_op_rotlv_vec:
4253 case INDEX_op_rotrv_vec:
4254 case INDEX_op_ppc_mrgh_vec:
4255 case INDEX_op_ppc_mrgl_vec:
4256 case INDEX_op_ppc_muleu_vec:
4257 case INDEX_op_ppc_mulou_vec:
4258 case INDEX_op_ppc_pkum_vec:
4259 case INDEX_op_dup2_vec:
4260 return C_O1_I2(v, v, v);
4262 case INDEX_op_not_vec:
4263 case INDEX_op_neg_vec:
4264 return C_O1_I1(v, v);
4266 case INDEX_op_dup_vec:
4267 return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
4269 case INDEX_op_ld_vec:
4270 case INDEX_op_dupm_vec:
4271 return C_O1_I1(v, r);
4273 case INDEX_op_st_vec:
4274 return C_O0_I2(v, r);
4276 case INDEX_op_bitsel_vec:
4277 case INDEX_op_ppc_msum_vec:
4278 return C_O1_I3(v, v, v, v);
default:
    g_assert_not_reached();
}
}
4285 static void tcg_target_init(TCGContext *s)
4287 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
4288 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
if (have_isa_altivec) {
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
}
4294 tcg_target_call_clobber_regs = 0;
4295 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
4296 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
4297 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
4298 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
4299 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
4300 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
4301 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
4302 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
4303 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
4304 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
4305 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
4306 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
4308 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
4309 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
4310 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
4311 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
4312 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
4313 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
4314 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
4315 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
4316 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
4317 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
4318 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
4319 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
4320 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
4321 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
4322 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
4323 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
4324 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
4325 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
4326 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
4327 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
4329 s->reserved_regs = 0;
4330 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
4331 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
4332 #if defined(_CALL_SYSV)
4333 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
#endif
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
4336 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
4339 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
4340 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
4341 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
if (USE_REG_TB) {
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */
}
#ifdef __ELF__
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
4351 uint8_t fde_def_cfa[4];
4352 uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;

/* We're expecting a 2-byte uleb128 encoded value. */
4356 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
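/*
 * Illustrative: two uleb128 bytes carry 14 payload bits, hence the
 * build-time bound above; e.g. a FRAME_SIZE of 560 (0x230) encodes
 * as the byte pair 0xb0, 0x04.
 */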
4358 #if TCG_TARGET_REG_BITS == 64
4359 # define ELF_HOST_MACHINE EM_PPC64
4361 # define ELF_HOST_MACHINE EM_PPC
#endif

static DebugFrame debug_frame = {
4365 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
.cie.id = -1,
.cie.version = 1,
.cie.code_align = 1,
4369 .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */
4370 .cie.return_column = 65,
4372 /* Total FDE size does not include the "len" member. */
4373 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
.fde_def_cfa = {
    12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
    (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
    (FRAME_SIZE >> 7)
},
.fde_reg_ofs = {
4381 /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
4382 0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    uint8_t *p = &debug_frame.fde_reg_ofs[3];
    int i;
4391 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4392 p[0] = 0x80 + tcg_target_callee_save_regs[i];
4393 p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
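/*
 * Illustrative: each pair is DW_CFA_offset (0x80 | regno) plus a
 * uleb128 factored offset; with data_align == -SZR, a register slot
 * SZR * n below the CFA is encoded simply as n.
 */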
4396 debug_frame.fde.func_start = (uintptr_t)buf;
4397 debug_frame.fde.func_len = buf_size;
4399 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4401 #endif /* __ELF__ */