1 /* PPU intrinsics as defined by the C/C++ Language extension for Cell BEA.
2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
4 This file is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 3 of the License, or (at your option)
9 This file is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 Under Section 7 of GPL version 3, you are granted additional
15 permissions described in the GCC Runtime Library Exception, version
16 3.1, as published by the Free Software Foundation.
18 You should have received a copy of the GNU General Public License and
19 a copy of the GCC Runtime Library Exception along with this program;
20 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
21 <http://www.gnu.org/licenses/>. */
25 supervisor/hypervisor mode ops. */
27 #ifndef _PPU_INTRINSICS_H
28 #define _PPU_INTRINSICS_H
30 #if !defined(__PPU__) && !defined(__ppc__) && !defined(__ppc64__) \
32 #error ppu_intrinsics.h included on wrong platform/compiler
40 * unsigned int __cntlzw(unsigned int)
41 * unsigned int __cntlzd(unsigned long long)
42 * int __mulhw(int, int)
43 * unsigned int __mulhwu(unsigned int, unsigned int)
44 * long long __mulhd(long long, long long)
45 * unsigned long long __mulhdu(unsigned long long, unsigned long long)
57 * void __db10cyc(void)
58 * void __db12cyc(void)
59 * void __db16cyc(void)
61 * void __mtspr(unsigned int spr, unsigned long long value)
62 * unsigned long long __mfspr(unsigned int spr)
63 * unsigned long long __mftb(void)
65 * void __icbi(void *base)
66 * void __dcbi(void *base)
68 * void __dcbf(void *base)
69 * void __dcbz(void *base)
70 * void __dcbst(void *base)
71 * void __dcbtst(void *base)
72 * void __dcbt(void *base)
73 * void __dcbt_TH1000(void *EATRUNC, bool D, bool UG, int ID)
74 * void __dcbt_TH1010(bool GO, int S, int UNITCNT, bool T, bool U, int ID)
76 * unsigned __lwarx(void *base)
77 * unsigned long long __ldarx(void *base)
78 * bool __stwcx(void *base, unsigned value)
79 * bool __stdcx(void *base, unsigned long long value)
81 * unsigned short __lhbrx(void *base)
82 * unsigned int __lwbrx(void *base)
83 * unsigned long long __ldbrx(void *base)
84 * void __sthbrx(void *base, unsigned short value)
85 * void __stwbrx(void *base, unsigned int value)
86 * void __stdbrx(void *base, unsigned long long value)
88 * double __fabs(double x)
89 * float __fabsf(float x)
90 * double __fnabs(double x)
91 * float __fnabsf(float x)
92 * double __fmadd(double x, double y, double z)
93 * double __fmsub(double x, double y, double z)
94 * double __fnmadd(double x, double y, double z)
95 * double __fnmsub(double x, double y, double z)
96 * float __fmadds(float x, float y, float z)
97 * float __fmsubs(float x, float y, float z)
98 * float __fnmadds(float x, float y, float z)
99 * float __fnmsubs(float x, float y, float z)
100 * double __fsel(double x, double y, double z)
101 * float __fsels(float x, float y, float z)
102 * double __frsqrte(double x)
103 * float __fres(float x)
104 * double __fsqrt(double x)
105 * float __fsqrts(float x)
106 * long long __fctid(double x)
107 * int __fctiw(double x)
108 * double __fcfid(long long x)
109 * double __mffs(void)
110 * void __mtfsf(int mask, double value)
111 * void __mtfsfi(int bits, int field)
114 * double __setflm(double)
117 * void __protected_unlimited_stream_set (unsigned int direction, const void *add, unsigned int ID)
118 * void __protected_stream_set (unsigned int direction, const void *add, unsigned int ID)
119 * void __protected_stream_stop_all (void)
120 * void __protected_stream_stop (unsigned int ID)
121 * void __protected_stream_count (unsigned int unit_cnt, unsigned int ID)
122 * void __protected_stream_go (void)
/* 16-byte vector of ints.  The cache-control macros in this header use it
   as the type of the memory operand they hand to the asm statement.  */
typedef int __V4SI __attribute__ ((__vector_size__ (16)));
/* Count the leading zero bits of a 32-bit (__cntlzw) or 64-bit (__cntlzd)
   value.

   __builtin_clz and __builtin_clzll are documented as having an undefined
   result for a zero argument, so zero is handled explicitly and yields the
   operand width (32 or 64).  The argument is converted to the operand type
   and evaluated exactly once.  */
#define __cntlzw(v) __extension__ \
  ({ unsigned int __ppu_v = (v); \
     __ppu_v ? __builtin_clz (__ppu_v) : 32; })
#define __cntlzd(v) __extension__ \
  ({ unsigned long long __ppu_v = (v); \
     __ppu_v ? __builtin_clzll (__ppu_v) : 64; })
130 #define __mulhw(a,b) __extension__ \
132 __asm__ ("mulhw %0,%1,%2" \
138 #define __mulhwu(a,b) __extension__ \
139 ({unsigned int result; \
140 __asm__ ("mulhwu %0,%1,%2" \
142 : "r" ((unsigned int) (a)), \
143 "r" ((unsigned int) (b))); \
147 #define __mulhd(a,b) __extension__ \
148 ({ long long result; \
149 __asm__ ("mulhd %0,%1,%2" \
151 : "r" ((long long) (a)), \
152 "r" ((long long) (b))); \
155 #define __mulhdu(a,b) __extension__ \
156 ({unsigned long long result; \
157 __asm__ ("mulhdu %0,%1,%2" \
159 : "r" ((unsigned long long) (a)), \
160 "r" ((unsigned long long) (b))); \
162 #endif /* __powerpc64__ */
/* Synchronization instructions.  Each macro expands to a volatile asm
   statement that emits the named instruction and lists "memory" as
   clobbered, so the compiler does not keep memory values cached in
   registers across it.  */
#define __sync()    __asm__ __volatile__ ("sync"   : : : "memory")
#define __isync()   __asm__ __volatile__ ("isync"  : : : "memory")
#define __lwsync()  __asm__ __volatile__ ("lwsync" : : : "memory")
#define __eieio()   __asm__ __volatile__ ("eieio"  : : : "memory")
/* No-op style instructions, each emitted as a volatile asm statement with
   a "memory" clobber.  __nop is "ori 0,0,0"; the others are "or N,N,N"
   with the same register number in all three operand positions.
   NOTE(review): going by the macro names these are presumably the
   thread-priority (__cctp*) and dispatch-delay (__db*cyc) hint encodings;
   confirm against the processor documentation.  */
#define __nop()      __asm__ __volatile__ ("ori 0,0,0"   : : : "memory")
#define __cctpl()    __asm__ __volatile__ ("or 1,1,1"    : : : "memory")
#define __cctpm()    __asm__ __volatile__ ("or 2,2,2"    : : : "memory")
#define __cctph()    __asm__ __volatile__ ("or 3,3,3"    : : : "memory")
#define __db8cyc()   __asm__ __volatile__ ("or 28,28,28" : : : "memory")
#define __db10cyc()  __asm__ __volatile__ ("or 29,29,29" : : : "memory")
#define __db12cyc()  __asm__ __volatile__ ("or 30,30,30" : : : "memory")
#define __db16cyc()  __asm__ __volatile__ ("or 31,31,31" : : : "memory")
/* Emit "mtspr SPR,VALUE".  SPR uses the "n" constraint, so it must be a
   compile-time integer constant; VALUE is supplied in a general register
   ("r").  The statement is volatile and has no outputs.  */
#define __mtspr(spr, value)                                             \
  __asm__ __volatile__ ("mtspr %0,%1"                                   \
                        : /* no outputs */                              \
                        : "n" (spr), "r" (value))
182 #define __mfspr(spr) __extension__ \
183 ({ unsigned long long result; \
184 __asm__ volatile ("mfspr %0,%1" : "=r" (result) : "n" (spr)); \
186 #endif /* __powerpc64__ */
189 /* Work around the hardware bug in the current Cell implementation. */
190 #define __mftb() __extension__ \
191 ({ unsigned long long result; \
192 __asm__ volatile ("1: mftb %[current_tb]\n" \
193 "\tcmpwi 7, %[current_tb], 0\n" \
195 : [current_tb] "=r" (result): \
199 #define __mftb() __extension__ \
200 ({ unsigned long long result; \
202 __asm__ volatile ("1:\n" \
208 : "=r" (result), "=r" (t)); \
210 #endif /* __powerpc64__ */
/* Cache-block instructions.  Each macro emits the instruction it is named
   after on the address BASE.  BASE is cast to a __V4SI pointer and its
   target is passed to the asm as a "Z" memory operand, printed with the
   %y operand modifier.  Every statement is volatile and clobbers
   "memory".  */
#define __dcbf(base)                                                    \
  __asm__ __volatile__ ("dcbf %y0"                                      \
                        : "=Z" (*(__V4SI *) (base))                     \
                        :                                               \
                        : "memory")

#define __dcbz(base)                                                    \
  __asm__ __volatile__ ("dcbz %y0"                                      \
                        : "=Z" (*(__V4SI *) (base))                     \
                        :                                               \
                        : "memory")

#define __dcbst(base)                                                   \
  __asm__ __volatile__ ("dcbst %y0"                                     \
                        : "=Z" (*(__V4SI *) (base))                     \
                        :                                               \
                        : "memory")

#define __dcbtst(base)                                                  \
  __asm__ __volatile__ ("dcbtst %y0"                                    \
                        : "=Z" (*(__V4SI *) (base))                     \
                        :                                               \
                        : "memory")

#define __dcbt(base)                                                    \
  __asm__ __volatile__ ("dcbt %y0"                                      \
                        : "=Z" (*(__V4SI *) (base))                     \
                        :                                               \
                        : "memory")

#define __icbi(base)                                                    \
  __asm__ __volatile__ ("icbi %y0"                                      \
                        : "=Z" (*(__V4SI *) (base))                     \
                        :                                               \
                        : "memory")
/* Emit "dcbt <addr>,8", where <addr> is an address-sized word assembled
   from the arguments:
     - EATRUNC with its low seven bits cleared (& ~0x7F),
     - (D & 1) placed at bit 6,
     - (UG & 1) placed at bit 5,
     - (ID & 0xF) in the low four bits.
   The word is cast to a __V4SI pointer so it can be passed as the "Z"
   memory operand.  The statement is volatile and clobbers "memory".  */
#define __dcbt_TH1000(EATRUNC, D, UG, ID)                               \
  __asm__ __volatile__ ("dcbt %y0,8"                                    \
    : "=Z" (*(__V4SI *) (__SIZE_TYPE__)                                 \
            ((((__SIZE_TYPE__) (EATRUNC)) & ~0x7F)                      \
             | ((((D) & 1) << 6)                                        \
                | (((UG) & 1) << 5)                                     \
                | ((ID) & 0xF))))                                       \
    :                                                                   \
    : "memory")
237 #define __dcbt_TH1010(GO, S, UNITCNT, T, U, ID) \
238 __asm__ volatile ("dcbt %y0,10" \
239 : "=Z" (*(__V4SI*) (__SIZE_TYPE__)((((__SIZE_TYPE__) (GO) & 1) << 31) \
240 | (((S) & 0x3) << 29) \
241 | (((UNITCNT) & 0x3FF) << 7) \
244 | ((ID) & 0xF))) : : "memory")
/* Stream-control wrappers.  Each one is a fixed-argument call of
   __dcbt_TH1000 or __dcbt_TH1010.  */

/* __dcbt_TH1000 on ADDR with D = DIRECTION >> 1, UG = 1.  */
#define __protected_unlimited_stream_set(DIRECTION, ADDR, ID)           \
  __dcbt_TH1000 ((ADDR), ((DIRECTION) >> 1), 1, (ID))

/* __dcbt_TH1000 on ADDR with D = DIRECTION >> 1, UG = 0.  */
#define __protected_stream_set(DIRECTION, ADDR, ID)                     \
  __dcbt_TH1000 ((ADDR), ((DIRECTION) >> 1), 0, (ID))

/* __dcbt_TH1010 with S = 3; every other field is zero.  */
#define __protected_stream_stop_all()                                   \
  __dcbt_TH1010 (0, 3, 0, 0, 0, 0)

/* __dcbt_TH1010 with S = 2 for stream ID.  */
#define __protected_stream_stop(ID)                                     \
  __dcbt_TH1010 (0, 2, 0, 0, 0, (ID))

/* __dcbt_TH1010 with UNITCNT = COUNT for stream ID.  */
#define __protected_stream_count(COUNT, ID)                             \
  __dcbt_TH1010 (0, 0, (COUNT), 0, 0, (ID))

/* __dcbt_TH1010 with GO = 1; every other field is zero.  */
#define __protected_stream_go()                                         \
  __dcbt_TH1010 (1, 0, 0, 0, 0, 0)
264 #define __lhbrx(base) __extension__ \
265 ({unsigned short result; \
266 typedef struct {char a[2];} halfwordsize; \
267 halfwordsize *ptrp = (halfwordsize*)(void*)(base); \
268 __asm__ ("lhbrx %0,%y1" \
273 #define __lwbrx(base) __extension__ \
274 ({unsigned int result; \
275 typedef struct {char a[4];} wordsize; \
276 wordsize *ptrp = (wordsize*)(void*)(base); \
277 __asm__ ("lwbrx %0,%y1" \
284 #define __ldbrx(base) __extension__ \
285 ({unsigned long long result; \
286 typedef struct {char a[8];} doublewordsize; \
287 doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
288 __asm__ ("ldbrx %0,%y1" \
293 #define __ldbrx(base) __extension__ \
294 ({unsigned long long result; \
295 typedef struct {char a[8];} doublewordsize; \
296 doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
297 __asm__ ("lwbrx %L0,%y1\n" \
300 : "Z" (*ptrp), "Z" (*((char *) ptrp + 4))); \
302 #endif /* __powerpc64__ */
305 #define __sthbrx(base, value) do { \
306 typedef struct {char a[2];} halfwordsize; \
307 halfwordsize *ptrp = (halfwordsize*)(void*)(base); \
308 __asm__ ("sthbrx %1,%y0" \
313 #define __stwbrx(base, value) do { \
314 typedef struct {char a[4];} wordsize; \
315 wordsize *ptrp = (wordsize*)(void*)(base); \
316 __asm__ ("stwbrx %1,%y0" \
322 #define __stdbrx(base, value) do { \
323 typedef struct {char a[8];} doublewordsize; \
324 doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
325 __asm__ ("stdbrx %1,%y0" \
330 #define __stdbrx(base, value) do { \
331 typedef struct {char a[8];} doublewordsize; \
332 doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
333 __asm__ ("stwbrx %L2,%y0\n" \
335 : "=Z" (*ptrp), "=Z" (*((char *) ptrp + 4)) \
338 #endif /* __powerpc64__ */
341 #define __lwarx(base) __extension__ \
342 ({unsigned int result; \
343 typedef struct {char a[4];} wordsize; \
344 wordsize *ptrp = (wordsize*)(void*)(base); \
345 __asm__ volatile ("lwarx %0,%y1" \
351 #define __ldarx(base) __extension__ \
352 ({unsigned long long result; \
353 typedef struct {char a[8];} doublewordsize; \
354 doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
355 __asm__ volatile ("ldarx %0,%y1" \
359 #endif /* __powerpc64__ */
361 #define __stwcx(base, value) __extension__ \
362 ({unsigned int result; \
363 typedef struct {char a[4];} wordsize; \
364 wordsize *ptrp = (wordsize*)(void*)(base); \
365 __asm__ volatile ("stwcx. %2,%y1\n" \
369 : "r" (value) : "cr0"); \
370 ((result & 0x20000000) >> 29); })
374 #define __stdcx(base, value) __extension__ \
375 ({unsigned long long result; \
376 typedef struct {char a[8];} doublewordsize; \
377 doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
378 __asm__ volatile ("stdcx. %2,%y1\n" \
382 : "r" (value) : "cr0"); \
383 ((result & 0x20000000) >> 29); })
384 #endif /* __powerpc64__ */
386 #define __mffs() __extension__ \
388 __asm__ volatile ("mffs %0" : "=d" (result)); \
/* Floating-point status/control register writes.  All are volatile asm
   statements with no outputs.  Operands using the "n" constraint must be
   compile-time integer constants.  */

/* "mtfsf MASK,VALUE": VALUE is converted to double and passed in a
   floating-point register ("d").  */
#define __mtfsf(mask, value)                                            \
  __asm__ __volatile__ ("mtfsf %0,%1"                                   \
                        : /* no outputs */                              \
                        : "n" (mask), "d" ((double) (value)))

/* "mtfsfi BITS,FIELD": both operands are immediates, emitted in that
   order.  */
#define __mtfsfi(bits, field)                                           \
  __asm__ __volatile__ ("mtfsfi %0,%1"                                  \
                        : /* no outputs */                              \
                        : "n" (bits), "n" (field))

/* "mtfsb0 BIT" and "mtfsb1 BIT" with an immediate bit number.  */
#define __mtfsb0(bit)  __asm__ __volatile__ ("mtfsb0 %0" : : "n" (bit))
#define __mtfsb1(bit)  __asm__ __volatile__ ("mtfsb1 %0" : : "n" (bit))
400 #define __setflm(v) __extension__ \
402 __asm__ volatile ("mffs %0\n\tmtfsf 255,%1" \
404 : "d" ((double) (v))); \
407 /* __builtin_fabs may perform unnecessary rounding. */
409 /* Rename __fabs and __fabsf to work around internal prototypes defined
410 in bits/mathcalls.h with some glibc versions. */
411 #define __fabs __ppu_fabs
412 #define __fabsf __ppu_fabsf
414 static __inline__
double __fabs(double x
) __attribute__((always_inline
));
415 static __inline__
double
419 __asm__("fabs %0,%1" : "=d"(r
) : "d"(x
));
423 static __inline__
float __fabsf(float x
) __attribute__((always_inline
));
424 static __inline__
float
428 __asm__("fabs %0,%1" : "=f"(r
) : "f"(x
));
432 static __inline__
double __fnabs(double x
) __attribute__((always_inline
));
433 static __inline__
double
437 __asm__("fnabs %0,%1" : "=d"(r
) : "d"(x
));
441 static __inline__
float __fnabsf(float x
) __attribute__((always_inline
));
442 static __inline__
float
446 __asm__("fnabs %0,%1" : "=f"(r
) : "f"(x
));
450 static __inline__
double __fmadd(double x
, double y
, double z
)
451 __attribute__((always_inline
));
452 static __inline__
double
453 __fmadd(double x
, double y
, double z
)
456 __asm__("fmadd %0,%1,%2,%3" : "=d"(r
) : "d"(x
),"d"(y
),"d"(z
));
460 static __inline__
double __fmsub(double x
, double y
, double z
)
461 __attribute__((always_inline
));
462 static __inline__
double
463 __fmsub(double x
, double y
, double z
)
466 __asm__("fmsub %0,%1,%2,%3" : "=d"(r
) : "d"(x
),"d"(y
),"d"(z
));
470 static __inline__
double __fnmadd(double x
, double y
, double z
)
471 __attribute__((always_inline
));
472 static __inline__
double
473 __fnmadd(double x
, double y
, double z
)
476 __asm__("fnmadd %0,%1,%2,%3" : "=d"(r
) : "d"(x
),"d"(y
),"d"(z
));
480 static __inline__
double __fnmsub(double x
, double y
, double z
)
481 __attribute__((always_inline
));
482 static __inline__
double
483 __fnmsub(double x
, double y
, double z
)
486 __asm__("fnmsub %0,%1,%2,%3" : "=d"(r
) : "d"(x
),"d"(y
),"d"(z
));
490 static __inline__
float __fmadds(float x
, float y
, float z
)
491 __attribute__((always_inline
));
492 static __inline__
float
493 __fmadds(float x
, float y
, float z
)
496 __asm__("fmadds %0,%1,%2,%3" : "=f"(r
) : "f"(x
),"f"(y
),"f"(z
));
500 static __inline__
float __fmsubs(float x
, float y
, float z
)
501 __attribute__((always_inline
));
502 static __inline__
float
503 __fmsubs(float x
, float y
, float z
)
506 __asm__("fmsubs %0,%1,%2,%3" : "=f"(r
) : "f"(x
),"f"(y
),"f"(z
));
510 static __inline__
float __fnmadds(float x
, float y
, float z
)
511 __attribute__((always_inline
));
512 static __inline__
float
513 __fnmadds(float x
, float y
, float z
)
516 __asm__("fnmadds %0,%1,%2,%3" : "=f"(r
) : "f"(x
),"f"(y
),"f"(z
));
520 static __inline__
float __fnmsubs(float x
, float y
, float z
)
521 __attribute__((always_inline
));
522 static __inline__
float
523 __fnmsubs(float x
, float y
, float z
)
526 __asm__("fnmsubs %0,%1,%2,%3" : "=f"(r
) : "f"(x
),"f"(y
),"f"(z
));
530 static __inline__
double __fsel(double x
, double y
, double z
)
531 __attribute__((always_inline
));
532 static __inline__
double
533 __fsel(double x
, double y
, double z
)
536 __asm__("fsel %0,%1,%2,%3" : "=d"(r
) : "d"(x
),"d"(y
),"d"(z
));
540 static __inline__
float __fsels(float x
, float y
, float z
)
541 __attribute__((always_inline
));
542 static __inline__
float
543 __fsels(float x
, float y
, float z
)
546 __asm__("fsel %0,%1,%2,%3" : "=f"(r
) : "f"(x
),"f"(y
),"f"(z
));
550 static __inline__
double __frsqrte(double x
) __attribute__((always_inline
));
551 static __inline__
double
555 __asm__("frsqrte %0,%1" : "=d" (r
) : "d" (x
));
559 static __inline__
float __fres(float x
) __attribute__((always_inline
));
560 static __inline__
float
564 __asm__("fres %0,%1" : "=f"(r
) : "f"(x
));
568 static __inline__
double __fsqrt(double x
) __attribute__((always_inline
));
569 static __inline__
double
573 __asm__("fsqrt %0,%1" : "=d"(r
) : "d"(x
));
577 static __inline__
float __fsqrts(float x
) __attribute__((always_inline
));
578 static __inline__
float
582 __asm__("fsqrts %0,%1" : "=f"(r
) : "f"(x
));
586 static __inline__
double __fmul (double a
, double b
) __attribute__ ((always_inline
));
587 static __inline__
double
588 __fmul(double a
, double b
)
591 __asm__ ("fmul %0,%1,%2" : "=d" (d
) : "d" (a
), "d" (b
));
595 static __inline__
float __fmuls (float a
, float b
) __attribute__ ((always_inline
));
596 static __inline__
float
597 __fmuls (float a
, float b
)
600 __asm__ ("fmuls %0,%1,%2" : "=d" (d
) : "f" (a
), "f" (b
));
604 static __inline__
float __frsp (float a
) __attribute__ ((always_inline
));
605 static __inline__
float
609 __asm__ ("frsp %0,%1" : "=d" (d
) : "f" (a
));
613 static __inline__
double __fcfid (long long a
) __attribute__((always_inline
));
614 static __inline__
double
615 __fcfid (long long a
)
618 __asm__ ("fcfid %0,%1" : "=d" (d
) : "d" (a
));
622 static __inline__
long long __fctid (double a
) __attribute__ ((always_inline
));
623 static __inline__
long long
627 __asm__ ("fctid %0,%1" : "=d" (d
) : "d" (a
));
631 static __inline__
long long __fctidz (double a
) __attribute__ ((always_inline
));
632 static __inline__
long long
636 __asm__ ("fctidz %0,%1" : "=d" (d
) : "d" (a
));
640 static __inline__
int __fctiw (double a
) __attribute__ ((always_inline
));
641 static __inline__
int
644 unsigned long long d
;
645 __asm__ ("fctiw %0,%1" : "=d" (d
) : "d" (a
));
649 static __inline__
int __fctiwz (double a
) __attribute__ ((always_inline
));
650 static __inline__
int
654 __asm__ ("fctiwz %0,%1" : "=d" (d
) : "d" (a
));
659 #define __rldcl(a,b,mb) __extension__ \
661 unsigned long long d; \
662 __asm__ ("rldcl %0,%1,%2,%3" : "=r" (d) : "r" (a), "r" (b), "i" (mb)); \
666 #define __rldcr(a,b,me) __extension__ \
668 unsigned long long d; \
669 __asm__ ("rldcr %0,%1,%2,%3" : "=r" (d) : "r" (a), "r" (b), "i" (me)); \
673 #define __rldic(a,sh,mb) __extension__ \
675 unsigned long long d; \
676 __asm__ ("rldic %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (mb)); \
680 #define __rldicl(a,sh,mb) __extension__ \
682 unsigned long long d; \
683 __asm__ ("rldicl %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (mb)); \
687 #define __rldicr(a,sh,me) __extension__ \
689 unsigned long long d; \
690 __asm__ ("rldicr %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (me)); \
694 #define __rldimi(a,b,sh,mb) __extension__ \
696 unsigned long long d; \
697 __asm__ ("rldimi %0,%1,%2,%3" : "=r" (d) : "r" (b), "i" (sh), "i" (mb), "0" (a)); \
700 #endif /* __powerpc64__ */
702 #define __rlwimi(a,b,sh,mb,me) __extension__ \
705 __asm__ ("rlwimi %0,%1,%2,%3,%4" : "=r" (d) : "r" (b), "i" (sh), "i" (mb), "i" (me), "0" (a)); \
709 #define __rlwinm(a,sh,mb,me) __extension__ \
712 __asm__ ("rlwinm %0,%1,%2,%3,%4" : "=r" (d) : "r" (a), "i" (sh), "i" (mb), "i" (me)); \
716 #define __rlwnm(a,b,mb,me) __extension__ \
719 __asm__ ("rlwnm %0,%1,%2,%3,%4" : "=r" (d) : "r" (a), "r" (b), "i" (mb), "i" (me)); \
727 #endif /* _PPU_INTRINSICS_H */