/* Internal libc stuff for floating point environment routines.
   Copyright (C) 1997-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#ifndef _FENV_LIBC_H
#define _FENV_LIBC_H	1

#include <fenv.h>
#include <ldsodefs.h>
#include <sysdep.h>

extern const fenv_t *__fe_nomask_env_priv (void);

extern const fenv_t *__fe_mask_env (void) attribute_hidden;

/* If the old env had any enabled exceptions and the new env has no enabled
   exceptions, then mask SIGFPE in the MSR FE0/FE1 bits.  This may allow the
   FPU to run faster because it always takes the default action and can not
   generate SIGFPE.  */
#define __TEST_AND_ENTER_NON_STOP(old, new) \
  do { \
    if (((old) & FPSCR_ENABLES_MASK) != 0 && ((new) & FPSCR_ENABLES_MASK) == 0) \
      (void) __fe_mask_env (); \
  } while (0)

/* If the old env has no enabled exceptions and the new env has any enabled
   exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits.  This will put the
   hardware into "precise mode" and may cause the FPU to run slower on some
   hardware.  */
#define __TEST_AND_EXIT_NON_STOP(old, new) \
  do { \
    if (((old) & FPSCR_ENABLES_MASK) == 0 && ((new) & FPSCR_ENABLES_MASK) != 0) \
      (void) __fe_nomask_env_priv (); \
  } while (0)
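
/* Illustrative sketch (not part of the original header): installing a new
   environment image the way the fenv primitives do.  'envp' is a
   hypothetical fenv_t pointer supplied by the caller.

     fenv_union_t old, new;
     new.fenv = *envp;
     old.fenv = fegetenv_register ();
     __TEST_AND_EXIT_NON_STOP (old.l, new.l);
     __TEST_AND_ENTER_NON_STOP (old.l, new.l);
     fesetenv_register (new.fenv);
*/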

/* The sticky bits in the FPSCR indicating exceptions have occurred.  */
#define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID)

/* Equivalent to fegetenv, but returns a fenv_t instead of taking a
   pointer.  */
#define fegetenv_register() __builtin_mffs ()

/* Equivalent to fegetenv_register, but only returns bits for
   status, exception enables, and mode.
   Nicely, it turns out that the 'mffsl' instruction will decode to
   'mffs' on architectures older than "power9" because the additional
   bits set for 'mffsl' are "don't care" for 'mffs'.  'mffs' is a superset
   of 'mffsl'.  */
#define fegetenv_control() \
  ({register double __fr; \
    __asm__ __volatile__ ( \
      ".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
      : "=f" (__fr)); \
    __fr; \
  })
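
/* Illustrative sketch: fegetenv_register reads the whole FPSCR image, while
   fegetenv_control is the lighter-weight read used when only the control
   bits (enables and rounding mode) matter.

     fenv_union_t u;
     u.fenv = fegetenv_control ();
     if ((u.l & FPSCR_RN_MASK) == 0)
       ;  ...rounding mode is currently round-to-nearest...
*/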

/* Starting with GCC 14, __builtin_set_fpscr_rn can be used to return the
   FPSCR fields as a double.  This support is available
   on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined.
   To retain backward compatibility with older GCC, we still retain the
   old inline assembly implementation.  */
#ifdef __SET_FPSCR_RN_RETURNS_FPSCR__
#define __fe_mffscrn(rn) __builtin_set_fpscr_rn (rn)
#else
#define __fe_mffscrn(rn) \
  ({register fenv_union_t __fr; \
    if (__builtin_constant_p (rn)) \
      __asm__ __volatile__ ( \
        ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
        : "=f" (__fr.fenv) : "n" (rn)); \
    else \
    { \
      __fr.l = (rn); \
      __asm__ __volatile__ ( \
        ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
        : "=f" (__fr.fenv) : "f" (__fr.fenv)); \
    } \
    __fr.fenv; \
  })
#endif

/* Like fegetenv_control, but also sets the rounding mode.  */
#ifdef _ARCH_PWR9
#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
#else
/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which still reads the
   FPSCR but does not set the rounding mode.  Explicitly set the rounding
   mode when 'mffscrn' did not do it.  */
#define fegetenv_and_set_rn(rn) \
  ({register fenv_union_t __fr; \
    __fr.fenv = __fe_mffscrn (rn); \
    if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))) \
      __fesetround_inline (rn); \
    __fr.fenv; \
  })
#endif
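
/* Illustrative sketch: temporarily switch to round-toward-zero around a
   computation, then restore the caller's rounding mode.  FE_TOWARDZERO is
   the raw RN encoding (1) on powerpc, so it can be passed through directly.

     fenv_union_t saved;
     saved.fenv = fegetenv_and_set_rn (FE_TOWARDZERO);
     ...computation that must truncate...
     fegetenv_and_set_rn (saved.l & FPSCR_RN_MASK);
*/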

/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
#define fesetenv_register(env) \
  do { \
    double d = (env); \
    if (GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \
      asm volatile (".machine push; " \
                    ".machine \"power6\"; " \
                    "mtfsf 0xff,%0,1,0; " \
                    ".machine pop" : : "f" (d)); \
    else \
      __builtin_mtfsf (0xff, d); \
  } while (0)

/* Set the last 2 nibbles of the FPSCR, which contain the
   exception enables and the rounding mode.
   'fegetenv_control' retrieves these bits by reading the FPSCR.  */
#define fesetenv_control(env) __builtin_mtfsf (0b00000011, (env));
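
/* Illustrative sketch: update only the enable and rounding-mode nibbles,
   leaving the sticky status bits untouched, by pairing fegetenv_control
   with fesetenv_control (here, to disable all trapping exceptions).

     fenv_union_t u;
     u.fenv = fegetenv_control ();
     u.l &= ~FPSCR_ENABLES_MASK;
     fesetenv_control (u.fenv);
*/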

/* This very handy macro:
   - Sets the rounding mode to 'round to nearest';
   - Sets the processor into IEEE mode; and
   - Prevents exceptions from being raised for inexact results.
   These things happen to be exactly what you need for typical elementary
   functions.  */
#define relax_fenv_state() \
  do { \
    if (GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \
      asm volatile (".machine push; .machine \"power6\"; " \
                    "mtfsfi 7,0,1; .machine pop"); \
    asm volatile ("mtfsfi 7,0"); \
  } while (0)
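
/* Illustrative sketch of how an elementary function can use this, saving the
   caller's environment first and restoring it before returning:

     fenv_t saved = fegetenv_register ();
     relax_fenv_state ();
     ...core computation...
     fesetenv_register (saved);
*/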

/* Set/clear a particular FPSCR bit (for instance,
   reset_fpscr_bit (FPSCR_VE);
   prevents INVALID exceptions from being raised).  */
#define set_fpscr_bit(x) asm volatile ("mtfsb1 %0" : : "n"(x))
#define reset_fpscr_bit(x) asm volatile ("mtfsb0 %0" : : "n"(x))

typedef union
{
  fenv_t fenv;
  unsigned long long l;
} fenv_union_t;

static inline int
__fesetround_inline (int round)
{
#ifdef _ARCH_PWR9
  __fe_mffscrn (round);
#else
  if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
    __fe_mffscrn (round);
  else if ((unsigned int) round < 2)
    {
       asm volatile ("mtfsb0 30");
       if ((unsigned int) round == 0)
         asm volatile ("mtfsb0 31");
       else
         asm volatile ("mtfsb1 31");
    }
  else
    {
       asm volatile ("mtfsb1 30");
       if ((unsigned int) round == 2)
         asm volatile ("mtfsb0 31");
       else
         asm volatile ("mtfsb1 31");
    }
#endif
  return 0;
}
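
/* The powerpc FE_* rounding-mode macros are the raw 2-bit RN encodings
   (FE_TONEAREST 0, FE_TOWARDZERO 1, FE_UPWARD 2, FE_DOWNWARD 3), so they can
   be passed straight through, e.g.

     __fesetround_inline (FE_UPWARD);
*/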

/* Same as __fesetround_inline, but it also disables the floating-point
   inexact exception (bit 60 - XE, assuming NI is 0).  It does not check
   whether ROUND is a valid value.  */
static inline void
__fesetround_inline_disable_inexact (const int round)
{
  asm volatile ("mtfsfi 7,%0" : : "n" (round));
}

#define FPSCR_MASK(bit) (1 << (31 - (bit)))

/* Definitions of all the FPSCR bit numbers.  */
enum {
  FPSCR_FX = 0,        /* exception summary */
#define FPSCR_FX_MASK (FPSCR_MASK (FPSCR_FX))
  FPSCR_FEX,           /* enabled exception summary */
#define FPSCR_FEX_MASK (FPSCR_MASK (FPSCR_FEX))
  FPSCR_VX,            /* invalid operation summary */
#define FPSCR_VX_MASK (FPSCR_MASK (FPSCR_VX))
  FPSCR_OX,            /* overflow */
#define FPSCR_OX_MASK (FPSCR_MASK (FPSCR_OX))
  FPSCR_UX,            /* underflow */
#define FPSCR_UX_MASK (FPSCR_MASK (FPSCR_UX))
  FPSCR_ZX,            /* zero divide */
#define FPSCR_ZX_MASK (FPSCR_MASK (FPSCR_ZX))
  FPSCR_XX,            /* inexact */
#define FPSCR_XX_MASK (FPSCR_MASK (FPSCR_XX))
  FPSCR_VXSNAN,        /* invalid operation for sNaN */
#define FPSCR_VXSNAN_MASK (FPSCR_MASK (FPSCR_VXSNAN))
  FPSCR_VXISI,         /* invalid operation for Inf-Inf */
#define FPSCR_VXISI_MASK (FPSCR_MASK (FPSCR_VXISI))
  FPSCR_VXIDI,         /* invalid operation for Inf/Inf */
#define FPSCR_VXIDI_MASK (FPSCR_MASK (FPSCR_VXIDI))
  FPSCR_VXZDZ,         /* invalid operation for 0/0 */
#define FPSCR_VXZDZ_MASK (FPSCR_MASK (FPSCR_VXZDZ))
  FPSCR_VXIMZ,         /* invalid operation for Inf*0 */
#define FPSCR_VXIMZ_MASK (FPSCR_MASK (FPSCR_VXIMZ))
  FPSCR_VXVC,          /* invalid operation for invalid compare */
#define FPSCR_VXVC_MASK (FPSCR_MASK (FPSCR_VXVC))
  FPSCR_FR,            /* fraction rounded [fraction was incremented by round] */
#define FPSCR_FR_MASK (FPSCR_MASK (FPSCR_FR))
  FPSCR_FI,            /* fraction inexact */
#define FPSCR_FI_MASK (FPSCR_MASK (FPSCR_FI))
  FPSCR_FPRF_C,        /* result class descriptor */
#define FPSCR_FPRF_C_MASK (FPSCR_MASK (FPSCR_FPRF_C))
  FPSCR_FPRF_FL,       /* result less than (usually, less than 0) */
#define FPSCR_FPRF_FL_MASK (FPSCR_MASK (FPSCR_FPRF_FL))
  FPSCR_FPRF_FG,       /* result greater than */
#define FPSCR_FPRF_FG_MASK (FPSCR_MASK (FPSCR_FPRF_FG))
  FPSCR_FPRF_FE,       /* result equal to */
#define FPSCR_FPRF_FE_MASK (FPSCR_MASK (FPSCR_FPRF_FE))
  FPSCR_FPRF_FU,       /* result unordered */
#define FPSCR_FPRF_FU_MASK (FPSCR_MASK (FPSCR_FPRF_FU))
  FPSCR_20,            /* reserved */
  FPSCR_VXSOFT,        /* invalid operation set by software */
#define FPSCR_VXSOFT_MASK (FPSCR_MASK (FPSCR_VXSOFT))
  FPSCR_VXSQRT,        /* invalid operation for square root */
#define FPSCR_VXSQRT_MASK (FPSCR_MASK (FPSCR_VXSQRT))
  FPSCR_VXCVI,         /* invalid operation for invalid integer convert */
#define FPSCR_VXCVI_MASK (FPSCR_MASK (FPSCR_VXCVI))
  FPSCR_VE,            /* invalid operation exception enable */
#define FPSCR_VE_MASK (FPSCR_MASK (FPSCR_VE))
  FPSCR_OE,            /* overflow exception enable */
#define FPSCR_OE_MASK (FPSCR_MASK (FPSCR_OE))
  FPSCR_UE,            /* underflow exception enable */
#define FPSCR_UE_MASK (FPSCR_MASK (FPSCR_UE))
  FPSCR_ZE,            /* zero divide exception enable */
#define FPSCR_ZE_MASK (FPSCR_MASK (FPSCR_ZE))
  FPSCR_XE,            /* inexact exception enable */
#define FPSCR_XE_MASK (FPSCR_MASK (FPSCR_XE))
#ifdef _ARCH_PWR6
  FPSCR_29,            /* Reserved in ISA 2.05 */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_29))
#else
  FPSCR_NI,            /* non-IEEE mode (typically, no denormalised numbers) */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_NI))
#endif /* _ARCH_PWR6 */
  /* the remaining two least-significant bits keep the rounding mode */
  FPSCR_RN_hi,
#define FPSCR_RN_hi_MASK (FPSCR_MASK (FPSCR_RN_hi))
  FPSCR_RN_lo
#define FPSCR_RN_lo_MASK (FPSCR_MASK (FPSCR_RN_lo))
};

#define FPSCR_RN_MASK (FPSCR_RN_hi_MASK|FPSCR_RN_lo_MASK)
#define FPSCR_ENABLES_MASK \
  (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
#define FPSCR_BASIC_EXCEPTIONS_MASK \
  (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
#define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \
  FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \
  FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \
  FPSCR_VXCVI_MASK)
#define FPSCR_FPRF_MASK \
  (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
  FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
#define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)

/* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
   in the FPSCR, albeit shifted to different but corresponding locations.
   Similarly, the exception indicator bits in the FPSCR correspond one-to-one
   with the exception enable bits.  It is thus possible to map the FENV(1)
   exceptions directly to the FPSCR enables with a simple mask and shift,
   and vice versa.  */
#define FPSCR_EXCEPT_TO_ENABLE_SHIFT 22
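
/* Concretely: FE_ALL_EXCEPT is 0x3e000000 (the VX..XX status positions,
   bits 2-6) and FPSCR_ENABLES_MASK is 0x000000f8 (VE..XE, bits 24-28), and
   0xf8 << 22 == 0x3e000000, which is what the two helpers below rely on.  */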

static inline int
fenv_reg_to_exceptions (unsigned long long l)
{
  return (((int)l) & FPSCR_ENABLES_MASK) << FPSCR_EXCEPT_TO_ENABLE_SHIFT;
}

static inline unsigned long long
fenv_exceptions_to_reg (int excepts)
{
  return (unsigned long long)
    (excepts & FE_ALL_EXCEPT) >> FPSCR_EXCEPT_TO_ENABLE_SHIFT;
}
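
/* Illustrative sketch: enabling traps for a caller-supplied set of FE_* bits
   ('excepts' is a hypothetical parameter).  fenv_reg_to_exceptions recovers
   the FE_* set that was already enabled; this is only roughly the shape of
   feenableexcept, not its exact implementation.

     fenv_union_t old, new;
     old.fenv = fegetenv_register ();
     int old_excepts = fenv_reg_to_exceptions (old.l);
     new.l = old.l | fenv_exceptions_to_reg (excepts);
     __TEST_AND_EXIT_NON_STOP (old.l, new.l);
     fesetenv_register (new.fenv);
*/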

#ifdef _ARCH_PWR6
/* Not supported in ISA 2.05.  Provided for source compat only.  */
# define FPSCR_NI 29
#endif /* _ARCH_PWR6 */

/* This operation (i) sets the appropriate FPSCR bits for its
   parameter, (ii) converts sNaN to the corresponding qNaN, and (iii)
   otherwise passes its parameter through unchanged (in particular, -0
   and +0 stay as they were).  The `obvious' way to do this is optimised
   out by gcc.  */
#define f_wash(x) \
  ({ double d; asm volatile ("fmul %0,%1,%2" \
                             : "=f"(d) \
                             : "f" (x), "f"((float)1.0)); d; })
#define f_washf(x) \
  ({ float f; asm volatile ("fmuls %0,%1,%2" \
                            : "=f"(f) \
                            : "f" (x), "f"((float)1.0)); f; })
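
/* Illustrative sketch: quieting a possibly-signaling NaN result while still
   raising the corresponding FPSCR flags before handing it back, e.g.

     if (isnan (x))
       return f_wash (x);
*/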

#endif /* fenv_libc.h */