lib/fenv-private.h

   1 /* Common definitions for the implementation of the various <fenv.h> modules.
   2    Copyright (C) 1997-2024 Free Software Foundation, Inc.
   3
   4    This file is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU Lesser General Public License as
   6    published by the Free Software Foundation; either version 2.1 of the
   7    License, or (at your option) any later version.
   8
   9    This file is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Based on glibc/sysdeps/<cpu>/{fpu_control.h,fenv_private.h,fenv_libc.h}.  */
  18
  19 #if (defined __x86_64__ || defined _M_X64) || (defined __i386 || defined _M_IX86)
  20
  21 # if !(defined __x86_64__ || defined _M_X64) /* 32-bit x86 only */
  22 #  if __GLIBC__ + (__GLIBC_MINOR__ >= 33) > 2 /* glibc 2.33 or newer */
  23 /* glibc >= 2.33 has an API that tells us whether the CPU has an SSE unit.  */
  24 #   include <sys/platform/x86.h>
  25 #  elif defined __sun
  26 /* Solaris has a global variable that tells us whether the CPU has an SSE unit.  */
  27 extern int _sse_hw;
  28 #  endif
  29 # endif
  30
  31 /* CPU_HAS_SSE () returns true (nonzero) if the CPU has an SSE unit.  */
  32 # if defined __x86_64__ || defined _M_X64
  33 #  define CPU_HAS_SSE() 1 /* 64-bit mode: unconditionally reported as present */
  34 # else /* 32-bit x86 */
  35 #  if __GLIBC__ + (__GLIBC_MINOR__ >= 33) > 2 /* glibc 2.33 or newer */
  36 #   define CPU_HAS_SSE() CPU_FEATURE_PRESENT (SSE)
  37 #  elif defined __sun
  38 #   define CPU_HAS_SSE() _sse_hw
  39 #  else
  40 /* Otherwise, we assume that the SSE unit is present.
  41    Only very old 32-bit processors, before Pentium 4, don't have it.
  42    Don't bother testing it, through a 'cpuid' instruction.  */
  43 #   define CPU_HAS_SSE() 1
  44 #  endif
  45 # endif
  46
  47 /* fstat bits 5..2,0 indicate which floating-point exceptions have occurred
  48    in the 387 compatible floating-point unit since the respective bit was last
  49    set to zero.
  50    mxcsr bits 5..2,0 indicate which floating-point exceptions have occurred
  51    in the SSE floating-point unit since the respective bit was last set to
  52    zero.  */
  53 /* fctrl bits 5..2,0 are mask bits: each one that is set indicates that the
  54    respective floating-point exception, when occurring in the 387 compatible
  55    floating-point unit, shall *not* trigger a trap but merely set the
  56    corresponding bit in the fstat register.
  57    mxcsr bits 12..9,7 are the analogous mask bits for the SSE floating-point
  58    unit; there, the corresponding status bits are in the mxcsr register.  */
  59
  60 /* Macros that read (_FPU_GETCW) and write (_FPU_SETCW) the control word of
  61    the 387 unit, the so-called fctrl register.  CW must be an lvalue in memory.  */
  62 # define _FPU_GETCW(cw) __asm__ __volatile__ ("fnstcw %0" : "=m" (*&cw))
  63 # define _FPU_SETCW(cw) __asm__ __volatile__ ("fldcw %0" : : "m" (*&cw))
  64
  65 /* Macro that reads the status word of the 387 unit, the so-called fstat
  66    register, into CW.  CW must be an lvalue in memory.  */
  67 # define _FPU_GETSTAT(cw) __asm__ __volatile__ ("fnstsw %0" : "=m" (*&cw))
  68
  69 /* Macros that read (_FPU_GETSSECW) and write (_FPU_SETSSECW) the control and
  70    status word of the SSE unit, the mxcsr register.  */
  71 # if defined __GNUC__ || defined __clang__ /* use inline assembly */
  72 #  define _FPU_GETSSECW(cw) __asm__ __volatile__ ("stmxcsr %0" : "=m" (*&cw))
  73 #  define _FPU_SETSSECW(cw) __asm__ __volatile__ ("ldmxcsr %0" : : "m" (*&cw))
  74 # elif defined _MSC_VER /* use compiler intrinsics */
  75 #  include <mmintrin.h>
  76 /* Documentation:
  77    <https://learn.microsoft.com/en-us/cpp/intrinsics/x86-intrinsics-list>
  78    <https://learn.microsoft.com/en-us/cpp/intrinsics/x64-amd64-intrinsics-list>
  79    <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getcsr&ig_expand=3548>
  80    <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setcsr&ig_expand=5924>
  81  */
  82 #  define _FPU_GETSSECW(cw) ((cw) = _mm_getcsr ())
  83 #  define _FPU_SETSSECW(cw) _mm_setcsr (cw)
  84 # endif /* with any other compiler, these two macros remain undefined */
  85
  86 /* The floating-point environment of the 387 unit.  */
  87 typedef struct
  88   {
  89     /* 7 32-bit words:  */
  90     unsigned short __control_word;      /* fctrl register */
  91     unsigned short __reserved1;         /* fills up the 1st 32-bit word */
  92     unsigned short __status_word;       /* fstat register */
  93     unsigned short __reserved2;         /* fills up the 2nd 32-bit word */
  94     unsigned int more[5];               /* the remaining 5 words, without named fields */
  95   }
  96 x86_387_fenv_t;
  97
  98 # if defined _MSC_VER
  99 /* The MSVC header files have different values for the floating-point exceptions
 100    than all the other platforms.  Define conversion macros; each evaluates its argument 5 times.  */
 101 #  define exceptions_to_x86hardware(exceptions) \
 102      (  ((exceptions) & FE_INVALID   ? 0x01 : 0) \
 103       | ((exceptions) & FE_DIVBYZERO ? 0x04 : 0) \
 104       | ((exceptions) & FE_OVERFLOW  ? 0x08 : 0) \
 105       | ((exceptions) & FE_UNDERFLOW ? 0x10 : 0) \
 106       | ((exceptions) & FE_INEXACT   ? 0x20 : 0))
 107 #  define x86hardware_to_exceptions(fstat) \
 108      (  ((fstat) & 0x01 ? FE_INVALID   : 0) \
 109       | ((fstat) & 0x04 ? FE_DIVBYZERO : 0) \
 110       | ((fstat) & 0x08 ? FE_OVERFLOW  : 0) \
 111       | ((fstat) & 0x10 ? FE_UNDERFLOW : 0) \
 112       | ((fstat) & 0x20 ? FE_INEXACT   : 0))
 113 # else /* not MSVC: both conversions are the identity */
 114 #  define exceptions_to_x86hardware(exceptions) (exceptions)
 115 #  define x86hardware_to_exceptions(fstat) (fstat)
 116 # endif
 117
 118 /* When _MSC_VER is defined, the 387 compatible floating-point unit is *not*
 119    in use.  Only the SSE floating-point unit is used.  This can be inferred
 120    from two facts:
 121      - sizeof (long double) == sizeof (double).  That is, 'long double'
 122        values are just 'double' values and can be processed in the SSE unit.
 123      - After fegetenv (&env), the value of env._Fe_stat is *not* the fstat
 124        register of the 387 unit.  Rather, it is an artificial value.  In
 125        particular, (env._Fe_stat & 0x3f) is
 126        == x86hardware_to_exceptions (_FPU_GETSSECW () & 0x3f).  */
 127
 128 #elif defined __aarch64__ /* arm64 */
 129
 130 /* fpsr bits 4..0 indicate which floating-point exceptions have occurred
 131    since the respective bit was last set to zero.  */
 132 /* fpcr bits 12..8 indicate which floating-point exceptions shall, when
 133    occurring, trigger a trap rather than merely set the corresponding bit
 134    in the fpsr register.  */
 135 /* Macros that read (_FPU_GETCW) and write (_FPU_SETCW) the fpcr register.  */
 136 # if __GNUC__ >= 6 && !defined __clang__ /* GCC 6 or newer: GCC builtins */
 137 #  define _FPU_GETCW(fpcr) (fpcr = __builtin_aarch64_get_fpcr ())
 138 #  define _FPU_SETCW(fpcr) __builtin_aarch64_set_fpcr (fpcr)
 139 # elif __clang_major__ >= 4 /* clang 4 or newer: register builtins */
 140 #  define _FPU_GETCW(fpcr) (fpcr = __builtin_arm_rsr ("fpcr"))
 141 #  define _FPU_SETCW(fpcr) __builtin_arm_wsr ("fpcr", fpcr)
 142 # else /* any other compiler version: inline assembly */
 143 #  define _FPU_GETCW(fpcr) \
 144    __asm__ __volatile__ ("mrs %0, fpcr" : "=r" (fpcr))
 145 #  define _FPU_SETCW(fpcr) \
 146    __asm__ __volatile__ ("msr fpcr, %0" : : "r" (fpcr))
 147 # endif
 148 /* Macros that read (_FPU_GETFPSR) and write (_FPU_SETFPSR) the fpsr register.  */
 149 # if __GNUC__ >= 6 && !defined __clang__ /* GCC 6 or newer: GCC builtins */
 150 #  define _FPU_GETFPSR(fpsr) (fpsr = __builtin_aarch64_get_fpsr ())
 151 #  define _FPU_SETFPSR(fpsr) __builtin_aarch64_set_fpsr (fpsr)
 152 # elif __clang_major__ >= 4 /* clang 4 or newer: register builtins */
 153 #  define _FPU_GETFPSR(fpsr) (fpsr = __builtin_arm_rsr ("fpsr"))
 154 #  define _FPU_SETFPSR(fpsr) __builtin_arm_wsr ("fpsr", fpsr)
 155 # else /* any other compiler version: inline assembly */
 156 #  define _FPU_GETFPSR(fpsr) \
 157    __asm__ __volatile__ ("mrs %0, fpsr" : "=r" (fpsr))
 158 #  define _FPU_SETFPSR(fpsr) \
 159    __asm__ __volatile__ ("msr fpsr, %0" : : "r" (fpsr))
 160 # endif
 161
 162 #elif defined __arm__
 163
 164 /* fpscr bits 23..22 indicate the rounding direction.  */
 165 /* fpscr bits 4..0 indicate which floating-point exceptions have occurred
 166    since the respective bit was last set to zero.  */
 167 /* fpscr bits 12..8 indicate which floating-point exceptions shall, when
 168    occurring, trigger a trap rather than merely set the corresponding bit
 169    in the fpscr register.  */
 170 /* Macros that read (_FPU_GETCW) and write (_FPU_SETCW) the fpscr register.  */
 171 # if !defined __SOFTFP__ /* presumably: only when hardware floating-point is in use */
 172 #  define _FPU_GETCW(cw) \
 173    __asm__ __volatile__ ("vmrs %0, fpscr" : "=r" (cw))
 174 #  define _FPU_SETCW(cw) \
 175    __asm__ __volatile__ ("vmsr fpscr, %0" : : "r" (cw))
 176 # endif /* when __SOFTFP__ is defined, the two macros remain undefined */
 177
 178 #elif defined __alpha
 179
 180 /* System calls.  They are only declared here, not defined in this file.  */
 181 extern unsigned long __ieee_get_fp_control (void);
 182 extern void __ieee_set_fp_control (unsigned long);
 183 /* Macros that read and write the fpcr.  The operand uses the "f" constraint (presumably an FP register).  */
 184 # define _FPU_GETCW(fpcr) \
 185   __asm__ __volatile__ ("excb; mf_fpcr %0" : "=f" (fpcr))
 186 # define _FPU_SETCW(fpcr) \
 187   __asm__ __volatile__ ("mt_fpcr %0; excb" : : "f" (fpcr))
 188
 189 #elif defined __hppa
 190
 191 /* Bits 31..27 of the first 32-bit word of %fr0 indicate which floating-point
 192    exceptions have occurred since the respective bit was last set to zero.  */
 193 /* Bits 4..0 of the first 32-bit word of %fr0 indicate which floating-point
 194    exceptions shall, when occurring, trigger a trap rather than merely set the
 195    corresponding flag bit.  */
 196
 197 /* The status register is located in bits 0 to 31 of floating-point register 0.
     Both macros are GNU C statement expressions that copy %fr0 through a 64-bit temporary in memory.  */
 198 # define _FPU_GETCW(cw) \
 199 ({                                                                              \
 200   union { __extension__ unsigned long long __fpreg; unsigned int __halfreg[2]; } __fullfp; \
 201   /* Store %fr0 to memory and load it back; the first 32-bit word is the result.  */ \
 202   __asm__ ("fstd %%fr0,0(%1)\n\t"                                               \
 203            "fldd 0(%1),%%fr0\n\t"                                               \
 204            : "=m" (__fullfp.__fpreg) : "r" (&__fullfp.__fpreg) : "%r0");        \
 205   cw = __fullfp.__halfreg[0];                                                   \
 206 })
 207 # define _FPU_SETCW(cw) \
 208 ({                                                                              \
 209   union { __extension__ unsigned long long __fpreg; unsigned int __halfreg[2]; } __fullfp; \
 210   /* Store %fr0 to memory, replace its first 32-bit word by CW, load it back.  */ \
 211   __asm__ ("fstd %%fr0,0(%1)\n\t"                                               \
 212            : "=m" (__fullfp.__fpreg) : "r" (&__fullfp.__fpreg) : "%r0");        \
 213   __fullfp.__halfreg[0] = cw;                                                   \
 214   __asm__ ("fldd 0(%1),%%fr0\n\t"                                               \
 215            : : "m" (__fullfp.__fpreg), "r" (&__fullfp.__fpreg) : "%r0" );       \
 216 })
 217
 218 #elif defined __ia64__
 219
 220 /* fpsr bits 12..9,7 indicate which floating-point exceptions have occurred
 221    since the respective bit was last set to zero.  */
 222 /* fpsr bits 5..2,0 indicate which floating-point exceptions shall, when
 223    occurring, *not* trigger a trap but merely set the corresponding
 224    bit in the fpsr register.  */
 225 /* Macros that read (_FPU_GETCW) and write (_FPU_SETCW) the fpsr register.  */
 226 # define _FPU_GETCW(fpsr) \
 227   __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr))
 228 # define _FPU_SETCW(fpsr) \
 229   __asm__ __volatile__ ("mov.m ar.fpsr=%0" :: "r" (fpsr) : "memory")
 230
 231 #elif defined __m68k__
 232
 233 /* fpsr bits 7..3 indicate which floating-point exceptions have occurred
 234    since the respective bit was last set to zero.  */
 235 /* fpcr bits 15..8 indicate which floating-point exceptions shall, when
 236    occurring, trigger a trap rather than merely set the corresponding bit
 237    in the fpsr register:
 238      - bit 15: branch/set on unordered
 239      - bit 14: signaling not-a-number
 240      - bit 13: operand error
 241      - bit 12: overflow
 242      - bit 11: underflow
 243      - bit 10: divide by zero
 244      - bit 9:  inexact operation
 245      - bit 8:  inexact decimal input
 246    FE_INVALID corresponds to all three: bit 15, bit 14, bit 13.  */
 247 /* Macros that read and write the control register (presumably '%!' denotes fpcr in the asm template).  */
 248 # define _FPU_GETCW(cw) __asm__ __volatile__ ("fmove%.l %!, %0" : "=dm" (cw))
 249 # define _FPU_SETCW(cw) __asm__ __volatile__ ("fmove%.l %0, %!" : : "dm" (cw))
 250 /* Macros that read and write the status register fpsr.  */
 251 # define _FPU_GETFPSR(cw) __asm__ __volatile__ ("fmove%.l %/fpsr, %0" : "=dm" (cw))
 252 # define _FPU_SETFPSR(cw) __asm__ __volatile__ ("fmove%.l %0, %/fpsr" : : "dm" (cw))
 253
 254 #elif defined __mips__
 255
 256 /* fcsr bits 6..2 indicate which floating-point exceptions have occurred
 257    since the respective bit was last set to zero.
 258    fcsr bits 17..12 indicate which floating-point exceptions have occurred
 259    in the most recent instruction.  */
 260 /* fcsr bits 11..7 indicate which floating-point exceptions shall, when
 261    occurring, trigger a trap rather than merely set the corresponding bit
 262    in the fcsr register.  */
 263 /* Macros that read and write the fcsr register (presumably addressed as FPU control register $31).  */
 264 # define _FPU_GETCW(cw) __asm__ __volatile__ ("cfc1 %0,$31" : "=r" (cw))
 265 # define _FPU_SETCW(cw) __asm__ __volatile__ ("ctc1 %0,$31" : : "r" (cw))
 266
 267 #elif defined __loongarch__
 268
 269 /* fcsr0 bits 20..16 indicate which floating-point exceptions have occurred
 270    since the respective bit was last set to zero.
 271    fcsr0 bits 28..24 indicate which floating-point exceptions have occurred
 272    in the most recent instruction.  */
 273 /* fcsr0 bits 4..0 indicate which floating-point exceptions shall, when
 274    occurring, trigger a trap rather than merely set the corresponding bit
 275    in the fcsr0 register.  */
 276 /* Macros that read and write the fcsr0 register (presumably what '$r0' denotes in these two instructions).  */
 277 # define _FPU_GETCW(cw) __asm__ __volatile__ ("movfcsr2gr %0,$r0" : "=r" (cw))
 278 # define _FPU_SETCW(cw) __asm__ __volatile__ ("movgr2fcsr $r0,%0" : : "r" (cw))
 279
 280 #elif defined __powerpc__
 281
 282 /* fpscr bits 28..25 indicate which floating-point exceptions, other than
 283    FE_INVALID, have occurred since the respective bit was last set to zero.
 284    fpscr bits 24..19, 10..8 do the same thing, for various kinds of Invalid
 285    Operation.  fpscr bit 29 is the summary (the OR) of all these bits.  */
 286 /* fpscr bits 7..3 indicate which floating-point exceptions shall, when
 287    occurring, trigger a trap rather than merely set the corresponding bit
 288    in the fpscr register.  */
 289 /* Macros that read and write the fpscr register.  The value is transferred as a 'double'; the getter assigns a 'double' to CW.  */
 290 # define _FPU_GETCW_AS_DOUBLE(cw) \
 291   do { double env; __asm__ __volatile__ ("mffs %0" : "=f" (env)); cw = env; } \
 292   while (0)
 293 # define _FPU_SETCW_AS_DOUBLE(cw) \
 294   __asm__ __volatile__ ("mtfsf 0xff,%0" : : "f" (cw))
 295
 296 # if defined __NetBSD__
 297 /* Modifying the FE0 and FE1 bits of the machine state register (MSR) is
 298    only possible from the kernel.  NetBSD allows it to be done from user
 299    space, by emulating the mfmsr and mtmsr instructions when they trap.
 300    In other words, these instructions are actually system calls in NetBSD.  */
 301 #  define _GETMSR(msr) __asm__ __volatile__ ("mfmsr %0" : "=r" (msr))
 302 #  define _SETMSR(msr) __asm__ __volatile__ ("mtmsr %0" : : "r" (msr))
 303 #  define MSR_FP_EXC_MASK 0x00000900 /* == PR_FP_EXC_NONRECOV | PR_FP_EXC_ASYNC; presumably the FE0 and FE1 bits */
 304 /* This allows us to simulate the Linux prctl() through a macro.  Only PR_SET_FPEXC is implemented; any other operation is a no-op.  The macro is a statement and yields no value.  */
 305 #  define PR_SET_FPEXC 1
 306 #  define PR_FP_EXC_DISABLED  0x00000000  /* FP exceptions disabled */
 307 #  define PR_FP_EXC_NONRECOV  0x00000100  /* async non-recoverable exc. mode */
 308 #  define PR_FP_EXC_ASYNC     0x00000800  /* async recoverable exception mode */
 309 #  define PR_FP_EXC_PRECISE   0x00000900  /* precise exception mode */
 310 #  define prctl(operation,arg) \
 311      do {                                         \
 312        if ((operation) == PR_SET_FPEXC)           \
 313          {                                        \
 314            unsigned int local_msr;                \
 315            _GETMSR (local_msr);                   \
 316            local_msr &= ~MSR_FP_EXC_MASK;         /* drop the old mode bits */ \
 317            local_msr |= (arg) & MSR_FP_EXC_MASK;  /* insert the requested mode bits */ \
 318            _SETMSR (local_msr);                   \
 319          }                                        \
 320      } while (0)
 321 # endif /* __NetBSD__ */
 322
 323 #elif defined __riscv
 324
 325 /* fcsr bits 4..0 indicate which floating-point exceptions have occurred
 326    since the respective bit was last set to zero.  */
 327
 328 /* Trapping of floating-point exceptions does not work on RISC-V.  That's
 329    because the fcsr register has only bits for floating-point exception status,
 330    but no bits for trapping floating-point exceptions.  */
 331
 332 #elif defined __s390__ || defined __s390x__
 333
 334 /* fpc bits 23..19 indicate which floating-point exceptions have occurred
 335    since the respective bit was last set to zero.
 336    fpc bits 15..11 are part of the "data exception code" (DXC) and have a
 337    similar meaning if bits 9..8 are both zero.  */
 338 /* fpc bits 31..27 indicate which floating-point exceptions shall, when
 339    occurring, trigger a trap rather than merely set the corresponding bit
 340    in the fpc register.  */
 341 /* Macros that read (_FPU_GETCW) and write (_FPU_SETCW) the fpc register.  */
 342 # define _FPU_GETCW(cw)  __asm__ __volatile__ ("efpc %0" : "=d" (cw))
 343 # define _FPU_SETCW(cw)  __asm__ __volatile__ ("sfpc %0" : : "d" (cw))
 344
 345 #elif defined __sh__
 346
 347 /* fpscr bits 6..2 indicate which floating-point exceptions have occurred
 348    since the respective bit was last set to zero.  */
 349 /* fpscr bits 11..7 indicate which floating-point exceptions shall, when
 350    occurring, trigger a trap rather than merely set the corresponding bit
 351    in the fpscr register.  */
 352 /* Macros that read and write the fpscr register.  __volatile__ keeps the compiler from deleting, merging or moving the register accesses.  */
 353 # define _FPU_GETCW(cw) __asm__ __volatile__ ("sts fpscr,%0" : "=r" (cw))
 354 # define _FPU_SETCW(cw) __asm__ __volatile__ ("lds %0,fpscr" : : "r" (cw))
 355
 356 #elif defined __sparc
 357
 358 /* fsr bits 9..5 indicate which floating-point exceptions have occurred
 359    since the respective bit was last set to zero.  */
 360 /* fsr bits 27..23 indicate which floating-point exceptions shall, when
 361    occurring, trigger a trap rather than merely set the corresponding bit
 362    in the fsr register.  */
 363 /* Macros that read (_FPU_GETCW) and write (_FPU_SETCW) the fsr register.  X must be an lvalue in memory.  */
 364 # if defined __sparcv9 || defined __arch64__ /* sparc64 */
 365 #  define _FPU_GETCW(X)   __asm__ __volatile__ ("stx %%fsr,%0" : "=m" (X))
 366 #  define _FPU_SETCW(X)   __asm__ __volatile__ ("ldx %0,%%fsr" : : "m" (X))
 367 # else /* not sparc64 */
 368 #  define _FPU_GETCW(X)   __asm__ __volatile__ ("st %%fsr,%0" : "=m" (X))
 369 #  define _FPU_SETCW(X)   __asm__ __volatile__ ("ld %0,%%fsr" : : "m" (X))
 370 # endif
 371
 372 #endif
 373
 374 #if defined _AIX && defined __powerpc__ /* AIX */
 375
 376 /* <fpxcp.h> defines a type fpflag_t and macros FP_*.  */
 377
 378 /* Convert from an 'int exceptions' to an fpflag_t.  The optimized variant, a plain mask, matches the unoptimized one only if each FE_* value equals the corresponding FP_* value (presumably the case on AIX).  */
 379 # if 0 /* Unoptimized */
 380 #  define exceptions_to_fpflag(exceptions) \
 381      (  ((exceptions) & FE_INVALID   ? FP_INVALID     : 0) \
 382       | ((exceptions) & FE_DIVBYZERO ? FP_DIV_BY_ZERO : 0) \
 383       | ((exceptions) & FE_OVERFLOW  ? FP_OVERFLOW    : 0) \
 384       | ((exceptions) & FE_UNDERFLOW ? FP_UNDERFLOW   : 0) \
 385       | ((exceptions) & FE_INEXACT   ? FP_INEXACT     : 0))
 386 # else /* Optimized */
 387 #  define exceptions_to_fpflag(exceptions) \
 388      ((exceptions) & FE_ALL_EXCEPT)
 389 # endif
 390
 391 /* Convert from an fpflag_t to an 'int exceptions'.  The optimized variant, a plain mask, matches the unoptimized one only if each FP_* value equals the corresponding FE_* value (presumably the case on AIX).  */
 392 # if 0 /* Unoptimized */
 393 #  define fpflag_to_exceptions(f) \
 394      (  ((f) & FP_INVALID     ? FE_INVALID   : 0) \
 395       | ((f) & FP_DIV_BY_ZERO ? FE_DIVBYZERO : 0) \
 396       | ((f) & FP_OVERFLOW    ? FE_OVERFLOW  : 0) \
 397       | ((f) & FP_UNDERFLOW   ? FE_UNDERFLOW : 0) \
 398       | ((f) & FP_INEXACT     ? FE_INEXACT   : 0))
 399 # else /* Optimized */
 400 #  define fpflag_to_exceptions(f) \
 401      ((f) & FE_ALL_EXCEPT)
 402 # endif
 403
 404 /* The implementation of fegetexcept().  Avoids a module dependency.  Yields the OR of the FE_* flags whose TRP_* counterpart fp_is_enabled reports as enabled.  */
 405 # define fegetexcept_impl() \
 406     (  (fp_is_enabled (TRP_INVALID)     ? FE_INVALID   : 0) \
 407      | (fp_is_enabled (TRP_DIV_BY_ZERO) ? FE_DIVBYZERO : 0) \
 408      | (fp_is_enabled (TRP_OVERFLOW)    ? FE_OVERFLOW  : 0) \
 409      | (fp_is_enabled (TRP_UNDERFLOW)   ? FE_UNDERFLOW : 0) \
 410      | (fp_is_enabled (TRP_INEXACT)     ? FE_INEXACT   : 0))
 411
 412 #endif