lib/fenv-private.h

   1 /* Common definitions for the implementation of the various <fenv.h> modules.
   2    Copyright (C) 1997-2024 Free Software Foundation, Inc.
   3
   4    This file is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU Lesser General Public License as
   6    published by the Free Software Foundation; either version 2.1 of the
   7    License, or (at your option) any later version.
   8
   9    This file is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Based on glibc/sysdeps/<cpu>/{fpu_control.h,fenv_private.h,fenv_libc.h}.  */
  18
  19 #if (defined __x86_64__ || defined _M_X64) || (defined __i386 || defined _M_IX86)
  20
  21 # if !(defined __x86_64__ || defined _M_X64)
     /* 32-bit x86 only: bring in whatever the platform provides for finding out
        at run time whether the CPU has an SSE unit.  */
  22 #  if __GLIBC__ + (__GLIBC_MINOR__ >= 33) > 2
  23 /* glibc >= 2.33 has an API that tells us whether the CPU has an SSE unit.  */
  24 #   include <sys/platform/x86.h>
  25 #  elif defined __sun
  26 /* Solaris has a global variable that tells us whether the CPU has an SSE unit.  */
  27 extern int _sse_hw;
  28 #  endif
  29 # endif
  30
  31 /* CPU_HAS_SSE () evaluates to true if the CPU has an SSE unit.
        On 64-bit x86 it is the constant 1.  On 32-bit x86 it uses the glibc
        CPU_FEATURE_PRESENT API or the Solaris _sse_hw variable where available,
        and is the constant 1 otherwise.  */
  32 # if defined __x86_64__ || defined _M_X64
  33 #  define CPU_HAS_SSE() 1
  34 # else
  35 #  if __GLIBC__ + (__GLIBC_MINOR__ >= 33) > 2
  36 #   define CPU_HAS_SSE() CPU_FEATURE_PRESENT (SSE)
  37 #  elif defined __sun
  38 #   define CPU_HAS_SSE() _sse_hw
  39 #  else
  40 /* Otherwise, we assume that the SSE unit is present.
  41    Only very old 32-bit processors, before Pentium 4, don't have it.
  42    Don't bother testing for it through a 'cpuid' instruction.  */
  43 #   define CPU_HAS_SSE() 1
  44 #  endif
  45 # endif
  46
  47 /* fstat bits 5..2,0 indicate which floating-point exceptions have occurred
  48    in the 387 compatible floating-point unit since the respective bit was last
  49    set to zero.
  50    mxcsr bits 5..2,0 indicate which floating-point exceptions have occurred
  51    in the SSE floating-point unit since the respective bit was last set to
  52    zero.  */
  53 /* fctrl bits 5..2,0 indicate which floating-point exceptions shall, when
  54    occurring in the 387 compatible floating-point unit, *not* trigger a trap
  55    but merely set the corresponding bit in the fstat register.
  56    mxcsr bits 12..9,7 indicate which floating-point exceptions shall, when
  57    occurring in the SSE floating-point unit, *not* trigger a trap but merely
  58    set the corresponding bit in the mxcsr register.  */
  59
  60 /* Macros that access the control word of the 387 unit, the so-called fctrl
  61    register.
        _FPU_GETCW (cw) stores fctrl into CW ('fnstcw'); _FPU_SETCW (cw) loads
        fctrl from CW ('fldcw').  CW is used as a memory operand and its address
        is taken (*&cw), so it must be an addressable lvalue -- presumably an
        'unsigned short'; confirm against the callers.  */
  62 # define _FPU_GETCW(cw) __asm__ __volatile__ ("fnstcw %0" : "=m" (*&cw))
  63 # define _FPU_SETCW(cw) __asm__ __volatile__ ("fldcw %0" : : "m" (*&cw))
  64
  65 /* Macros that access the status word of the 387 unit, the so-called fstat
  66    register.
        _FPU_GETSTAT (cw) stores fstat into CW ('fnstsw'); despite the parameter
        name, CW receives the status word here, not the control word.  There is
        no corresponding setter macro in this header.  */
  67 # define _FPU_GETSTAT(cw) __asm__ __volatile__ ("fnstsw %0" : "=m" (*&cw))
  68
  69 /* Macros that access the control and status word of the SSE unit, the mxcsr
  70    register.
        _FPU_GETSSECW (cw) stores mxcsr into CW; _FPU_SETSSECW (cw) loads mxcsr
        from CW.  With GCC and clang this is done through inline asm with CW as
        a memory operand; with MSVC, through the _mm_getcsr / _mm_setcsr
        intrinsics.  For any other compiler, neither macro is defined.  */
  71 # if defined __GNUC__ || defined __clang__
  72 #  define _FPU_GETSSECW(cw) __asm__ __volatile__ ("stmxcsr %0" : "=m" (*&cw))
  73 #  define _FPU_SETSSECW(cw) __asm__ __volatile__ ("ldmxcsr %0" : : "m" (*&cw))
  74 # elif defined _MSC_VER
  75 #  include <mmintrin.h>
  76 /* Documentation:
  77    <https://learn.microsoft.com/en-us/cpp/intrinsics/x86-intrinsics-list>
  78    <https://learn.microsoft.com/en-us/cpp/intrinsics/x64-amd64-intrinsics-list>
  79    <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getcsr&ig_expand=3548>
  80    <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setcsr&ig_expand=5924>
        NOTE(review): _mm_getcsr / _mm_setcsr are usually documented as coming
        from <xmmintrin.h> or <immintrin.h>; confirm that <mmintrin.h> is
        sufficient to declare them with MSVC.
  81  */
  82 #  define _FPU_GETSSECW(cw) ((cw) = _mm_getcsr ())
  83 #  define _FPU_SETSSECW(cw) _mm_setcsr (cw)
  84 # endif
  85
  86 /* The floating-point environment of the 387 unit.
        NOTE(review): presumably this mirrors the memory image written by the
        'fnstenv' instruction -- confirm against the users of this type.  */
  87 typedef struct
  88   {
  89     /* 7 32-bit words: the first two are each split into a 16-bit register
            value and a 16-bit reserved half; the remaining five follow.  */
  90     unsigned short __control_word;      /* fctrl register */
  91     unsigned short __reserved1;
  92     unsigned short __status_word;       /* fstat register */
  93     unsigned short __reserved2;
  94     unsigned int more[5];               /* words 3..7, not named individually */
  95   }
  96 x86_387_fenv_t;
  97
  98 # if defined _MSC_VER
  99 /* The MSVC header files have different values for the floating-point exceptions
 100    than all the other platforms.  Define some handy macros for conversion.
        exceptions_to_x86hardware (exceptions) maps a set of FE_* flags to the
        hardware bit positions: invalid = 0x01, divide-by-zero = 0x04,
        overflow = 0x08, underflow = 0x10, inexact = 0x20.
        x86hardware_to_exceptions (fstat) is the reverse mapping.
        Both macros evaluate their argument several times, and bits other than
        these five are dropped.  */
 101 #  define exceptions_to_x86hardware(exceptions) \
 102      (  ((exceptions) & FE_INVALID   ? 0x01 : 0) \
 103       | ((exceptions) & FE_DIVBYZERO ? 0x04 : 0) \
 104       | ((exceptions) & FE_OVERFLOW  ? 0x08 : 0) \
 105       | ((exceptions) & FE_UNDERFLOW ? 0x10 : 0) \
 106       | ((exceptions) & FE_INEXACT   ? 0x20 : 0))
 107 #  define x86hardware_to_exceptions(fstat) \
 108      (  ((fstat) & 0x01 ? FE_INVALID   : 0) \
 109       | ((fstat) & 0x04 ? FE_DIVBYZERO : 0) \
 110       | ((fstat) & 0x08 ? FE_OVERFLOW  : 0) \
 111       | ((fstat) & 0x10 ? FE_UNDERFLOW : 0) \
 112       | ((fstat) & 0x20 ? FE_INEXACT   : 0))
 113 # else
     /* On all other platforms the conversion is the identity: the argument is
        passed through unchanged, without any masking.  */
 114 #  define exceptions_to_x86hardware(exceptions) (exceptions)
 115 #  define x86hardware_to_exceptions(fstat) (fstat)
 116 # endif
 117
 118 /* When _MSC_VER is defined, the 387 compatible floating-point unit is *not*
 119    in use.  Only the SSE floating-point unit is used.  This can be inferred
 120    from two facts:
 121      - sizeof (long double) == sizeof (double).  That is, 'long double'
 122        values are just 'double' values and can be processed in the SSE unit.
 123      - After fegetenv (&env), the value of env._Fe_stat is *not* the fstat
 124        register of the 387 unit.  Rather, it is an artificial value.  In
 125        particular, (env._Fe_stat & 0x3f) is
 126        == x86hardware_to_exceptions (_FPU_GETSSECW () & 0x3f).  */
 127
 128 #elif defined __aarch64__ /* arm64 */
 129
 130 /* fpsr bits 4..0 indicate which floating-point exceptions have occurred
 131    since the respective bit was last set to zero.  */
 132 /* fpcr bits 12..8 indicate which floating-point exceptions shall, when
 133    occurring, trigger a trap rather than merely set the corresponding bit
 134    in the fpsr register.  */
 135
     /* _FPU_GETCW (fpcr) reads the fpcr register into FPCR; _FPU_SETCW (fpcr)
        writes FPCR to it.  When __GNUC__ >= 6, the compiler builtins are used;
        otherwise inline asm with 'mrs' / 'msr' and a general register operand.  */
 136 # if __GNUC__ >= 6
 137 #  define _FPU_GETCW(fpcr) (fpcr = __builtin_aarch64_get_fpcr ())
 138 #  define _FPU_SETCW(fpcr) __builtin_aarch64_set_fpcr (fpcr)
 139 # else
 140 #  define _FPU_GETCW(fpcr) \
 141    __asm__ __volatile__ ("mrs %0, fpcr" : "=r" (fpcr))
 142 #  define _FPU_SETCW(fpcr) \
 143    __asm__ __volatile__ ("msr fpcr, %0" : : "r" (fpcr))
 144 # endif
 145
     /* _FPU_GETFPSR (fpsr) and _FPU_SETFPSR (fpsr) do the same for the fpsr
        register, with the same choice between builtins and inline asm.  */
 146 # if __GNUC__ >= 6
 147 #  define _FPU_GETFPSR(fpsr) (fpsr = __builtin_aarch64_get_fpsr ())
 148 #  define _FPU_SETFPSR(fpsr) __builtin_aarch64_set_fpsr (fpsr)
 149 # else
 150 #  define _FPU_GETFPSR(fpsr) \
 151    __asm__ __volatile__ ("mrs %0, fpsr" : "=r" (fpsr))
 152 #  define _FPU_SETFPSR(fpsr) \
 153    __asm__ __volatile__ ("msr fpsr, %0" : : "r" (fpsr))
 154 # endif
 155
 156 #elif defined __arm__
 157
 158 /* fpscr bits 23..22 indicate the rounding direction.  */
 159 /* fpscr bits 4..0 indicate which floating-point exceptions have occurred
 160    since the respective bit was last set to zero.  */
 161 /* fpscr bits 12..8 indicate which floating-point exceptions shall, when
 162    occurring, trigger a trap rather than merely set the corresponding bit
 163    in the fpscr register.  */
 164
     /* _FPU_GETCW (cw) reads the fpscr register into CW ('vmrs'); _FPU_SETCW (cw)
        writes CW to it ('vmsr').  CW is a general register operand.
        These macros are defined only when __SOFTFP__ is not defined; in the
        __SOFTFP__ case this header provides no register access macros.  */
 165 # if !defined __SOFTFP__
 166 #  define _FPU_GETCW(cw) \
 167    __asm__ __volatile__ ("vmrs %0, fpscr" : "=r" (cw))
 168 #  define _FPU_SETCW(cw) \
 169    __asm__ __volatile__ ("vmsr fpscr, %0" : : "r" (cw))
 170 # endif
 171
 172 #elif defined __alpha
 173
 174 /* System calls.  They are only declared here; nothing in this header calls
        them.  */
 175 extern unsigned long __ieee_get_fp_control (void);
 176 extern void __ieee_set_fp_control (unsigned long);
 177
     /* _FPU_GETCW (fpcr) reads the fpcr register into FPCR ('mf_fpcr', preceded
        by 'excb'); _FPU_SETCW (fpcr) writes FPCR to it ('mt_fpcr', followed by
        'excb').  The operand uses the "f" constraint, so FPCR presumably has to
        be a floating-point typed variable -- confirm against the callers.  */
 178 # define _FPU_GETCW(fpcr) \
 179   __asm__ __volatile__ ("excb; mf_fpcr %0" : "=f" (fpcr))
 180 # define _FPU_SETCW(fpcr) \
 181   __asm__ __volatile__ ("mt_fpcr %0; excb" : : "f" (fpcr))
 182
 183 #elif defined __hppa
 184
 185 /* Bits 31..27 of the first 32-bit word of %fr0 indicate which floating-point
 186    exceptions have occurred since the respective bit was last set to zero.  */
 187 /* Bits 4..0 of the first 32-bit word of %fr0 indicate which floating-point
 188    exceptions shall, when occurring, trigger a trap rather than merely set the
 189    corresponding flag bit.  */
 190
 191 /* The status register is located in bits 0 to 31 of floating-point register 0.  */
     /* _FPU_GETCW (cw) stores the 64-bit %fr0 into a local union ('fstd'),
        reloads %fr0 from it ('fldd'), and assigns the first 32-bit half to CW.
        It is a GNU C statement expression, so it requires a GNU compatible
        compiler.
        NOTE(review): unlike on the other architectures in this file, the asm
        here is not marked __volatile__ although it has an output operand;
        check whether the compiler may merge or move it.  */
 192 # define _FPU_GETCW(cw) \
 193 ({                                                                              \
 194   union { __extension__ unsigned long long __fpreg; unsigned int __halfreg[2]; } __fullfp; \
 195   /* Get the current status word. */                                            \
 196   __asm__ ("fstd %%fr0,0(%1)\n\t"                                               \
 197            "fldd 0(%1),%%fr0\n\t"                                               \
 198            : "=m" (__fullfp.__fpreg) : "r" (&__fullfp.__fpreg) : "%r0");        \
 199   cw = __fullfp.__halfreg[0];                                                   \
 200 })
     /* _FPU_SETCW (cw) stores the 64-bit %fr0 into a local union, replaces the
        first 32-bit half with CW (the second half is kept as read), and loads
        the result back into %fr0.  */
 201 # define _FPU_SETCW(cw) \
 202 ({                                                                              \
 203   union { __extension__ unsigned long long __fpreg; unsigned int __halfreg[2]; } __fullfp; \
 204   /* Get the current status word and set the control word.  */                  \
 205   __asm__ ("fstd %%fr0,0(%1)\n\t"                                               \
 206            : "=m" (__fullfp.__fpreg) : "r" (&__fullfp.__fpreg) : "%r0");        \
 207   __fullfp.__halfreg[0] = cw;                                                   \
 208   __asm__ ("fldd 0(%1),%%fr0\n\t"                                               \
 209            : : "m" (__fullfp.__fpreg), "r" (&__fullfp.__fpreg) : "%r0" );       \
 210 })
 211
 212 #elif defined __ia64__
 213
 214 /* fpsr bits 12..9,7 indicate which floating-point exceptions have occurred
 215    since the respective bit was last set to zero.  */
 216 /* fpsr bits 5..2,0 indicate which floating-point exceptions shall, when
 217    occurring, *not* trigger a trap but merely set the corresponding bit in
 218    the fpsr register.  */
 219
     /* _FPU_GETCW (fpsr) reads the ar.fpsr register into FPSR; _FPU_SETCW (fpsr)
        writes FPSR to it.  FPSR is a general register operand.  Only the setter
        declares a "memory" clobber.  */
 220 # define _FPU_GETCW(fpsr) \
 221   __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr))
 222 # define _FPU_SETCW(fpsr) \
 223   __asm__ __volatile__ ("mov.m ar.fpsr=%0" :: "r" (fpsr) : "memory")
 224
 225 #elif defined __m68k__
 226
 227 /* fpsr bits 7..3 indicate which floating-point exceptions have occurred
 228    since the respective bit was last set to zero.  */
 229 /* fpcr bits 15..8 indicate which floating-point exceptions shall, when
 230    occurring, trigger a trap rather than merely set the corresponding bit
 231    in the fpsr register:
 232      - bit 15: branch/set on unordered
 233      - bit 14: signaling not-a-number
 234      - bit 13: operand error
 235      - bit 12: overflow
 236      - bit 11: underflow
 237      - bit 10: divide by zero
 238      - bit 9:  inexact operation
 239      - bit 8:  inexact decimal input
 240    FE_INVALID corresponds to all three: bit 15, bit 14, bit 13.  */
 241
     /* _FPU_GETCW (cw) reads the control register into CW; _FPU_SETCW (cw) writes
        CW to it.  CW may be a data register or a memory operand ("dm").
        NOTE(review): '%!' presumably expands to the fpcr register name in GCC's
        m68k asm template syntax -- confirm.  */
 242 # define _FPU_GETCW(cw) __asm__ __volatile__ ("fmove%.l %!, %0" : "=dm" (cw))
 243 # define _FPU_SETCW(cw) __asm__ __volatile__ ("fmove%.l %0, %!" : : "dm" (cw))
 244
     /* _FPU_GETFPSR (cw) and _FPU_SETFPSR (cw) do the same for the fpsr register;
        despite the parameter name, CW holds the status register value here.  */
 245 # define _FPU_GETFPSR(cw) __asm__ __volatile__ ("fmove%.l %/fpsr, %0" : "=dm" (cw))
 246 # define _FPU_SETFPSR(cw) __asm__ __volatile__ ("fmove%.l %0, %/fpsr" : : "dm" (cw))
 247
 248 #elif defined __mips__
 249
 250 /* fcsr bits 6..2 indicate which floating-point exceptions have occurred
 251    since the respective bit was last set to zero.
 252    fcsr bits 17..12 indicate which floating-point exceptions have occurred
 253    in the most recent instruction.  */
 254 /* fcsr bits 11..7 indicate which floating-point exceptions shall, when
 255    occurring, trigger a trap rather than merely set the corresponding bit
 256    in the fcsr register.  */
 257
     /* _FPU_GETCW (cw) reads floating-point control register $31 into CW ('cfc1');
        _FPU_SETCW (cw) writes CW to it ('ctc1').  CW is a general register
        operand.  */
 258 # define _FPU_GETCW(cw) __asm__ __volatile__ ("cfc1 %0,$31" : "=r" (cw))
 259 # define _FPU_SETCW(cw) __asm__ __volatile__ ("ctc1 %0,$31" : : "r" (cw))
 260
 261 #elif defined __loongarch__
 262
 263 /* fcsr0 bits 20..16 indicate which floating-point exceptions have occurred
 264    since the respective bit was last set to zero.
 265    fcsr0 bits 28..24 indicate which floating-point exceptions have occurred
 266    in the most recent instruction.  */
 267 /* fcsr0 bits 4..0 indicate which floating-point exceptions shall, when
 268    occurring, trigger a trap rather than merely set the corresponding bit
 269    in the fcsr0 register.  */
 270
     /* _FPU_GETCW (cw) reads the register into CW ('movfcsr2gr'); _FPU_SETCW (cw)
        writes CW to it ('movgr2fcsr').  CW is a general register operand.
        NOTE(review): the '$r0' operand presumably selects fcsr0 -- confirm.  */
 271 # define _FPU_GETCW(cw) __asm__ __volatile__ ("movfcsr2gr %0,$r0" : "=r" (cw))
 272 # define _FPU_SETCW(cw) __asm__ __volatile__ ("movgr2fcsr $r0,%0" : : "r" (cw))
 273
 274 #elif defined __powerpc__
 275
 276 /* fpscr bits 28..25 indicate which floating-point exceptions, other than
 277    FE_INVALID, have occurred since the respective bit was last set to zero.
 278    fpscr bits 24..19, 10..8 do the same thing, for various kinds of Invalid
 279    Operation.  fpscr bit 29 is the summary (the OR) of all these bits.  */
 280 /* fpscr bits 7..3 indicate which floating-point exceptions shall, when
 281    occurring, trigger a trap rather than merely set the corresponding bit
 282    in the fpscr register.  */
 283
     /* _FPU_GETCW_AS_DOUBLE (cw) executes 'mffs' into a local 'double' and then
        assigns that value to CW.  It is a do-while statement, not an
        expression.  */
 284 # define _FPU_GETCW_AS_DOUBLE(cw) \
 285   do { double env; __asm__ __volatile__ ("mffs %0" : "=f" (env)); cw = env; } \
 286   while (0)
     /* _FPU_SETCW_AS_DOUBLE (cw) executes 'mtfsf' with field mask 0xff, taking CW
        through the "f" operand constraint.  */
 287 # define _FPU_SETCW_AS_DOUBLE(cw) \
 288   __asm__ __volatile__ ("mtfsf 0xff,%0" : : "f" (cw))
 289
 290 # if defined __NetBSD__
 291 /* Modifying the FE0 and FE1 bits of the machine state register (MSR) is
 292    only possible from the kernel.  NetBSD allows it to be done from user
 293    space, by emulating the mfmsr and mtmsr instructions when they trap.
 294    In other words, these instructions are actually system calls in NetBSD.  */
     /* _GETMSR (msr) reads the MSR into MSR; _SETMSR (msr) writes MSR to it.
        MSR_FP_EXC_MASK selects the two bits that the PR_FP_EXC_* values below
        are made of.  */
 295 #  define _GETMSR(msr) __asm__ __volatile__ ("mfmsr %0" : "=r" (msr))
 296 #  define _SETMSR(msr) __asm__ __volatile__ ("mtmsr %0" : : "r" (msr))
 297 #  define MSR_FP_EXC_MASK 0x00000900
 298 /* This allows us to simulate the Linux prctl() through a macro.
        Only the operation PR_SET_FPEXC is handled: it replaces the MSR bits
        selected by MSR_FP_EXC_MASK with the corresponding bits of ARG.  Any other
        operation does nothing.  Unlike the real prctl(), this macro is a
        statement and yields no value.  */
 299 #  define PR_SET_FPEXC 1
 300 #  define PR_FP_EXC_DISABLED  0x00000000  /* FP exceptions disabled */
 301 #  define PR_FP_EXC_NONRECOV  0x00000100  /* async non-recoverable exc. mode */
 302 #  define PR_FP_EXC_ASYNC     0x00000800  /* async recoverable exception mode */
 303 #  define PR_FP_EXC_PRECISE   0x00000900  /* precise exception mode */
 304 #  define prctl(operation,arg) \
 305      do {                                         \
 306        if ((operation) == PR_SET_FPEXC)           \
 307          {                                        \
 308            unsigned int local_msr;                \
 309            _GETMSR (local_msr);                   \
 310            local_msr &= ~MSR_FP_EXC_MASK;         \
 311            local_msr |= (arg) & MSR_FP_EXC_MASK;  \
 312            _SETMSR (local_msr);                   \
 313          }                                        \
 314      } while (0)
 315 # endif
 316
 317 #elif defined __riscv
 318
 319 /* fcsr bits 4..0 indicate which floating-point exceptions have occurred
 320    since the respective bit was last set to zero.  */
 321
 322 /* Trapping of floating-point exceptions does not work on RISC-V.  That's
 323    because the fcsr register has only bits for floating-point exception status,
 324    but no bits for trapping floating-point exceptions.  */
 325
 326 #elif defined __s390__ || defined __s390x__
 327
 328 /* fpc bits 23..19 indicate which floating-point exceptions have occurred
 329    since the respective bit was last set to zero.
 330    fpc bits 15..11 are part of the "data exception code" (DXC) and have a
 331    similar meaning if bits 9..8 are both zero.  */
 332 /* fpc bits 31..27 indicate which floating-point exceptions shall, when
 333    occurring, trigger a trap rather than merely set the corresponding bit
 334    in the fpc register.  */
 335
     /* _FPU_GETCW (cw) reads the fpc register into CW ('efpc'); _FPU_SETCW (cw)
        writes CW to it ('sfpc').  CW uses the "d" operand constraint.  */
 336 # define _FPU_GETCW(cw)  __asm__ __volatile__ ("efpc %0" : "=d" (cw))
 337 # define _FPU_SETCW(cw)  __asm__ __volatile__ ("sfpc %0" : : "d" (cw))
 338
 339 #elif defined __sh__
 340
 341 /* fpscr bits 6..2 indicate which floating-point exceptions have occurred
 342    since the respective bit was last set to zero.  */
 343 /* fpscr bits 11..7 indicate which floating-point exceptions shall, when
 344    occurring, trigger a trap rather than merely set the corresponding bit
 345    in the fpscr register.  */
 346
     /* _FPU_GETCW (cw) reads the fpscr register into CW ('sts'); _FPU_SETCW (cw)
        writes CW to it ('lds').  CW is a general register operand.
        The asm statements are marked __volatile__, like on all other
        architectures: fpscr changes as a side effect of floating-point
        operations, so a read must not be merged with an earlier read, moved,
        or dropped by the compiler.  */
 347 # define _FPU_GETCW(cw) __asm__ __volatile__ ("sts fpscr,%0" : "=r" (cw))
 348 # define _FPU_SETCW(cw) __asm__ __volatile__ ("lds %0,fpscr" : : "r" (cw))
 349
 350 #elif defined __sparc
 351
 352 /* fsr bits 9..5 indicate which floating-point exceptions have occurred
 353    since the respective bit was last set to zero.  */
 354 /* fsr bits 27..23 indicate which floating-point exceptions shall, when
 355    occurring, trigger a trap rather than merely set the corresponding bit
 356    in the fsr register.  */
 357
     /* _FPU_GETCW (X) stores the fsr register into X; _FPU_SETCW (X) loads the fsr
        register from X.  X is a memory operand, so it must be an addressable
        lvalue.  On sparc64 the 'stx' / 'ldx' forms are used, otherwise
        'st' / 'ld'.  */
 358 # if defined __sparcv9 || defined __arch64__ /* sparc64 */
 359 #  define _FPU_GETCW(X)   __asm__ __volatile__ ("stx %%fsr,%0" : "=m" (X))
 360 #  define _FPU_SETCW(X)   __asm__ __volatile__ ("ldx %0,%%fsr" : : "m" (X))
 361 # else
 362 #  define _FPU_GETCW(X)   __asm__ __volatile__ ("st %%fsr,%0" : "=m" (X))
 363 #  define _FPU_SETCW(X)   __asm__ __volatile__ ("ld %0,%%fsr" : : "m" (X))
 364 # endif
 365
 366 #endif
 367
 368 #if defined _AIX && defined __powerpc__ /* AIX */
 369
 370 /* <fpxcp.h> defines a type fpflag_t and macros FP_*.  */
 371
 372 /* Convert from an 'int exceptions' to an fpflag_t.
        The disabled "unoptimized" variant shows the intended flag-by-flag
        mapping.  The active variant merely masks with FE_ALL_EXCEPT, which
        presumably relies on each FE_* constant having the same value as the
        corresponding FP_* constant -- confirm against the AIX headers.  */
 373 # if 0 /* Unoptimized */
 374 #  define exceptions_to_fpflag(exceptions) \
 375      (  ((exceptions) & FE_INVALID   ? FP_INVALID     : 0) \
 376       | ((exceptions) & FE_DIVBYZERO ? FP_DIV_BY_ZERO : 0) \
 377       | ((exceptions) & FE_OVERFLOW  ? FP_OVERFLOW    : 0) \
 378       | ((exceptions) & FE_UNDERFLOW ? FP_UNDERFLOW   : 0) \
 379       | ((exceptions) & FE_INEXACT   ? FP_INEXACT     : 0))
 380 # else /* Optimized */
 381 #  define exceptions_to_fpflag(exceptions) \
 382      ((exceptions) & FE_ALL_EXCEPT)
 383 # endif
 384
 385 /* Convert from an fpflag_t to an 'int exceptions'.
        The disabled "unoptimized" variant shows the intended flag-by-flag
        mapping.  The active variant merely masks with FE_ALL_EXCEPT, which
        presumably relies on each FP_* constant having the same value as the
        corresponding FE_* constant -- confirm against the AIX headers.  */
 386 # if 0 /* Unoptimized */
 387 #  define fpflag_to_exceptions(f) \
 388      (  ((f) & FP_INVALID     ? FE_INVALID   : 0) \
 389       | ((f) & FP_DIV_BY_ZERO ? FE_DIVBYZERO : 0) \
 390       | ((f) & FP_OVERFLOW    ? FE_OVERFLOW  : 0) \
 391       | ((f) & FP_UNDERFLOW   ? FE_UNDERFLOW : 0) \
 392       | ((f) & FP_INEXACT     ? FE_INEXACT   : 0))
 393 # else /* Optimized */
 394 #  define fpflag_to_exceptions(f) \
 395      ((f) & FE_ALL_EXCEPT)
 396 # endif
 397
 398 /* The implementation of fegetexcept().  Avoids a module dependency.
        Evaluates to the OR of the FE_* flags for which fp_is_enabled () reports
        the corresponding TRP_* trap as enabled; fp_is_enabled () is called once
        per flag, five times in total.  */
 399 # define fegetexcept_impl() \
 400     (  (fp_is_enabled (TRP_INVALID)     ? FE_INVALID   : 0) \
 401      | (fp_is_enabled (TRP_DIV_BY_ZERO) ? FE_DIVBYZERO : 0) \
 402      | (fp_is_enabled (TRP_OVERFLOW)    ? FE_OVERFLOW  : 0) \
 403      | (fp_is_enabled (TRP_UNDERFLOW)   ? FE_UNDERFLOW : 0) \
 404      | (fp_is_enabled (TRP_INEXACT)     ? FE_INEXACT   : 0))
 405
 406 #endif