autoupdate
[gnulib.git] / lib / fenv-private.h
blob9571b7b93d6719928ad3cedc050ac35936fc4245
1 /* Common definitions for the implementation of the various <fenv.h> modules.
2 Copyright (C) 1997-2024 Free Software Foundation, Inc.
4 This file is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2.1 of the
7 License, or (at your option) any later version.
9 This file is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Based on glibc/sysdeps/<cpu>/{fpu_control.h,fenv_private.h,fenv_libc.h}. */
19 #if (defined __x86_64__ || defined _M_X64) || (defined __i386 || defined _M_IX86)
/* CPU_HAS_SSE () returns true if the CPU has an SSE unit.
   A single conditional ladder selects the implementation; each branch also
   brings in the declaration that its expansion refers to, so the two cannot
   get out of sync.  */
# if defined __x86_64__ || defined _M_X64
/* In 64-bit mode the answer is always yes.  */
#  define CPU_HAS_SSE() 1
# elif __GLIBC__ + (__GLIBC_MINOR__ >= 33) > 2
/* glibc >= 2.33 has an API that tells us whether the CPU has an SSE unit.  */
#  include <sys/platform/x86.h>
#  define CPU_HAS_SSE() CPU_FEATURE_PRESENT (SSE)
# elif defined __sun
/* Solaris has a global variable that tells us whether the CPU has an SSE
   unit.  */
extern int _sse_hw;
#  define CPU_HAS_SSE() _sse_hw
# else
/* Otherwise, we assume that the SSE unit is present.
   Only very old 32-bit processors, before Pentium 4, don't have it.
   Don't bother testing it, through a 'cpuid' instruction.  */
#  define CPU_HAS_SSE() 1
# endif
47 /* fstat bits 5..2,0 indicate which floating-point exceptions have occurred
48 in the 387 compatible floating-point unit since the respective bit was last
49 set to zero.
50 mxcsr bits 5..2,0 indicate which floating-point exceptions have occurred
51 in the SSE floating-point unit since the respective bit was last set to
52 zero. */
/* fctrl bits 5..2,0 indicate which floating-point exceptions shall, when
   occurring in the 387 compatible floating-point unit, *not* trigger a trap
   but rather merely set the corresponding bit in the fstat register.
   mxcsr bits 12..9,7 indicate which floating-point exceptions shall, when
   occurring in the SSE floating-point unit, *not* trigger a trap but rather
   merely set the corresponding bit in the mxcsr register.  */
/* Macros that access the control word of the 387 unit, the so-called fctrl
   register.
   _FPU_GETCW (cw) stores the register into CW; _FPU_SETCW (cw) loads the
   register from CW.  CW must be an lvalue that lives in memory, because it
   is passed to the instruction as an "m" operand.  The argument is
   parenthesized so that any lvalue expression can be passed safely.  */
# define _FPU_GETCW(cw) __asm__ __volatile__ ("fnstcw %0" : "=m" (*&(cw)))
# define _FPU_SETCW(cw) __asm__ __volatile__ ("fldcw %0" : : "m" (*&(cw)))
/* Macro that reads the status word of the 387 unit, the so-called fstat
   register, into CW.  CW must be an lvalue that lives in memory, because it
   is passed to the instruction as an "m" operand.  */
# define _FPU_GETSTAT(cw) __asm__ __volatile__ ("fnstsw %0" : "=m" (*&(cw)))
/* Macros that access the control and status word of the SSE unit, the mxcsr
   register.
   _FPU_GETSSECW (cw) stores the register into CW; _FPU_SETSSECW (cw) loads
   the register from CW.  In the GCC/clang variant CW must be an lvalue that
   lives in memory, because it is passed as an "m" operand.  */
# if defined __GNUC__ || defined __clang__
#  define _FPU_GETSSECW(cw) __asm__ __volatile__ ("stmxcsr %0" : "=m" (*&(cw)))
#  define _FPU_SETSSECW(cw) __asm__ __volatile__ ("ldmxcsr %0" : : "m" (*&(cw)))
# elif defined _MSC_VER
/* NOTE(review): confirm that <mmintrin.h> is the header that declares
   _mm_getcsr and _mm_setcsr for the targeted MSVC versions.  */
#  include <mmintrin.h>
/* Documentation:
   <https://learn.microsoft.com/en-us/cpp/intrinsics/x86-intrinsics-list>
   <https://learn.microsoft.com/en-us/cpp/intrinsics/x64-amd64-intrinsics-list>
   <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getcsr&ig_expand=3548>
   <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setcsr&ig_expand=5924>
 */
#  define _FPU_GETSSECW(cw) ((cw) = _mm_getcsr ())
#  define _FPU_SETSSECW(cw) _mm_setcsr (cw)
# endif
/* The floating-point environment of the 387 unit.
   Only the control word and the status word are named; the remaining words
   are carried along as opaque storage.  */
typedef struct
  {
    /* 7 32-bit words:  */
    unsigned short __control_word;      /* fctrl register */
    unsigned short __reserved1;
    unsigned short __status_word;       /* fstat register */
    unsigned short __reserved2;
    unsigned int more[5];
  }
x86_387_fenv_t;
/* Conversion between the FE_* exception bits of <fenv.h> and the bit layout
   used by these macros for the hardware registers:
     0x01 invalid, 0x04 division by zero, 0x08 overflow, 0x10 underflow,
     0x20 inexact.
   exceptions_to_x86hardware (exceptions) maps FE_* bits to hardware bits;
   x86hardware_to_exceptions (fstat) maps hardware bits to FE_* bits.
   Both macros evaluate their argument several times in the MSVC variant.  */
# if defined _MSC_VER
/* The MSVC header files have different values for the floating-point exceptions
   than all the other platforms.  Define some handy macros for conversion.  */
#  define exceptions_to_x86hardware(exceptions) \
     (  ((exceptions) & FE_INVALID   ? 0x01 : 0) \
      | ((exceptions) & FE_DIVBYZERO ? 0x04 : 0) \
      | ((exceptions) & FE_OVERFLOW  ? 0x08 : 0) \
      | ((exceptions) & FE_UNDERFLOW ? 0x10 : 0) \
      | ((exceptions) & FE_INEXACT   ? 0x20 : 0))
#  define x86hardware_to_exceptions(fstat) \
     (  ((fstat) & 0x01 ? FE_INVALID   : 0) \
      | ((fstat) & 0x04 ? FE_DIVBYZERO : 0) \
      | ((fstat) & 0x08 ? FE_OVERFLOW  : 0) \
      | ((fstat) & 0x10 ? FE_UNDERFLOW : 0) \
      | ((fstat) & 0x20 ? FE_INEXACT   : 0))
# else
/* Elsewhere the FE_* values are used as hardware bits unchanged.  */
#  define exceptions_to_x86hardware(exceptions) (exceptions)
#  define x86hardware_to_exceptions(fstat) (fstat)
# endif
/* When _MSC_VER is defined, the 387 compatible floating-point unit is *not*
   in use.  Only the SSE floating-point unit is used.  This can be inferred
   from two facts:
     - sizeof (long double) == sizeof (double).  That is, 'long double'
       values are just 'double' values and can be processed in the SSE unit.
     - After fegetenv (&env), the value of env._Fe_stat is *not* the fstat
       register of the 387 unit.  Rather, it is an artificial value.  In
       particular, (env._Fe_stat & 0x3f) is
       == x86hardware_to_exceptions (_FPU_GETSSECW () & 0x3f).  */
128 #elif defined __aarch64__ /* arm64 */
130 /* fpsr bits 4..0 indicate which floating-point exceptions have occurred
131 since the respective bit was last set to zero. */
132 /* fpcr bits 12..8 indicate which floating-point exceptions shall, when
133 occurring, trigger a trap rather than merely set the corresponding bit
134 in the fpsr register. */
/* Macros that access the fpcr register.
   _FPU_GETCW (fpcr) stores the register into FPCR; _FPU_SETCW (fpcr) writes
   FPCR into the register.  Compiler builtins are used where the compiler
   version checks below select them; otherwise the mrs/msr instructions are
   emitted directly.  */
# if __GNUC__ >= 6 && !defined __clang__
#  define _FPU_GETCW(fpcr) ((fpcr) = __builtin_aarch64_get_fpcr ())
#  define _FPU_SETCW(fpcr) __builtin_aarch64_set_fpcr (fpcr)
# elif __clang_major__ >= 4
#  define _FPU_GETCW(fpcr) ((fpcr) = __builtin_arm_rsr ("fpcr"))
#  define _FPU_SETCW(fpcr) __builtin_arm_wsr ("fpcr", fpcr)
# else
#  define _FPU_GETCW(fpcr) \
     __asm__ __volatile__ ("mrs %0, fpcr" : "=r" (fpcr))
#  define _FPU_SETCW(fpcr) \
     __asm__ __volatile__ ("msr fpcr, %0" : : "r" (fpcr))
# endif
/* Macros that access the fpsr register.
   _FPU_GETFPSR (fpsr) stores the register into FPSR; _FPU_SETFPSR (fpsr)
   writes FPSR into the register.  Compiler builtins are used where the
   compiler version checks below select them; otherwise the mrs/msr
   instructions are emitted directly.  */
# if __GNUC__ >= 6 && !defined __clang__
#  define _FPU_GETFPSR(fpsr) ((fpsr) = __builtin_aarch64_get_fpsr ())
#  define _FPU_SETFPSR(fpsr) __builtin_aarch64_set_fpsr (fpsr)
# elif __clang_major__ >= 4
#  define _FPU_GETFPSR(fpsr) ((fpsr) = __builtin_arm_rsr ("fpsr"))
#  define _FPU_SETFPSR(fpsr) __builtin_arm_wsr ("fpsr", fpsr)
# else
#  define _FPU_GETFPSR(fpsr) \
     __asm__ __volatile__ ("mrs %0, fpsr" : "=r" (fpsr))
#  define _FPU_SETFPSR(fpsr) \
     __asm__ __volatile__ ("msr fpsr, %0" : : "r" (fpsr))
# endif
162 #elif defined __arm__
164 /* fpscr bits 23..22 indicate the rounding direction. */
165 /* fpscr bits 4..0 indicate which floating-point exceptions have occurred
166 since the respective bit was last set to zero. */
167 /* fpscr bits 12..8 indicate which floating-point exceptions shall, when
168 occurring, trigger a trap rather than merely set the corresponding bit
169 in the fpscr register. */
/* Macros that access the fpscr register.  They are defined only when
   __SOFTFP__ is not defined.
   _FPU_GETCW (cw) stores the register into CW; _FPU_SETCW (cw) writes CW
   into the register.  */
# if !defined __SOFTFP__
#  define _FPU_GETCW(cw) \
     __asm__ __volatile__ ("vmrs %0, fpscr" : "=r" (cw))
#  define _FPU_SETCW(cw) \
     __asm__ __volatile__ ("vmsr fpscr, %0" : : "r" (cw))
# endif
178 #elif defined __alpha
/* System calls.  */
extern unsigned long __ieee_get_fp_control (void);
extern void __ieee_set_fp_control (unsigned long);

/* Macros that access the fpcr register.  The operand uses an "f"
   constraint, i.e. FPCR is transferred through a floating-point register.
   Each access is paired with an excb instruction, placed before the read and
   after the write.  */
# define _FPU_GETCW(fpcr) \
    __asm__ __volatile__ ("excb; mf_fpcr %0" : "=f" (fpcr))
# define _FPU_SETCW(fpcr) \
    __asm__ __volatile__ ("mt_fpcr %0; excb" : : "f" (fpcr))
189 #elif defined __hppa
191 /* Bits 31..27 of the first 32-bit word of %fr0 indicate which floating-point
192 exceptions have occurred since the respective bit was last set to zero. */
193 /* Bits 4..0 of the first 32-bit word of %fr0 indicate which floating-point
194 exceptions shall, when occurring, trigger a trap rather than merely set the
195 corresponding flag bit. */
/* The status register is located in bits 0 to 31 of floating-point register 0.
   Both macros go through memory: %fr0 is stored into a local 64-bit union,
   whose first 32-bit half is then read (_FPU_GETCW) or replaced
   (_FPU_SETCW).  They are GNU C statement expressions.  */
# define _FPU_GETCW(cw) \
({ \
  union { __extension__ unsigned long long __fpreg; unsigned int __halfreg[2]; } __fullfp; \
  /* Get the current status word.  The asm is volatile so that repeated \
     reads are not merged or moved by the optimizer.  */ \
  __asm__ __volatile__ ("fstd %%fr0,0(%1)\n\t" \
                        "fldd 0(%1),%%fr0\n\t" \
                        : "=m" (__fullfp.__fpreg) : "r" (&__fullfp.__fpreg) : "%r0"); \
  (cw) = __fullfp.__halfreg[0]; \
})
# define _FPU_SETCW(cw) \
({ \
  union { __extension__ unsigned long long __fpreg; unsigned int __halfreg[2]; } __fullfp; \
  /* Get the current status word and set the control word.  */ \
  __asm__ __volatile__ ("fstd %%fr0,0(%1)\n\t" \
                        : "=m" (__fullfp.__fpreg) : "r" (&__fullfp.__fpreg) : "%r0"); \
  __fullfp.__halfreg[0] = (cw); \
  __asm__ __volatile__ ("fldd 0(%1),%%fr0\n\t" \
                        : : "m" (__fullfp.__fpreg), "r" (&__fullfp.__fpreg) : "%r0" ); \
})
218 #elif defined __ia64__
/* fpsr bits 12..9,7 indicate which floating-point exceptions have occurred
   since the respective bit was last set to zero.  */
/* fpsr bits 5..2,0 indicate which floating-point exceptions shall, when
   occurring, *not* trigger a trap but rather merely set the corresponding
   bit in the fpsr register.  */
/* Macros that access the ar.fpsr application register.
   _FPU_GETCW (fpsr) stores the register into FPSR; _FPU_SETCW (fpsr) writes
   FPSR into the register.  The write carries a "memory" clobber.  */
# define _FPU_GETCW(fpsr) \
    __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr))
# define _FPU_SETCW(fpsr) \
    __asm__ __volatile__ ("mov.m ar.fpsr=%0" :: "r" (fpsr) : "memory")
231 #elif defined __m68k__
233 /* fpsr bits 7..3 indicate which floating-point exceptions have occurred
234 since the respective bit was last set to zero. */
235 /* fpcr bits 15..8 indicate which floating-point exceptions shall, when
236 occurring, trigger a trap rather than merely set the corresponding bit
237 in the fpsr register:
238 - bit 15: branch/set on unordered
239 - bit 14: signaling not-a-number
240 - bit 13: operand error
241 - bit 12: overflow
242 - bit 11: underflow
243 - bit 10: divide by zero
244 - bit 9: inexact operation
245 - bit 8: inexact decimal input
246 FE_INVALID corresponds to all three: bit 15, bit 14, bit 13. */
/* Macros that access the fpcr register.  The operand may be a data register
   or a memory location ("dm" constraint).  */
# define _FPU_GETCW(cw) __asm__ __volatile__ ("fmove%.l %!, %0" : "=dm" (cw))
# define _FPU_SETCW(cw) __asm__ __volatile__ ("fmove%.l %0, %!" : : "dm" (cw))

/* Macros that access the fpsr register, with the same operand rules.  */
# define _FPU_GETFPSR(cw) __asm__ __volatile__ ("fmove%.l %/fpsr, %0" : "=dm" (cw))
# define _FPU_SETFPSR(cw) __asm__ __volatile__ ("fmove%.l %0, %/fpsr" : : "dm" (cw))
254 #elif defined __mips__
256 /* fcsr bits 6..2 indicate which floating-point exceptions have occurred
257 since the respective bit was last set to zero.
258 fcsr bits 17..12 indicate which floating-point exceptions have occurred
259 in the most recent instruction. */
260 /* fcsr bits 11..7 indicate which floating-point exceptions shall, when
261 occurring, trigger a trap rather than merely set the corresponding bit
262 in the fcsr register. */
/* Macros that access the fcsr register (FPU control register $31).
   _FPU_GETCW (cw) stores the register into CW; _FPU_SETCW (cw) writes CW
   into the register.  */
# define _FPU_GETCW(cw) __asm__ __volatile__ ("cfc1 %0,$31" : "=r" (cw))
# define _FPU_SETCW(cw) __asm__ __volatile__ ("ctc1 %0,$31" : : "r" (cw))
267 #elif defined __loongarch__
269 /* fcsr0 bits 20..16 indicate which floating-point exceptions have occurred
270 since the respective bit was last set to zero.
271 fcsr0 bits 28..24 indicate which floating-point exceptions have occurred
272 in the most recent instruction. */
273 /* fcsr0 bits 4..0 indicate which floating-point exceptions shall, when
274 occurring, trigger a trap rather than merely set the corresponding bit
275 in the fcsr0 register. */
/* Macros that access the fcsr0 register.
   _FPU_GETCW (cw) stores the register into CW; _FPU_SETCW (cw) writes CW
   into the register.  */
# define _FPU_GETCW(cw) __asm__ __volatile__ ("movfcsr2gr %0,$r0" : "=r" (cw))
# define _FPU_SETCW(cw) __asm__ __volatile__ ("movgr2fcsr $r0,%0" : : "r" (cw))
280 #elif defined __powerpc__
282 /* fpscr bits 28..25 indicate which floating-point exceptions, other than
283 FE_INVALID, have occurred since the respective bit was last set to zero.
284 fpscr bits 24..19, 10..8 do the same thing, for various kinds of Invalid
285 Operation. fpscr bit 29 is the summary (the OR) of all these bits. */
286 /* fpscr bits 7..3 indicate which floating-point exceptions shall, when
287 occurring, trigger a trap rather than merely set the corresponding bit
288 in the fpscr register. */
/* Macros that access the fpscr register through a floating-point register.
   _FPU_GETCW_AS_DOUBLE (cw) stores the register image, as a 'double', into
   CW; _FPU_SETCW_AS_DOUBLE (cw) writes the 'double' CW into the register.
   The temporary has a name that is unlikely to collide with the caller's
   argument, and CW is parenthesized.  */
# define _FPU_GETCW_AS_DOUBLE(cw) \
    do \
      { \
        double gl_fpu_env_; \
        __asm__ __volatile__ ("mffs %0" : "=f" (gl_fpu_env_)); \
        (cw) = gl_fpu_env_; \
      } \
    while (0)
# define _FPU_SETCW_AS_DOUBLE(cw) \
    __asm__ __volatile__ ("mtfsf 0xff,%0" : : "f" (cw))
# if defined __NetBSD__
/* Modifying the FE0 and FE1 bits of the machine state register (MSR) is
   only possible from the kernel.  NetBSD allows it to be done from user
   space, by emulating the mfmsr and mtmsr instructions when they trap.
   In other words, these instructions are actually system calls in NetBSD.  */
#  define _GETMSR(msr) __asm__ __volatile__ ("mfmsr %0" : "=r" (msr))
#  define _SETMSR(msr) __asm__ __volatile__ ("mtmsr %0" : : "r" (msr))
/* The MSR bits that the PR_FP_EXC_* values below are made of.  */
#  define MSR_FP_EXC_MASK 0x00000900
/* This allows us to simulate the Linux prctl() through a macro.
   Only the operation PR_SET_FPEXC is handled; any other operation is
   silently ignored.  */
#  define PR_SET_FPEXC 1
#  define PR_FP_EXC_DISABLED 0x00000000 /* FP exceptions disabled */
#  define PR_FP_EXC_NONRECOV 0x00000100 /* async non-recoverable exc. mode */
#  define PR_FP_EXC_ASYNC    0x00000800 /* async recoverable exception mode */
#  define PR_FP_EXC_PRECISE  0x00000900 /* precise exception mode */
#  define prctl(operation,arg) \
     do { \
       if ((operation) == PR_SET_FPEXC) \
         { \
           /* Replace the masked MSR bits with those of ARG.  */ \
           unsigned int local_msr; \
           _GETMSR (local_msr); \
           local_msr &= ~MSR_FP_EXC_MASK; \
           local_msr |= (arg) & MSR_FP_EXC_MASK; \
           _SETMSR (local_msr); \
         } \
     } while (0)
# endif
323 #elif defined __riscv
325 /* fcsr bits 4..0 indicate which floating-point exceptions have occurred
326 since the respective bit was last set to zero. */
328 /* Trapping of floating-point exceptions does not work on RISC-V. That's
329 because the fcsr register has only bits for floating-point exception status,
330 but no bits for trapping floating-point exceptions. */
332 #elif defined __s390__ || defined __s390x__
334 /* fpc bits 23..19 indicate which floating-point exceptions have occurred
335 since the respective bit was last set to zero.
336 fpc bits 15..11 are part of the "data exception code" (DXC) and have a
337 similar meaning if bits 9..8 are both zero. */
338 /* fpc bits 31..27 indicate which floating-point exceptions shall, when
339 occurring, trigger a trap rather than merely set the corresponding bit
340 in the fpc register. */
/* Macros that access the fpc register.
   _FPU_GETCW (cw) stores the register into CW; _FPU_SETCW (cw) writes CW
   into the register.  */
# define _FPU_GETCW(cw) __asm__ __volatile__ ("efpc %0" : "=d" (cw))
# define _FPU_SETCW(cw) __asm__ __volatile__ ("sfpc %0" : : "d" (cw))
345 #elif defined __sh__
347 /* fpscr bits 6..2 indicate which floating-point exceptions have occurred
348 since the respective bit was last set to zero. */
349 /* fpscr bits 11..7 indicate which floating-point exceptions shall, when
350 occurring, trigger a trap rather than merely set the corresponding bit
351 in the fpscr register. */
/* Macros that access the fpscr register.
   _FPU_GETCW (cw) stores the register into CW; _FPU_SETCW (cw) writes CW
   into the register.
   Both are marked volatile.  For the read this matters: an asm that has
   outputs but is not volatile may be merged with an identical one, hoisted
   or dropped by the optimizer, although the register contents can change
   between two reads.  */
# define _FPU_GETCW(cw) __asm__ __volatile__ ("sts fpscr,%0" : "=r" (cw))
# define _FPU_SETCW(cw) __asm__ __volatile__ ("lds %0,fpscr" : : "r" (cw))
356 #elif defined __sparc
358 /* fsr bits 9..5 indicate which floating-point exceptions have occurred
359 since the respective bit was last set to zero. */
360 /* fsr bits 27..23 indicate which floating-point exceptions shall, when
361 occurring, trigger a trap rather than merely set the corresponding bit
362 in the fsr register. */
/* Macros that access the fsr register.  X must be an lvalue that lives in
   memory ("m" constraint).  The sparc64 variant uses the stx/ldx
   instructions, the 32-bit variant uses st/ld.  */
# if defined __sparcv9 || defined __arch64__ /* sparc64 */
#  define _FPU_GETCW(X) __asm__ __volatile__ ("stx %%fsr,%0" : "=m" (X))
#  define _FPU_SETCW(X) __asm__ __volatile__ ("ldx %0,%%fsr" : : "m" (X))
# else
#  define _FPU_GETCW(X) __asm__ __volatile__ ("st %%fsr,%0" : "=m" (X))
#  define _FPU_SETCW(X) __asm__ __volatile__ ("ld %0,%%fsr" : : "m" (X))
# endif
372 #endif
374 #if defined _AIX && defined __powerpc__ /* AIX */
376 /* <fpxcp.h> defines a type fpflag_t and macros FP_*. */
/* Convert from an 'int exceptions' to an fpflag_t.
   The unoptimized variant, kept for reference, maps each FE_* bit to the
   corresponding FP_* bit.  The variant in use merely masks with
   FE_ALL_EXCEPT; presumably each FE_* value equals its FP_* counterpart on
   this platform -- verify against <fpxcp.h> before changing either one.  */
# if 0 /* Unoptimized */
#  define exceptions_to_fpflag(exceptions) \
     (  ((exceptions) & FE_INVALID   ? FP_INVALID     : 0) \
      | ((exceptions) & FE_DIVBYZERO ? FP_DIV_BY_ZERO : 0) \
      | ((exceptions) & FE_OVERFLOW  ? FP_OVERFLOW    : 0) \
      | ((exceptions) & FE_UNDERFLOW ? FP_UNDERFLOW   : 0) \
      | ((exceptions) & FE_INEXACT   ? FP_INEXACT     : 0))
# else /* Optimized */
#  define exceptions_to_fpflag(exceptions) \
     ((exceptions) & FE_ALL_EXCEPT)
# endif
/* Convert from an fpflag_t to an 'int exceptions'.
   The unoptimized variant, kept for reference, maps each FP_* bit to the
   corresponding FE_* bit.  The variant in use merely masks with
   FE_ALL_EXCEPT; presumably each FP_* value equals its FE_* counterpart on
   this platform -- verify against <fpxcp.h> before changing either one.  */
# if 0 /* Unoptimized */
#  define fpflag_to_exceptions(f) \
     (  ((f) & FP_INVALID     ? FE_INVALID   : 0) \
      | ((f) & FP_DIV_BY_ZERO ? FE_DIVBYZERO : 0) \
      | ((f) & FP_OVERFLOW    ? FE_OVERFLOW  : 0) \
      | ((f) & FP_UNDERFLOW   ? FE_UNDERFLOW : 0) \
      | ((f) & FP_INEXACT     ? FE_INEXACT   : 0))
# else /* Optimized */
#  define fpflag_to_exceptions(f) \
     ((f) & FE_ALL_EXCEPT)
# endif
/* The implementation of fegetexcept().  Avoids a module dependency.
   Evaluates to the OR of the FE_* bits for which fp_is_enabled () reports
   the corresponding TRP_* trap as enabled.  */
# define fegetexcept_impl() \
    (  (fp_is_enabled (TRP_INVALID)     ? FE_INVALID   : 0) \
     | (fp_is_enabled (TRP_DIV_BY_ZERO) ? FE_DIVBYZERO : 0) \
     | (fp_is_enabled (TRP_OVERFLOW)    ? FE_OVERFLOW  : 0) \
     | (fp_is_enabled (TRP_UNDERFLOW)   ? FE_UNDERFLOW : 0) \
     | (fp_is_enabled (TRP_INEXACT)     ? FE_INEXACT   : 0))
412 #endif