LoongArch: Guard REGNO with REG_P in loongarch_expand_conditional_move [PR115169]
[official-gcc.git] / libgfortran / config / fpu-387.h
blob19a03354f7d2c1dfe1f643bdbb877c7d0b468ec5
1 /* FPU-related code for x86 and x86_64 processors.
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 #ifndef __SSE_MATH__
27 #include "cpuid.h"
28 #endif
/* Report whether the processor we are running on supports SSE.  When
   the compiler itself generates SSE math the answer is known at
   compile time; otherwise query CPUID leaf 1.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  /* SSE math is in use, so the instructions are certainly present.  */
  return 1;
#else
  unsigned int eax, ebx, ecx, edx;

  /* CPUID leaf 1: EDX advertises SSE support via bit_SSE (bit 25).  */
  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return edx & bit_SSE;

  return 0;
#endif
}
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   The same six bits serve as mask bits in the control word and as
   sticky flag bits in the status word.  */
#define _FPU_MASK_IM  0x01	/* Invalid operation.  */
#define _FPU_MASK_DM  0x02	/* Denormalized operand.  */
#define _FPU_MASK_ZM  0x04	/* Zero divide.  */
#define _FPU_MASK_OM  0x08	/* Overflow.  */
#define _FPU_MASK_UM  0x10	/* Underflow.  */
#define _FPU_MASK_PM  0x20	/* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f	/* All six exception mask bits.  */

#define _FPU_EX_ALL   0x3f	/* All six exception flag bits.  */

/* i387 rounding modes (round-control field values).  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3	/* Width of the round-control field.  */

/* Enable flush to zero mode (MXCSR bit 15).  */

#define MXCSR_FTZ (1 << 15)
/* This structure corresponds to the layout of the block
   written by FSTENV (28-byte protected-mode x87 environment image),
   extended with a trailing slot for the SSE MXCSR register so that a
   single buffer can hold the complete FPU state.  */
struct fenv
{
  unsigned short int __control_word;	/* x87 control word.  */
  unsigned short int __unused1;
  unsigned short int __status_word;	/* x87 status word (sticky flags).  */
  unsigned short int __unused2;
  unsigned short int __tags;		/* Register-stack tag word.  */
  unsigned short int __unused3;
  unsigned int __eip;			/* Last instruction pointer.  */
  unsigned short int __cs_selector;
  unsigned int __opcode:11;		/* Opcode of last non-control insn.  */
  unsigned int __unused4:5;
  unsigned int __data_offset;		/* Last operand address.  */
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;			/* SSE control/status register; NOT
					   part of the FSTENV image itself,
					   filled in by get_fpu_state.  */
} __attribute__ ((gcc_struct));

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
#ifdef __SSE_MATH__
/* Force the division X / Y to be evaluated at run time so that any
   floating-point exception it signals is actually raised.  The first
   empty asm makes X opaque to the optimizer ("+x" keeps it in an SSE
   register); the second consumes the quotient so the division cannot
   be dead-code eliminated.  */
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+x" (x)); __asm__ __volatile__ ("" : : "x" (x / y)); \
  } while (0)
#else
/* Same trick for the x87: "+t" pins X to the top of the x87 register
   stack, "f" consumes the quotient from an x87 register.  */
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+t" (x)); __asm__ __volatile__ ("" : : "f" (x / y)); \
  } while (0)
#endif
/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.  EXCEPTS is expressed in _FPU_MASK_*
   bits.  Where possible the exception is signaled by performing an
   arithmetic operation that raises it; denormal, overflow and
   underflow have no such simple trigger and are set by rewriting the
   saved x87 environment (the flag bits of the status word share the
   low-six-bit layout of the mask bits, so the _FPU_MASK_* values can
   be OR'd into __status_word directly).  */

static void
local_feraiseexcept (int excepts)
{
  struct fenv temp;

  if (excepts & _FPU_MASK_IM)
    {
      /* Invalid operation: 0.0 / 0.0.  */
      float f = 0.0f;
      __math_force_eval_div (f, f);
    }
  if (excepts & _FPU_MASK_DM)
    {
      /* Denormal: set the sticky flag directly in the environment,
	 then fwait so a trap (if unmasked) is delivered now.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      /* Division by zero: 1.0 / 0.0.  */
      float f = 1.0f, g = 0.0f;
      __math_force_eval_div (f, g);
    }
  if (excepts & _FPU_MASK_OM)
    {
      /* Overflow: set the sticky flag directly.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      /* Underflow: set the sticky flag directly.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      /* Inexact: 1.0 / 3.0 has no exact binary representation.  */
      float f = 1.0f, g = 3.0f;
      __math_force_eval_div (f, g);
    }
}
154 void
155 set_fpu_trap_exceptions (int trap, int notrap)
157 int exc_set = 0, exc_clr = 0;
158 unsigned short cw;
160 if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
161 if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
162 if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
163 if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
164 if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
165 if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
167 if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
168 if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
169 if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
170 if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
171 if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
172 if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
174 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
176 cw |= exc_clr;
177 cw &= ~exc_set;
179 __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
181 if (has_sse())
183 unsigned int cw_sse;
185 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
187 /* The SSE exception masks are shifted by 7 bits. */
188 cw_sse |= (exc_clr << 7);
189 cw_sse &= ~(exc_set << 7);
191 /* Clear stalled exception flags. */
192 cw_sse &= ~_FPU_EX_ALL;
194 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
/* Apply the user's startup FPE options: enable trapping for the
   exceptions recorded in options.fpe (from -ffpe-trap=), disabling
   none.  Called during library initialization.  */
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
205 get_fpu_trap_exceptions (void)
207 unsigned short cw;
208 int mask;
209 int res = 0;
211 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
212 mask = cw;
214 if (has_sse())
216 unsigned int cw_sse;
218 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
220 /* The SSE exception masks are shifted by 7 bits. */
221 mask |= (cw_sse >> 7);
224 mask = ~mask & _FPU_MASK_ALL;
226 if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
227 if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
228 if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
229 if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
230 if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
231 if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
233 return res;
/* Report whether trapping can be enabled for exception FLAG.  All six
   IEEE exceptions are supported on x87/SSE, so always 1.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
243 get_fpu_except_flags (void)
245 unsigned short cw;
246 int excepts;
247 int res = 0;
249 __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
250 excepts = cw;
252 if (has_sse())
254 unsigned int cw_sse;
256 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
257 excepts |= cw_sse;
260 excepts &= _FPU_EX_ALL;
262 if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
263 if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
264 if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
265 if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
266 if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
267 if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
269 return res;
272 void
273 set_fpu_except_flags (int set, int clear)
275 struct fenv temp;
276 int exc_set = 0, exc_clr = 0;
278 /* Translate from GFC_PE_* values to _FPU_MASK_* values. */
279 if (set & GFC_FPE_INVALID)
280 exc_set |= _FPU_MASK_IM;
281 if (clear & GFC_FPE_INVALID)
282 exc_clr |= _FPU_MASK_IM;
284 if (set & GFC_FPE_DENORMAL)
285 exc_set |= _FPU_MASK_DM;
286 if (clear & GFC_FPE_DENORMAL)
287 exc_clr |= _FPU_MASK_DM;
289 if (set & GFC_FPE_ZERO)
290 exc_set |= _FPU_MASK_ZM;
291 if (clear & GFC_FPE_ZERO)
292 exc_clr |= _FPU_MASK_ZM;
294 if (set & GFC_FPE_OVERFLOW)
295 exc_set |= _FPU_MASK_OM;
296 if (clear & GFC_FPE_OVERFLOW)
297 exc_clr |= _FPU_MASK_OM;
299 if (set & GFC_FPE_UNDERFLOW)
300 exc_set |= _FPU_MASK_UM;
301 if (clear & GFC_FPE_UNDERFLOW)
302 exc_clr |= _FPU_MASK_UM;
304 if (set & GFC_FPE_INEXACT)
305 exc_set |= _FPU_MASK_PM;
306 if (clear & GFC_FPE_INEXACT)
307 exc_clr |= _FPU_MASK_PM;
310 /* Change the flags. This is tricky on 387 (unlike SSE), because we have
311 FNSTSW but no FLDSW instruction. */
312 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
313 temp.__status_word &= ~exc_clr;
314 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
316 /* Change the flags on SSE. */
318 if (has_sse())
320 unsigned int cw_sse;
322 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
323 cw_sse &= ~exc_clr;
324 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
327 local_feraiseexcept (exc_set);
/* Report whether the exception flag FLAG can be queried and set.  All
   six IEEE flags exist on x87/SSE, so always 1.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
336 void
337 set_fpu_rounding_mode (int round)
339 int round_mode;
340 unsigned short cw;
342 switch (round)
344 case GFC_FPE_TONEAREST:
345 round_mode = _FPU_RC_NEAREST;
346 break;
347 case GFC_FPE_UPWARD:
348 round_mode = _FPU_RC_UP;
349 break;
350 case GFC_FPE_DOWNWARD:
351 round_mode = _FPU_RC_DOWN;
352 break;
353 case GFC_FPE_TOWARDZERO:
354 round_mode = _FPU_RC_ZERO;
355 break;
356 default:
357 return; /* Should be unreachable. */
360 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
362 /* The x87 round control bits are shifted by 10 bits. */
363 cw &= ~(_FPU_RC_MASK << 10);
364 cw |= round_mode << 10;
366 __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
368 if (has_sse())
370 unsigned int cw_sse;
372 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
374 /* The SSE round control bits are shifted by 13 bits. */
375 cw_sse &= ~(_FPU_RC_MASK << 13);
376 cw_sse |= round_mode << 13;
378 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
383 get_fpu_rounding_mode (void)
385 int round_mode;
387 #ifdef __SSE_MATH__
388 unsigned int cw;
390 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
392 /* The SSE round control bits are shifted by 13 bits. */
393 round_mode = cw >> 13;
394 #else
395 unsigned short cw;
397 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
399 /* The x87 round control bits are shifted by 10 bits. */
400 round_mode = cw >> 10;
401 #endif
403 round_mode &= _FPU_RC_MASK;
405 switch (round_mode)
407 case _FPU_RC_NEAREST:
408 return GFC_FPE_TONEAREST;
409 case _FPU_RC_UP:
410 return GFC_FPE_UPWARD;
411 case _FPU_RC_DOWN:
412 return GFC_FPE_DOWNWARD;
413 case _FPU_RC_ZERO:
414 return GFC_FPE_TOWARDZERO;
415 default:
416 return 0; /* Should be unreachable. */
421 support_fpu_rounding_mode (int mode)
423 if (mode == GFC_FPE_AWAY)
424 return 0;
425 else
426 return 1;
/* Save the complete FPU state into the buffer STATE, which callers
   size as GFC_FPE_STATE_BUFFER_SIZE bytes (checked against struct fenv
   by the _Static_assert earlier in this file).  */
void
get_fpu_state (void *state)
{
  struct fenv *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  /* The x87 environment image does not include SSE state; append the
     MXCSR register to the buffer's extra slot.  */
  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
/* Restore an FPU state previously saved by get_fpu_state from the
   buffer STATE (x87 environment plus MXCSR when SSE is present).  */
void
set_fpu_state (void *state)
{
  struct fenv *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
/* Report whether underflow control is available for real kind KIND.
   Abrupt underflow (flush to zero) exists only as the MXCSR FTZ bit,
   i.e. only with SSE and only for the kinds computed in SSE
   registers (4 and 8).  */
int
support_fpu_underflow_control (int kind)
{
  return has_sse () && (kind == 4 || kind == 8);
}
/* Return the current underflow mode: 1 for gradual underflow
   (denormals generated), 0 for abrupt underflow (flush to zero).  */
int
get_fpu_underflow_mode (void)
{
  unsigned int cw_sse;

  /* Without SSE there is no FTZ bit to consult; x87 underflow is
     always gradual.  */
  if (!has_sse())
    return 1;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */
  return (cw_sse & MXCSR_FTZ) ? 0 : 1;
}
483 void
484 set_fpu_underflow_mode (int gradual)
486 unsigned int cw_sse;
488 if (!has_sse())
489 return;
491 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
493 if (gradual)
494 cw_sse &= ~MXCSR_FTZ;
495 else
496 cw_sse |= MXCSR_FTZ;
498 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));