Use correct vector type in neutral_op_for_slp_reduction
[official-gcc.git] / libgfortran / config / fpu-387.h
bloba2f4281d0c5b7c0accaddf1624a9aebbaeede095
/* FPU-related code for x86 and x86_64 processors.
   Copyright (C) 2005-2019 Free Software Foundation, Inc.
   Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
/* cpuid.h is only needed when SSE availability has to be probed at run
   time, i.e. when the compiler is not already generating SSE math.  */
#ifndef __SSE_MATH__
#include "cpuid.h"
#endif
/* Report whether SSE is available.

   When the compiler already generates SSE math (__SSE_MATH__ is defined)
   the answer is fixed at compile time and is 1.  Otherwise CPUID leaf 1 is
   queried with __get_cpuid and the SSE bit of EDX is returned as is, so the
   result is nonzero (not necessarily 1) when SSE is present, and 0 when it
   is absent or when __get_cpuid reports failure.  */

static int
has_sse (void)
{
#ifndef __SSE_MATH__
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  return edx & bit_SSE;
#else
  return 1;
#endif
}
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   One bit per exception: Invalid, Denormal, Zero-divide, Overflow,
   Underflow, Precision (inexact).  The code in this file uses the same bit
   values for the x87 control-word masks, the x87 status-word flags and the
   low six MXCSR flag bits; the MXCSR mask bits are these values shifted
   left by 7.  */
#define _FPU_MASK_IM  0x01
#define _FPU_MASK_DM  0x02
#define _FPU_MASK_ZM  0x04
#define _FPU_MASK_OM  0x08
#define _FPU_MASK_UM  0x10
#define _FPU_MASK_PM  0x20
#define _FPU_MASK_ALL 0x3f

#define _FPU_EX_ALL   0x3f

/* i387 rounding modes.  These are unshifted field values: the code in this
   file places them at bit 10 of the x87 control word and at bit 13 of
   MXCSR.  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3

/* Enable flush to zero mode.  */

#define MXCSR_FTZ (1 << 15)
/* This structure corresponds to the layout of the block
   written by FSTENV.  The trailing __mxcsr member is an addition used by
   get_fpu_state and set_fpu_state, which store and load it separately with
   stmxcsr/ldmxcsr when SSE is available.  */
typedef struct
{
  unsigned short int __control_word;
  unsigned short int __unused1;
  unsigned short int __status_word;
  unsigned short int __unused2;
  unsigned short int __tags;
  unsigned short int __unused3;
  unsigned int __eip;
  unsigned short int __cs_selector;
  unsigned short int __opcode;
  unsigned int __data_offset;
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;
}
my_fenv_t;
90 /* Check we can actually store the FPU state in the allocated size. */
91 _Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
92 "GFC_FPE_STATE_BUFFER_SIZE is too small");
95 /* Raise the supported floating-point exceptions from EXCEPTS. Other
96 bits in EXCEPTS are ignored. Code originally borrowed from
97 libatomic/config/x86/fenv.c. */
99 static void
100 local_feraiseexcept (int excepts)
102 if (excepts & _FPU_MASK_IM)
104 float f = 0.0f;
105 #ifdef __SSE_MATH__
106 volatile float r __attribute__ ((unused));
107 __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
108 r = f; /* Needed to trigger exception. */
109 #else
110 __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
111 /* No need for fwait, exception is triggered by emitted fstp. */
112 #endif
114 if (excepts & _FPU_MASK_DM)
116 my_fenv_t temp;
117 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
118 temp.__status_word |= _FPU_MASK_DM;
119 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
120 __asm__ __volatile__ ("fwait");
122 if (excepts & _FPU_MASK_ZM)
124 float f = 1.0f, g = 0.0f;
125 #ifdef __SSE_MATH__
126 volatile float r __attribute__ ((unused));
127 __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
128 r = f; /* Needed to trigger exception. */
129 #else
130 __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
131 /* No need for fwait, exception is triggered by emitted fstp. */
132 #endif
134 if (excepts & _FPU_MASK_OM)
136 my_fenv_t temp;
137 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
138 temp.__status_word |= _FPU_MASK_OM;
139 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
140 __asm__ __volatile__ ("fwait");
142 if (excepts & _FPU_MASK_UM)
144 my_fenv_t temp;
145 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
146 temp.__status_word |= _FPU_MASK_UM;
147 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
148 __asm__ __volatile__ ("fwait");
150 if (excepts & _FPU_MASK_PM)
152 float f = 1.0f, g = 3.0f;
153 #ifdef __SSE_MATH__
154 volatile float r __attribute__ ((unused));
155 __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
156 r = f; /* Needed to trigger exception. */
157 #else
158 __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
159 /* No need for fwait, exception is triggered by emitted fstp. */
160 #endif
165 void
166 set_fpu_trap_exceptions (int trap, int notrap)
168 int exc_set = 0, exc_clr = 0;
169 unsigned short cw;
171 if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
172 if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
173 if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
174 if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
175 if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
176 if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
178 if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
179 if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
180 if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
181 if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
182 if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
183 if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
185 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
187 cw |= exc_clr;
188 cw &= ~exc_set;
190 __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
192 if (has_sse())
194 unsigned int cw_sse;
196 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
198 /* The SSE exception masks are shifted by 7 bits. */
199 cw_sse |= (exc_clr << 7);
200 cw_sse &= ~(exc_set << 7);
202 /* Clear stalled exception flags. */
203 cw_sse &= ~_FPU_EX_ALL;
205 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
209 void
210 set_fpu (void)
212 set_fpu_trap_exceptions (options.fpe, 0);
216 get_fpu_trap_exceptions (void)
218 unsigned short cw;
219 int mask;
220 int res = 0;
222 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
223 mask = cw;
225 if (has_sse())
227 unsigned int cw_sse;
229 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
231 /* The SSE exception masks are shifted by 7 bits. */
232 mask |= (cw_sse >> 7);
235 mask = ~mask & _FPU_MASK_ALL;
237 if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
238 if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
239 if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
240 if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
241 if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
242 if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
244 return res;
/* Return 1 unconditionally: FLAG is ignored and trapping is reported as
   supported for every exception.  */

int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
254 get_fpu_except_flags (void)
256 unsigned short cw;
257 int excepts;
258 int res = 0;
260 __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
261 excepts = cw;
263 if (has_sse())
265 unsigned int cw_sse;
267 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
268 excepts |= cw_sse;
271 excepts &= _FPU_EX_ALL;
273 if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
274 if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
275 if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
276 if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
277 if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
278 if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
280 return res;
283 void
284 set_fpu_except_flags (int set, int clear)
286 my_fenv_t temp;
287 int exc_set = 0, exc_clr = 0;
289 /* Translate from GFC_PE_* values to _FPU_MASK_* values. */
290 if (set & GFC_FPE_INVALID)
291 exc_set |= _FPU_MASK_IM;
292 if (clear & GFC_FPE_INVALID)
293 exc_clr |= _FPU_MASK_IM;
295 if (set & GFC_FPE_DENORMAL)
296 exc_set |= _FPU_MASK_DM;
297 if (clear & GFC_FPE_DENORMAL)
298 exc_clr |= _FPU_MASK_DM;
300 if (set & GFC_FPE_ZERO)
301 exc_set |= _FPU_MASK_ZM;
302 if (clear & GFC_FPE_ZERO)
303 exc_clr |= _FPU_MASK_ZM;
305 if (set & GFC_FPE_OVERFLOW)
306 exc_set |= _FPU_MASK_OM;
307 if (clear & GFC_FPE_OVERFLOW)
308 exc_clr |= _FPU_MASK_OM;
310 if (set & GFC_FPE_UNDERFLOW)
311 exc_set |= _FPU_MASK_UM;
312 if (clear & GFC_FPE_UNDERFLOW)
313 exc_clr |= _FPU_MASK_UM;
315 if (set & GFC_FPE_INEXACT)
316 exc_set |= _FPU_MASK_PM;
317 if (clear & GFC_FPE_INEXACT)
318 exc_clr |= _FPU_MASK_PM;
321 /* Change the flags. This is tricky on 387 (unlike SSE), because we have
322 FNSTSW but no FLDSW instruction. */
323 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
324 temp.__status_word &= ~exc_clr;
325 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
327 /* Change the flags on SSE. */
329 if (has_sse())
331 unsigned int cw_sse;
333 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
334 cw_sse &= ~exc_clr;
335 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
338 local_feraiseexcept (exc_set);
/* Return 1 unconditionally: FLAG is ignored and every exception flag is
   reported as supported.  */

int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
347 void
348 set_fpu_rounding_mode (int round)
350 int round_mode;
351 unsigned short cw;
353 switch (round)
355 case GFC_FPE_TONEAREST:
356 round_mode = _FPU_RC_NEAREST;
357 break;
358 case GFC_FPE_UPWARD:
359 round_mode = _FPU_RC_UP;
360 break;
361 case GFC_FPE_DOWNWARD:
362 round_mode = _FPU_RC_DOWN;
363 break;
364 case GFC_FPE_TOWARDZERO:
365 round_mode = _FPU_RC_ZERO;
366 break;
367 default:
368 return; /* Should be unreachable. */
371 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
373 /* The x87 round control bits are shifted by 10 bits. */
374 cw &= ~(_FPU_RC_MASK << 10);
375 cw |= round_mode << 10;
377 __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
379 if (has_sse())
381 unsigned int cw_sse;
383 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
385 /* The SSE round control bits are shifted by 13 bits. */
386 cw_sse &= ~(_FPU_RC_MASK << 13);
387 cw_sse |= round_mode << 13;
389 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
394 get_fpu_rounding_mode (void)
396 int round_mode;
398 #ifdef __SSE_MATH__
399 unsigned int cw;
401 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
403 /* The SSE round control bits are shifted by 13 bits. */
404 round_mode = cw >> 13;
405 #else
406 unsigned short cw;
408 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
410 /* The x87 round control bits are shifted by 10 bits. */
411 round_mode = cw >> 10;
412 #endif
414 round_mode &= _FPU_RC_MASK;
416 switch (round_mode)
418 case _FPU_RC_NEAREST:
419 return GFC_FPE_TONEAREST;
420 case _FPU_RC_UP:
421 return GFC_FPE_UPWARD;
422 case _FPU_RC_DOWN:
423 return GFC_FPE_DOWNWARD;
424 case _FPU_RC_ZERO:
425 return GFC_FPE_TOWARDZERO;
426 default:
427 return 0; /* Should be unreachable. */
/* Return 1 unconditionally: MODE is ignored and every rounding mode is
   reported as supported.  */

int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
437 void
438 get_fpu_state (void *state)
440 my_fenv_t *envp = state;
442 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));
444 /* fnstenv has the side effect of masking all exceptions, so we need
445 to restore the control word after that. */
446 __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));
448 if (has_sse())
449 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
452 void
453 set_fpu_state (void *state)
455 my_fenv_t *envp = state;
457 /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
458 complex than this, but I think it suffices in our case. */
459 __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));
461 if (has_sse())
462 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
/* Return 1 if underflow control is supported for KIND, 0 otherwise.  It is
   supported only when has_sse () is nonzero and KIND is 4 or 8.  */

int
support_fpu_underflow_control (int kind)
{
  if (!has_sse())
    return 0;

  return (kind == 4 || kind == 8) ? 1 : 0;
}
477 get_fpu_underflow_mode (void)
479 unsigned int cw_sse;
481 if (!has_sse())
482 return 1;
484 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
486 /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */
487 return (cw_sse & MXCSR_FTZ) ? 0 : 1;
491 void
492 set_fpu_underflow_mode (int gradual)
494 unsigned int cw_sse;
496 if (!has_sse())
497 return;
499 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
501 if (gradual)
502 cw_sse &= ~MXCSR_FTZ;
503 else
504 cw_sse |= MXCSR_FTZ;
506 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));