1 /* FPU-related code for x86 and x86_64 processors.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
36 unsigned int eax
, ebx
, ecx
, edx
;
38 if (!__get_cpuid (1, &eax
, &ebx
, &ecx
, &edx
))
47 /* i387 exceptions -- see linux <fpu_control.h> header file for details. */
/* Each _FPU_MASK_xM value is a single bit and _FPU_MASK_ALL is the OR of
   all six.  In this file the same bit positions are applied to the x87
   control word (trap masks), to the x87 status word (raised flags) and,
   shifted left by 7, to the trap masks held in MXCSR.  */
48 #define _FPU_MASK_IM 0x01
49 #define _FPU_MASK_DM 0x02
50 #define _FPU_MASK_ZM 0x04
51 #define _FPU_MASK_OM 0x08
52 #define _FPU_MASK_UM 0x10
53 #define _FPU_MASK_PM 0x20
54 #define _FPU_MASK_ALL 0x3f
/* The same six low bits, used where they hold exception flags rather than
   trap masks.  */
56 #define _FPU_EX_ALL 0x3f
58 /* i387 rounding modes. */
/* These are unshifted two-bit rounding-control values; the code that uses
   them shifts by 10 for the x87 control word and by 13 for MXCSR.  */
60 #define _FPU_RC_NEAREST 0x0
61 #define _FPU_RC_DOWN 0x1
62 #define _FPU_RC_UP 0x2
63 #define _FPU_RC_ZERO 0x3
/* Covers both rounding-control bits (before shifting).  */
65 #define _FPU_RC_MASK 0x3
67 /* This structure corresponds to the layout of the block written by the x87 FNSTENV instruction.  */
71 unsigned short int __control_word
;
72 unsigned short int __unused1
;
73 unsigned short int __status_word
;
74 unsigned short int __unused2
;
75 unsigned short int __tags
;
76 unsigned short int __unused3
;
78 unsigned short int __cs_selector
;
79 unsigned short int __opcode
;
80 unsigned int __data_offset
;
81 unsigned short int __data_selector
;
82 unsigned short int __unused5
;
88 /* Raise the supported floating-point exceptions from EXCEPTS. Other
89 bits in EXCEPTS are ignored. Code originally borrowed from
90 libatomic/config/x86/fenv.c. */
93 local_feraiseexcept (int excepts
)
/* EXCEPTS is tested bit by bit against the _FPU_MASK_* values.  Two
   strategies are visible below:
     - invalid, divide-by-zero and inexact are raised by executing a real
       division whose result has that property;
     - denormal, overflow and underflow are raised by saving the x87
       environment, setting the flag in the saved status word, reloading
       the environment and issuing fwait.
   NOTE(review): this excerpt omits the braces, the declarations of `f'
   (first branch) and `temp', and whatever selects between the SSE
   (divss) and x87 (fdiv) forms -- presumably a preprocessor conditional;
   confirm against the full file.  */
/* Invalid operation: divide f by itself (SSE form, then x87 form).  */
95 if (excepts
& _FPU_MASK_IM
)
99 volatile float r
__attribute__ ((unused
));
100 __asm__
__volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f
));
101 r
= f
; /* Needed to trigger exception. */
103 __asm__
__volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f
));
104 /* No need for fwait, exception is triggered by emitted fstp. */
/* Denormal operand: set the flag directly in the saved status word.  */
107 if (excepts
& _FPU_MASK_DM
)
110 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
111 temp
.__status_word
|= _FPU_MASK_DM
;
112 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
113 __asm__
__volatile__ ("fwait");
/* Division by zero: compute 1.0f / 0.0f.  */
115 if (excepts
& _FPU_MASK_ZM
)
117 float f
= 1.0f
, g
= 0.0f
;
119 volatile float r
__attribute__ ((unused
));
120 __asm__
__volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f
) : "xm" (g
));
121 r
= f
; /* Needed to trigger exception. */
123 __asm__
__volatile__ ("fdivs\t%1" : "+t" (f
) : "m" (g
));
124 /* No need for fwait, exception is triggered by emitted fstp. */
/* Overflow: set the flag directly in the saved status word.  */
127 if (excepts
& _FPU_MASK_OM
)
130 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
131 temp
.__status_word
|= _FPU_MASK_OM
;
132 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
133 __asm__
__volatile__ ("fwait");
/* Underflow: set the flag directly in the saved status word.  */
135 if (excepts
& _FPU_MASK_UM
)
138 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
139 temp
.__status_word
|= _FPU_MASK_UM
;
140 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
141 __asm__
__volatile__ ("fwait");
/* Inexact result: compute 1.0f / 3.0f.  */
143 if (excepts
& _FPU_MASK_PM
)
145 float f
= 1.0f
, g
= 3.0f
;
147 volatile float r
__attribute__ ((unused
));
148 __asm__
__volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f
) : "xm" (g
));
149 r
= f
; /* Needed to trigger exception. */
151 __asm__
__volatile__ ("fdivs\t%1" : "+t" (f
) : "m" (g
));
152 /* No need for fwait, exception is triggered by emitted fstp. */
159 set_fpu_trap_exceptions (int trap
, int notrap
)
/* Enable trapping for the exceptions in TRAP and disable it for those in
   NOTRAP.  Both arguments are bit sets of GFC_FPE_* values.  They are
   first translated into _FPU_MASK_* bits: EXC_SET collects the exceptions
   that should trap, EXC_CLR those that should not.
   NOTE(review): the declarations of `cw' and `cw_sse', the statements that
   apply EXC_SET/EXC_CLR to `cw', and any guard around the SSE part are not
   visible in this excerpt.  */
161 int exc_set
= 0, exc_clr
= 0;
/* Exceptions for which trapping is requested.  */
164 if (trap
& GFC_FPE_INVALID
) exc_set
|= _FPU_MASK_IM
;
165 if (trap
& GFC_FPE_DENORMAL
) exc_set
|= _FPU_MASK_DM
;
166 if (trap
& GFC_FPE_ZERO
) exc_set
|= _FPU_MASK_ZM
;
167 if (trap
& GFC_FPE_OVERFLOW
) exc_set
|= _FPU_MASK_OM
;
168 if (trap
& GFC_FPE_UNDERFLOW
) exc_set
|= _FPU_MASK_UM
;
169 if (trap
& GFC_FPE_INEXACT
) exc_set
|= _FPU_MASK_PM
;
/* Exceptions for which trapping is to be turned off.  */
171 if (notrap
& GFC_FPE_INVALID
) exc_clr
|= _FPU_MASK_IM
;
172 if (notrap
& GFC_FPE_DENORMAL
) exc_clr
|= _FPU_MASK_DM
;
173 if (notrap
& GFC_FPE_ZERO
) exc_clr
|= _FPU_MASK_ZM
;
174 if (notrap
& GFC_FPE_OVERFLOW
) exc_clr
|= _FPU_MASK_OM
;
175 if (notrap
& GFC_FPE_UNDERFLOW
) exc_clr
|= _FPU_MASK_UM
;
176 if (notrap
& GFC_FPE_INEXACT
) exc_clr
|= _FPU_MASK_PM
;
/* x87: read the current control word into cw.  */
178 __asm__
__volatile__ ("fstcw\t%0" : "=m" (cw
));
/* fnclex is issued immediately before fldcw reloads cw -- presumably so
   that flags already pending do not trap the moment they are unmasked.  */
183 __asm__
__volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw
));
/* SSE: read MXCSR into cw_sse.  */
189 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
191 /* The SSE exception masks are shifted by 7 bits. */
/* A set mask bit suppresses the trap, so the "no trap" bits are set and
   the "trap" bits are cleared.  */
192 cw_sse
|= (exc_clr
<< 7);
193 cw_sse
&= ~(exc_set
<< 7);
195 /* Clear stale exception flags. */
196 cw_sse
&= ~_FPU_EX_ALL
;
/* Write the updated value back to MXCSR.  */
198 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse
));
205 set_fpu_trap_exceptions (options
.fpe
, 0);
209 get_fpu_trap_exceptions (void)
/* Build, in `res', the set of GFC_FPE_* exceptions whose mask bit is
   clear in `cw'.
   NOTE(review): the declarations of `cw', `cw_sse' and `res', the return
   statement, and any guard around the SSE part are not visible in this
   excerpt.  */
/* x87: read the control word.  */
214 __asm__
__volatile__ ("fstcw\t%0" : "=m" (cw
));
/* SSE: read MXCSR.  */
221 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
223 /* The SSE exception masks are shifted by 7 bits. */
/* OR the SSE masks into cw: after this a bit is clear only when it is
   clear in both the x87 control word and MXCSR.  */
224 cw
= cw
| ((cw_sse
>> 7) & _FPU_MASK_ALL
);
/* The tests use ~cw, so a GFC_FPE_* bit is added for every mask bit that
   is clear.  */
227 if (~cw
& _FPU_MASK_IM
) res
|= GFC_FPE_INVALID
;
228 if (~cw
& _FPU_MASK_DM
) res
|= GFC_FPE_DENORMAL
;
229 if (~cw
& _FPU_MASK_ZM
) res
|= GFC_FPE_ZERO
;
230 if (~cw
& _FPU_MASK_OM
) res
|= GFC_FPE_OVERFLOW
;
231 if (~cw
& _FPU_MASK_UM
) res
|= GFC_FPE_UNDERFLOW
;
232 if (~cw
& _FPU_MASK_PM
) res
|= GFC_FPE_INEXACT
;
238 support_fpu_trap (int flag
__attribute__((unused
)))
244 get_fpu_except_flags (void)
/* Translate the raised exception flags into a set of GFC_FPE_* bits
   accumulated in `result'.
   NOTE(review): the declarations, the statement that computes `excepts'
   from `cw' and `cw_sse', the return statement, and any guard around the
   SSE part are not visible in this excerpt.  */
/* x87: despite its name, cw receives the status word here (fnstsw).  */
250 __asm__
__volatile__ ("fnstsw\t%0" : "=am" (cw
));
/* SSE: read MXCSR.  */
257 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
/* Keep only the six exception-flag bits.  */
261 excepts
&= _FPU_EX_ALL
;
/* Map each _FPU_MASK_* flag bit to its GFC_FPE_* counterpart.  */
263 if (excepts
& _FPU_MASK_IM
) result
|= GFC_FPE_INVALID
;
264 if (excepts
& _FPU_MASK_DM
) result
|= GFC_FPE_DENORMAL
;
265 if (excepts
& _FPU_MASK_ZM
) result
|= GFC_FPE_ZERO
;
266 if (excepts
& _FPU_MASK_OM
) result
|= GFC_FPE_OVERFLOW
;
267 if (excepts
& _FPU_MASK_UM
) result
|= GFC_FPE_UNDERFLOW
;
268 if (excepts
& _FPU_MASK_PM
) result
|= GFC_FPE_INEXACT
;
274 set_fpu_except_flags (int set
, int clear
)
/* Raise the exception flags named in SET and lower those named in CLEAR.
   Both arguments are bit sets of GFC_FPE_* values.  Flags are cleared
   first, by editing the status registers, and the flags to set are then
   raised by calling local_feraiseexcept.
   NOTE(review): the declarations of `temp' and `cw_sse', the statement
   that modifies `cw_sse' between the MXCSR store and load, and any guard
   around the SSE part are not visible in this excerpt.  */
277 int exc_set
= 0, exc_clr
= 0;
279 /* Translate from GFC_FPE_* values to _FPU_MASK_* values. */
280 if (set
& GFC_FPE_INVALID
)
281 exc_set
|= _FPU_MASK_IM
;
282 if (clear
& GFC_FPE_INVALID
)
283 exc_clr
|= _FPU_MASK_IM
;
285 if (set
& GFC_FPE_DENORMAL
)
286 exc_set
|= _FPU_MASK_DM
;
287 if (clear
& GFC_FPE_DENORMAL
)
288 exc_clr
|= _FPU_MASK_DM
;
290 if (set
& GFC_FPE_ZERO
)
291 exc_set
|= _FPU_MASK_ZM
;
292 if (clear
& GFC_FPE_ZERO
)
293 exc_clr
|= _FPU_MASK_ZM
;
295 if (set
& GFC_FPE_OVERFLOW
)
296 exc_set
|= _FPU_MASK_OM
;
297 if (clear
& GFC_FPE_OVERFLOW
)
298 exc_clr
|= _FPU_MASK_OM
;
300 if (set
& GFC_FPE_UNDERFLOW
)
301 exc_set
|= _FPU_MASK_UM
;
302 if (clear
& GFC_FPE_UNDERFLOW
)
303 exc_clr
|= _FPU_MASK_UM
;
305 if (set
& GFC_FPE_INEXACT
)
306 exc_set
|= _FPU_MASK_PM
;
307 if (clear
& GFC_FPE_INEXACT
)
308 exc_clr
|= _FPU_MASK_PM
;
311 /* Change the flags. This is tricky on 387 (unlike SSE), because we have
312 FNSTSW but no FLDSW instruction. */
/* So the whole environment is saved, the status word is edited in the
   saved copy, and the environment is loaded back.  */
313 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (temp
));
314 temp
.__status_word
&= ~exc_clr
;
315 __asm__
__volatile__ ("fldenv\t%0" : : "m" (temp
));
317 /* Change the flags on SSE. */
323 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
325 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse
));
/* Finally raise the requested flags (already in _FPU_MASK_* form).  */
328 local_feraiseexcept (exc_set
);
332 support_fpu_flag (int flag
__attribute__((unused
)))
338 set_fpu_rounding_mode (int round
)
/* Select a rounding mode: a GFC_FPE_* rounding value is mapped to an
   _FPU_RC_* value in `round_mode', which is then written into the
   rounding-control field of the x87 control word and of MXCSR.
   NOTE(review): the switch header, the `break' statements, the case label
   preceding the _FPU_RC_UP assignment, the label in front of the bare
   `return' (presumably `default:'), the declarations, and any guard around
   the SSE part are not visible in this excerpt.  */
345 case GFC_FPE_TONEAREST
:
346 round_mode
= _FPU_RC_NEAREST
;
349 round_mode
= _FPU_RC_UP
;
351 case GFC_FPE_DOWNWARD
:
352 round_mode
= _FPU_RC_DOWN
;
354 case GFC_FPE_TOWARDZERO
:
355 round_mode
= _FPU_RC_ZERO
;
358 return; /* Should be unreachable. */
/* x87: read the control word.  */
361 __asm__
__volatile__ ("fnstcw\t%0" : "=m" (cw
));
363 /* The x87 round control bits are shifted by 10 bits. */
/* Clear the old two-bit field, then insert the new value.  */
364 cw
&= ~(_FPU_RC_MASK
<< 10);
365 cw
|= round_mode
<< 10;
/* Load the updated control word.  */
367 __asm__
__volatile__ ("fldcw\t%0" : : "m" (cw
));
/* SSE: the same read-modify-write on MXCSR.  */
373 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse
));
375 /* The SSE round control bits are shifted by 13 bits. */
376 cw_sse
&= ~(_FPU_RC_MASK
<< 13);
377 cw_sse
|= round_mode
<< 13;
379 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse
));
384 get_fpu_rounding_mode (void)
/* Read the current rounding-control field and return the matching
   GFC_FPE_* rounding value.
   NOTE(review): two reads of `round_mode' appear below, one from MXCSR and
   one from the x87 control word; presumably only one is compiled,
   selected by a preprocessor conditional that is not visible here.  The
   declarations, the switch header and three of the four case labels are
   likewise missing from this excerpt.  */
/* Variant reading MXCSR.  */
391 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (cw
));
393 /* The SSE round control bits are shifted by 13 bits. */
394 round_mode
= cw
>> 13;
/* Variant reading the x87 control word.  */
398 __asm__
__volatile__ ("fnstcw\t%0" : "=m" (cw
));
400 /* The x87 round control bits are shifted by 10 bits. */
401 round_mode
= cw
>> 10;
/* Discard everything above the two rounding-control bits.  */
404 round_mode
&= _FPU_RC_MASK
;
/* Map the _FPU_RC_* value back to a GFC_FPE_* value.  */
408 case _FPU_RC_NEAREST
:
409 return GFC_FPE_TONEAREST
;
411 return GFC_FPE_UPWARD
;
413 return GFC_FPE_DOWNWARD
;
415 return GFC_FPE_TOWARDZERO
;
417 return GFC_FPE_INVALID
; /* Should be unreachable. */
422 support_fpu_rounding_mode (int mode
__attribute__((unused
)))
428 get_fpu_state (void *state
)
/* Save the floating-point environment into the caller-supplied buffer
   STATE, which is accessed as a my_fenv_t.  The x87 environment is stored
   with fnstenv and MXCSR is stored into the __mxcsr member.
   NOTE(review): any guard around the MXCSR store is not visible in this
   excerpt.  */
430 my_fenv_t
*envp
= state
;
432 /* Check we can actually store the FPU state in the allocated size. */
433 assert (sizeof(my_fenv_t
) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE
);
/* Store the x87 environment directly into the buffer.  */
435 __asm__
__volatile__ ("fnstenv\t%0" : "=m" (*envp
));
437 /* fnstenv has the side effect of masking all exceptions, so we need
438 to restore the control word after that. */
439 __asm__
__volatile__ ("fldcw\t%0" : : "m" (envp
->__control_word
));
/* Store MXCSR alongside the x87 environment.  */
442 __asm__
__volatile__ ("%vstmxcsr\t%0" : "=m" (envp
->__mxcsr
));
446 set_fpu_state (void *state
)
/* Restore the floating-point environment from the buffer STATE, which is
   accessed as a my_fenv_t: the x87 environment is reloaded with fldenv and
   MXCSR is reloaded from the __mxcsr member.
   NOTE(review): any guard around the MXCSR load is not visible in this
   excerpt.  */
448 my_fenv_t
*envp
= state
;
450 /* Check we can actually store the FPU state in the allocated size. */
451 assert (sizeof(my_fenv_t
) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE
);
453 /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
454 complex than this, but I think it suffices in our case. */
455 __asm__
__volatile__ ("fldenv\t%0" : : "m" (*envp
));
/* Reload MXCSR from the saved copy.  */
458 __asm__
__volatile__ ("%vldmxcsr\t%0" : : "m" (envp
->__mxcsr
));