9597 Want hypervisor API for FPU management
[unleashed.git] / usr / src / uts / intel / sys / fp.h
blobfe5471e855282b26b08506e2094c87f93bd83060
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
23 * Copyright (c) 2018, Joyent, Inc.
25 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
28 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
30 /* All Rights Reserved */
32 #ifndef _SYS_FP_H
33 #define _SYS_FP_H
/* Give every declaration in this header C linkage when included from C++. */
#ifdef	__cplusplus
extern "C" {
#endif
/*
 * 80287/80387 and SSE/SSE2 floating point processor definitions
 */

/*
 * values that go into fp_kind
 *
 * Every "chip present" value below has the FP_HW bit set, while FP_NO and
 * FP_SW do not, so (fp_kind & FP_HW) distinguishes hardware from
 * no-FPU/emulation.  FP_487 and FP_486 intentionally share one value.
 */
#define	FP_NO	0	/* no fp chip, no emulator (no fp support) */
#define	FP_SW	1	/* no fp chip, using software emulator */
#define	FP_HW	2	/* chip present bit */
#define	FP_287	2	/* 80287 chip present */
#define	FP_387	3	/* 80387 chip present */
#define	FP_487	6	/* 80487 chip present */
#define	FP_486	6	/* 80486 chip present */

/*
 * The following values are bit flags instead of actual values.
 * E.g. to know if we are using SSE, test (value & __FP_SSE) instead
 * of (value == __FP_SSE).
 */
#define	__FP_SSE	0x100	/* .. plus SSE-capable CPU */
#define	__FP_AVX	0x200	/* .. plus AVX-capable CPU */

/*
 * values that go into fp_save_mech
 */
#define	FP_FNSAVE	1	/* fnsave/frstor instructions */
#define	FP_FXSAVE	2	/* fxsave/fxrstor instructions */
#define	FP_XSAVE	3	/* xsave/xrstor instructions */
/*
 * masks for 80387 control word
 *
 * FPIM..FPPM are the six per-exception mask bits; FPPC, FPRC and FPIC
 * select the multi-bit precision, rounding and infinity control fields
 * whose individual settings are listed further below.
 */
#define	FPIM	0x00000001	/* invalid operation */
#define	FPDM	0x00000002	/* denormalized operand */
#define	FPZM	0x00000004	/* zero divide */
#define	FPOM	0x00000008	/* overflow */
#define	FPUM	0x00000010	/* underflow */
#define	FPPM	0x00000020	/* precision */
#define	FPPC	0x00000300	/* precision control */
#define	FPRC	0x00000C00	/* rounding control */
#define	FPIC	0x00001000	/* infinity control */
#define	WFPDE	0x00000080	/* data chain exception */

/*
 * (Old symbol compatibility)
 */
#define	FPINV	FPIM
#define	FPDNO	FPDM
#define	FPZDIV	FPZM
#define	FPOVR	FPOM
#define	FPUNR	FPUM
#define	FPPRE	FPPM

/*
 * precision, rounding, and infinity options in control word
 *
 * FPSIG* are values of the FPPC field, FPRTN/FPRD/FPRU/FPCHOP are values
 * of the FPRC field, and FPP/FPA are values of the FPIC field.
 *
 * NOTE(review): WFPB24 is 0x00040000, which is bit 18, although its
 * comment says "bit 24".  The value is left unchanged here; confirm which
 * of the two is intended before relying on it.
 */
#define	FPSIG24	0x00000000	/* 24-bit significand precision (short) */
#define	FPSIG53	0x00000200	/* 53-bit significand precision (long) */
#define	FPSIG64	0x00000300	/* 64-bit significand precision (temp) */
#define	FPRTN	0x00000000	/* round to nearest or even */
#define	FPRD	0x00000400	/* round down */
#define	FPRU	0x00000800	/* round up */
#define	FPCHOP	0x00000C00	/* chop (truncate toward zero) */
#define	FPP	0x00000000	/* projective infinity */
#define	FPA	0x00001000	/* affine infinity */
#define	WFPB17	0x00020000	/* bit 17 */
#define	WFPB24	0x00040000	/* bit 24 */
/*
 * masks for 80387 status word
 */
#define	FPS_IE	0x00000001	/* invalid operation */
#define	FPS_DE	0x00000002	/* denormalized operand */
#define	FPS_ZE	0x00000004	/* zero divide */
#define	FPS_OE	0x00000008	/* overflow */
#define	FPS_UE	0x00000010	/* underflow */
#define	FPS_PE	0x00000020	/* precision */
#define	FPS_SF	0x00000040	/* stack fault */
#define	FPS_ES	0x00000080	/* error summary bit */
#define	FPS_C0	0x00000100	/* C0 bit */
#define	FPS_C1	0x00000200	/* C1 bit */
#define	FPS_C2	0x00000400	/* C2 bit */
#define	FPS_TOP	0x00003800	/* top of stack pointer */
#define	FPS_C3	0x00004000	/* C3 bit */
#define	FPS_B	0x00008000	/* busy bit */

/*
 * Exception flags manually cleared during x87 exception handling.
 * This is the low eight status bits plus the busy bit (0x80ff); the
 * condition codes C0-C3 and the top-of-stack field are not included.
 */
#define	FPS_SW_EFLAGS	\
	(FPS_IE|FPS_DE|FPS_ZE|FPS_OE|FPS_UE|FPS_PE|FPS_SF|FPS_ES|FPS_B)
/*
 * Initial value of FPU control word as per 4th ed. ABI document
 *	- affine infinity			(0x1000, FPA)
 *	- round to nearest or even		(rounding field 0, FPRTN)
 *	- 64-bit significand precision		(0x0300, FPSIG64)
 *	- all exceptions masked			(0x003f)
 */
#define	FPU_CW_INIT	0x133f
/*
 * masks and flags for SSE/SSE2 MXCSR
 *
 * Bits 0-5 are the exception status flags, bit 6 is DAZ, bits 7-12 are
 * the corresponding exception mask bits, bits 13-14 form the rounding
 * control field and bit 15 is flush-to-zero.
 */
#define	SSE_IE	0x00000001	/* invalid operation */
#define	SSE_DE	0x00000002	/* denormalized operand */
#define	SSE_ZE	0x00000004	/* zero divide */
#define	SSE_OE	0x00000008	/* overflow */
#define	SSE_UE	0x00000010	/* underflow */
#define	SSE_PE	0x00000020	/* precision */
#define	SSE_DAZ	0x00000040	/* denormals are zero */
#define	SSE_IM	0x00000080	/* invalid op exception mask */
#define	SSE_DM	0x00000100	/* denormalize exception mask */
#define	SSE_ZM	0x00000200	/* zero-divide exception mask */
#define	SSE_OM	0x00000400	/* overflow exception mask */
#define	SSE_UM	0x00000800	/* underflow exception mask */
#define	SSE_PM	0x00001000	/* precision exception mask */
#define	SSE_RC	0x00006000	/* rounding control */
#define	SSE_RD	0x00002000	/* rounding control: round down */
#define	SSE_RU	0x00004000	/* rounding control: round up */
#define	SSE_FZ	0x00008000	/* flush to zero for masked underflow */

/* All six exception status flags. */
#define	SSE_MXCSR_EFLAGS	\
	(SSE_IE|SSE_DE|SSE_ZE|SSE_OE|SSE_UE|SSE_PE)	/* 0x3f */

/* Initial MXCSR: all six exceptions masked, everything else clear. */
#define	SSE_MXCSR_INIT	\
	(SSE_IM|SSE_DM|SSE_ZM|SSE_OM|SSE_UM|SSE_PM)	/* 0x1f80 */

/* Every one of the low 16 bits except DAZ. */
#define	SSE_MXCSR_MASK_DEFAULT	\
	(0xffff & ~SSE_DAZ)				/* 0xffbf */

/*
 * Bit-name table for MXCSR: a leading \20 (16) followed by
 * (1-based bit number, name) pairs from bit 16 down to bit 1.
 * NOTE(review): presumably consumed by a "%b"-style formatter with \20
 * selecting hexadecimal output -- confirm at the use sites.
 */
#define	SSE_FMT_MXCSR	\
	"\20\20fz\17ru\16rd\15pm\14um\13om\12zm\11dm"	\
	"\10im\7daz\6pe\5ue\4oe\3ze\2de\1ie"
/*
 * This structure is written to memory by an 'fnsave' instruction
 *
 * The layout is fixed: a 28-byte header followed by eight 10-byte
 * register slots, for 108 bytes in total.  The __f_ign* members are
 * 16-bit padding that keeps the named fields at their fixed offsets.
 */
struct fnsave_state {
	uint16_t	f_fcw;
	uint16_t	__f_ign0;
	uint16_t	f_fsw;
	uint16_t	__f_ign1;
	uint16_t	f_ftw;
	uint16_t	__f_ign2;
	uint32_t	f_eip;
	uint16_t	f_cs;
	uint16_t	f_fop;
	uint32_t	f_dp;
	uint16_t	f_ds;
	uint16_t	__f_ign3;
	union {
		uint16_t	fpr_16[5];	/* 80-bits of x87 state */
	} f_st[8];
};	/* 108 bytes */
196 * This structure is written to memory by an 'fxsave' instruction
197 * Note the variant behaviour of this instruction between long mode
198 * and legacy environments!
200 struct fxsave_state {
201 uint16_t fx_fcw;
202 uint16_t fx_fsw;
203 uint16_t fx_fctw; /* compressed tag word */
204 uint16_t fx_fop;
205 #if defined(__amd64)
206 uint64_t fx_rip;
207 uint64_t fx_rdp;
208 #else
209 uint32_t fx_eip;
210 uint16_t fx_cs;
211 uint16_t __fx_ign0;
212 uint32_t fx_dp;
213 uint16_t fx_ds;
214 uint16_t __fx_ign1;
215 #endif
216 uint32_t fx_mxcsr;
217 uint32_t fx_mxcsr_mask;
218 union {
219 uint16_t fpr_16[5]; /* 80-bits of x87 state */
220 u_longlong_t fpr_mmx; /* 64-bit mmx register */
221 uint32_t __fpr_pad[4]; /* (pad out to 128-bits) */
222 } fx_st[8];
223 #if defined(__amd64)
224 upad128_t fx_xmm[16]; /* 128-bit registers */
225 upad128_t __fx_ign2[6];
226 #else
227 upad128_t fx_xmm[8]; /* 128-bit registers */
228 upad128_t __fx_ign2[14];
229 #endif
230 }; /* 512 bytes */
233 * This structure is written to memory by one of the 'xsave' instruction
234 * variants. The first 512 bytes are compatible with the format of the 'fxsave'
235 * area. The header portion of the xsave layout is documented in section
236 * 13.4.2 of the Intel 64 and IA-32 Architectures Software Developer’s Manual,
237 * Volume 1 (IASDv1). The extended portion is documented in section 13.4.3.
239 * Our size is at least AVX_XSAVE_SIZE (832 bytes), asserted in fpnoextflt().
240 * Enabling additional xsave-related CPU features requires an increase in the
241 * size. We dynamically allocate the per-lwp xsave area at runtime, based on
242 * the size needed for the CPU-specific features. This xsave_state structure
243 * simply defines our historical layout for the beginning of the xsave area. The
244 * locations and size of new, extended, components is determined dynamically by
245 * querying the CPU. See the xsave_info structure in cpuid.c.
247 * xsave component usage is tracked using bits in the xs_xstate_bv field. The
248 * components are documented in section 13.1 of IASDv1. For easy reference,
249 * this is a summary of the currently defined component bit definitions:
250 * x87 0x0001
251 * SSE 0x0002
252 * AVX 0x0004
253 * bndreg (MPX) 0x0008
254 * bndcsr (MPX) 0x0010
255 * opmask (AVX512) 0x0020
256 * zmm hi256 (AVX512) 0x0040
257 * zmm hi16 (AVX512) 0x0080
258 * PT 0x0100
259 * PKRU 0x0200
260 * When xsaveopt_ctxt is being used to save into the xsave_state area, the
261 * xs_xstate_bv field is updated by the xsaveopt instruction to indicate which
262 * elements of the xsave area are active.
264 * xs_xcomp_bv should always be 0, since we do not currently use the compressed
265 * form of xsave (xsavec).
267 struct xsave_state {
268 struct fxsave_state xs_fxsave; /* 0-511 legacy region */
269 uint64_t xs_xstate_bv; /* 512-519 start xsave header */
270 uint64_t xs_xcomp_bv; /* 520-527 */
271 uint64_t xs_reserved[6]; /* 528-575 end xsave header */
272 upad128_t xs_ymm[16]; /* 576 AVX component */
/*
 * Kernel's FPU save area
 *
 * kfpu_u is a single pointer viewed through whichever save-area type is
 * in use (the fnsave view exists only on __i386); kfpu_generic is the
 * untyped view of the same pointer.  The two status words hold values
 * captured at the #mf and #xm exceptions respectively.
 */
typedef struct {
	union _kfpu_u {
		void *kfpu_generic;
		struct fxsave_state *kfpu_fx;
#if defined(__i386)
		struct fnsave_state *kfpu_fn;
#endif
		struct xsave_state *kfpu_xs;
	} kfpu_u;
	uint32_t kfpu_status;		/* saved at #mf exception */
	uint32_t kfpu_xstatus;		/* saved at #xm exception */
} kfpu_t;
/*
 * Global FPU configuration, declared here and defined elsewhere.
 * fp_kind takes the FP_* kind values (optionally with the __FP_* flag
 * bits) and fp_save_mech takes FP_FNSAVE/FP_FXSAVE/FP_XSAVE.
 */
extern int fp_kind;		/* kind of fp support */
extern int fp_save_mech;	/* fp save/restore mechanism */
extern int fpu_exists;		/* FPU hw exists */
/*
 * Everything from here to the matching #endif is visible only when
 * _KERNEL is defined.  These are declarations only; the definitions
 * live outside this header.
 */
#ifdef _KERNEL

extern int fpu_ignored;
extern int fpu_pentium_fdivbug;

extern uint32_t sse_mxcsr_mask;

extern void fpu_probe(void);
extern uint_t fpu_initial_probe(void);
extern int fpu_probe_pentium_fdivbug(void);

extern void fpu_auxv_info(int *, size_t *);

/*
 * Context routines taking an untyped pointer argument.  fpsave_ctxt and
 * xsavep are function pointers rather than functions.
 * NOTE(review): presumably each is pointed at one of the concrete
 * routines declared in this section according to fp_save_mech --
 * confirm where they are assigned.
 */
extern void fpnsave_ctxt(void *);
extern void fpxsave_ctxt(void *);
extern void xsave_ctxt(void *);
extern void xsaveopt_ctxt(void *);
extern void fpxsave_excp_clr_ctxt(void *);
extern void xsave_excp_clr_ctxt(void *);
extern void xsaveopt_excp_clr_ctxt(void *);
extern void (*fpsave_ctxt)(void *);
extern void (*xsavep)(struct xsave_state *, uint64_t);

/*
 * Save/restore primitives operating on a typed save area.  The xsave
 * family takes an additional 64-bit argument.
 */
extern void fxsave_insn(struct fxsave_state *);
extern void fpsave(struct fnsave_state *);
extern void fprestore(struct fnsave_state *);
extern void fpxsave(struct fxsave_state *);
extern void fpxrestore(struct fxsave_state *);
extern void xsave(struct xsave_state *, uint64_t);
extern void xsaveopt(struct xsave_state *, uint64_t);
extern void xrestore(struct xsave_state *, uint64_t);

extern void fpenable(void);
extern void fpdisable(void);
extern void fpinit(void);

extern uint32_t fperr_reset(void);
extern uint32_t fpxerr_reset(void);

extern uint32_t fpgetcwsw(void);
extern uint32_t fpgetmxcsr(void);

/* Forward declaration: only pointers to struct regs are used here. */
struct regs;
extern int fpnoextflt(struct regs *);
extern int fpextovrflt(struct regs *);
extern int fpexterrflt(struct regs *);
extern int fpsimderrflt(struct regs *);
extern void fpsetcw(uint16_t, uint32_t);
/* Forward declaration: only pointers to struct _klwp are used here. */
struct _klwp;
extern void fp_lwp_init(struct _klwp *);
extern void fp_lwp_cleanup(struct _klwp *);
extern void fp_lwp_dup(struct _klwp *);

extern const struct fxsave_state sse_initial;
extern const struct xsave_state avx_initial;

#endif	/* _KERNEL */
/* Close the extern "C" block opened near the top of this header. */
#ifdef	__cplusplus
}
#endif
357 #endif /* _SYS_FP_H */