/*
 *  Copyright (C) 1994 Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */
8 #include <linux/module.h>
9 #include <linux/regset.h>
10 #include <linux/sched.h>
11 #include <linux/slab.h>
13 #include <asm/sigcontext.h>
14 #include <asm/processor.h>
15 #include <asm/math_emu.h>
16 #include <asm/uaccess.h>
17 #include <asm/ptrace.h>
22 # include <asm/sigcontext32.h>
23 # include <asm/user32.h>
25 # define save_i387_xstate_ia32 save_i387_xstate
26 # define restore_i387_xstate_ia32 restore_i387_xstate
27 # define _fpstate_ia32 _fpstate
28 # define _xstate_ia32 _xstate
29 # define sig_xstate_ia32_size sig_xstate_size
30 # define fx_sw_reserved_ia32 fx_sw_reserved
31 # define user_i387_ia32_struct user_i387_struct
32 # define user32_fxsr_struct user_fxsr_struct
35 #ifdef CONFIG_MATH_EMULATION
36 # define HAVE_HWFP (boot_cpu_data.hard_math)
41 static unsigned int mxcsr_feature_mask __read_mostly
= 0xffffffffu
;
42 unsigned int xstate_size
;
43 unsigned int sig_xstate_ia32_size
= sizeof(struct _fpstate_ia32
);
44 static struct i387_fxsave_struct fx_scratch __cpuinitdata
;
46 void __cpuinit
mxcsr_feature_mask_init(void)
48 unsigned long mask
= 0;
52 memset(&fx_scratch
, 0, sizeof(struct i387_fxsave_struct
));
53 asm volatile("fxsave %0" : : "m" (fx_scratch
));
54 mask
= fx_scratch
.mxcsr_mask
;
58 mxcsr_feature_mask
&= mask
;
62 void __cpuinit
init_thread_xstate(void)
65 xstate_size
= sizeof(struct i387_soft_struct
);
75 xstate_size
= sizeof(struct i387_fxsave_struct
);
78 xstate_size
= sizeof(struct i387_fsave_struct
);
#ifdef CONFIG_X86_64
/*
 * Called at bootup to set up the initial FPU state that is later cloned
 * into all processes.
 */
void __cpuinit fpu_init(void)
{
	unsigned long oldcr0 = read_cr0();

	/* Enable FXSR save/restore and SSE exception reporting. */
	set_in_cr4(X86_CR4_OSFXSR);
	set_in_cr4(X86_CR4_OSXMMEXCPT);

	write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */

	mxcsr_feature_mask_init();
	/* clean state in init */
	current_thread_info()->status = 0;
	/* NOTE(review): restored — init starts with no FPU state used. */
	clear_used_math();
}
#endif	/* CONFIG_X86_64 */
105 static void fpu_finit(struct fpu
*fpu
)
109 finit_soft_fpu(&fpu
->state
->soft
);
115 struct i387_fxsave_struct
*fx
= &fpu
->state
->fxsave
;
117 memset(fx
, 0, xstate_size
);
120 fx
->mxcsr
= MXCSR_DEFAULT
;
122 struct i387_fsave_struct
*fp
= &fpu
->state
->fsave
;
123 memset(fp
, 0, xstate_size
);
124 fp
->cwd
= 0xffff037fu
;
125 fp
->swd
= 0xffff0000u
;
126 fp
->twd
= 0xffffffffu
;
127 fp
->fos
= 0xffff0000u
;
132 * The _current_ task is using the FPU for the first time
133 * so initialize it and set the mxcsr to its default
134 * value at reset if we support XMM instructions and then
135 * remeber the current task has used the FPU.
137 int init_fpu(struct task_struct
*tsk
)
141 if (tsk_used_math(tsk
)) {
142 if (HAVE_HWFP
&& tsk
== current
)
148 * Memory allocation at the first usage of the FPU and other state.
150 ret
= fpu_alloc(&tsk
->thread
.fpu
);
154 fpu_finit(&tsk
->thread
.fpu
);
156 set_stopped_child_used_math(tsk
);
161 * The xstateregs_active() routine is the same as the fpregs_active() routine,
162 * as the "regset->n" for the xstate regset will be updated based on the feature
163 * capabilites supported by the xsave.
165 int fpregs_active(struct task_struct
*target
, const struct user_regset
*regset
)
167 return tsk_used_math(target
) ? regset
->n
: 0;
170 int xfpregs_active(struct task_struct
*target
, const struct user_regset
*regset
)
172 return (cpu_has_fxsr
&& tsk_used_math(target
)) ? regset
->n
: 0;
175 int xfpregs_get(struct task_struct
*target
, const struct user_regset
*regset
,
176 unsigned int pos
, unsigned int count
,
177 void *kbuf
, void __user
*ubuf
)
184 ret
= init_fpu(target
);
188 sanitize_i387_state(target
);
190 return user_regset_copyout(&pos
, &count
, &kbuf
, &ubuf
,
191 &target
->thread
.fpu
.state
->fxsave
, 0, -1);
194 int xfpregs_set(struct task_struct
*target
, const struct user_regset
*regset
,
195 unsigned int pos
, unsigned int count
,
196 const void *kbuf
, const void __user
*ubuf
)
203 ret
= init_fpu(target
);
207 sanitize_i387_state(target
);
209 ret
= user_regset_copyin(&pos
, &count
, &kbuf
, &ubuf
,
210 &target
->thread
.fpu
.state
->fxsave
, 0, -1);
213 * mxcsr reserved bits must be masked to zero for security reasons.
215 target
->thread
.fpu
.state
->fxsave
.mxcsr
&= mxcsr_feature_mask
;
218 * update the header bits in the xsave header, indicating the
219 * presence of FP and SSE state.
222 target
->thread
.fpu
.state
->xsave
.xsave_hdr
.xstate_bv
|= XSTATE_FPSSE
;
227 int xstateregs_get(struct task_struct
*target
, const struct user_regset
*regset
,
228 unsigned int pos
, unsigned int count
,
229 void *kbuf
, void __user
*ubuf
)
236 ret
= init_fpu(target
);
241 * Copy the 48bytes defined by the software first into the xstate
242 * memory layout in the thread struct, so that we can copy the entire
243 * xstateregs to the user using one user_regset_copyout().
245 memcpy(&target
->thread
.fpu
.state
->fxsave
.sw_reserved
,
246 xstate_fx_sw_bytes
, sizeof(xstate_fx_sw_bytes
));
249 * Copy the xstate memory layout.
251 ret
= user_regset_copyout(&pos
, &count
, &kbuf
, &ubuf
,
252 &target
->thread
.fpu
.state
->xsave
, 0, -1);
256 int xstateregs_set(struct task_struct
*target
, const struct user_regset
*regset
,
257 unsigned int pos
, unsigned int count
,
258 const void *kbuf
, const void __user
*ubuf
)
261 struct xsave_hdr_struct
*xsave_hdr
;
266 ret
= init_fpu(target
);
270 ret
= user_regset_copyin(&pos
, &count
, &kbuf
, &ubuf
,
271 &target
->thread
.fpu
.state
->xsave
, 0, -1);
274 * mxcsr reserved bits must be masked to zero for security reasons.
276 target
->thread
.fpu
.state
->fxsave
.mxcsr
&= mxcsr_feature_mask
;
278 xsave_hdr
= &target
->thread
.fpu
.state
->xsave
.xsave_hdr
;
280 xsave_hdr
->xstate_bv
&= pcntxt_mask
;
282 * These bits must be zero.
284 xsave_hdr
->reserved1
[0] = xsave_hdr
->reserved1
[1] = 0;
289 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
292 * FPU tag word conversions.
295 static inline unsigned short twd_i387_to_fxsr(unsigned short twd
)
297 unsigned int tmp
; /* to avoid 16 bit prefixes in the code */
299 /* Transform each pair of bits into 01 (valid) or 00 (empty) */
301 tmp
= (tmp
| (tmp
>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
302 /* and move the valid bits to the lower byte. */
303 tmp
= (tmp
| (tmp
>> 1)) & 0x3333; /* 00VV00VV00VV00VV */
304 tmp
= (tmp
| (tmp
>> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
305 tmp
= (tmp
| (tmp
>> 4)) & 0x00ff; /* 00000000VVVVVVVV */
/* Address of the n-th 16-byte register slot in an fxsave area.
 * Fix: dropped the stray trailing ';' from the function-like macro. */
#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16)

/* i387 per-register tag values (two bits per register). */
#define FP_EXP_TAG_VALID	0
#define FP_EXP_TAG_ZERO		1
#define FP_EXP_TAG_SPECIAL	2
#define FP_EXP_TAG_EMPTY	3
316 static inline u32
twd_fxsr_to_i387(struct i387_fxsave_struct
*fxsave
)
319 u32 tos
= (fxsave
->swd
>> 11) & 7;
320 u32 twd
= (unsigned long) fxsave
->twd
;
322 u32 ret
= 0xffff0000u
;
325 for (i
= 0; i
< 8; i
++, twd
>>= 1) {
327 st
= FPREG_ADDR(fxsave
, (i
- tos
) & 7);
329 switch (st
->exponent
& 0x7fff) {
331 tag
= FP_EXP_TAG_SPECIAL
;
334 if (!st
->significand
[0] &&
335 !st
->significand
[1] &&
336 !st
->significand
[2] &&
338 tag
= FP_EXP_TAG_ZERO
;
340 tag
= FP_EXP_TAG_SPECIAL
;
343 if (st
->significand
[3] & 0x8000)
344 tag
= FP_EXP_TAG_VALID
;
346 tag
= FP_EXP_TAG_SPECIAL
;
350 tag
= FP_EXP_TAG_EMPTY
;
352 ret
|= tag
<< (2 * i
);
358 * FXSR floating point environment conversions.
362 convert_from_fxsr(struct user_i387_ia32_struct
*env
, struct task_struct
*tsk
)
364 struct i387_fxsave_struct
*fxsave
= &tsk
->thread
.fpu
.state
->fxsave
;
365 struct _fpreg
*to
= (struct _fpreg
*) &env
->st_space
[0];
366 struct _fpxreg
*from
= (struct _fpxreg
*) &fxsave
->st_space
[0];
369 env
->cwd
= fxsave
->cwd
| 0xffff0000u
;
370 env
->swd
= fxsave
->swd
| 0xffff0000u
;
371 env
->twd
= twd_fxsr_to_i387(fxsave
);
374 env
->fip
= fxsave
->rip
;
375 env
->foo
= fxsave
->rdp
;
376 if (tsk
== current
) {
378 * should be actually ds/cs at fpu exception time, but
379 * that information is not available in 64bit mode.
381 asm("mov %%ds, %[fos]" : [fos
] "=r" (env
->fos
));
382 asm("mov %%cs, %[fcs]" : [fcs
] "=r" (env
->fcs
));
384 struct pt_regs
*regs
= task_pt_regs(tsk
);
386 env
->fos
= 0xffff0000 | tsk
->thread
.ds
;
390 env
->fip
= fxsave
->fip
;
391 env
->fcs
= (u16
) fxsave
->fcs
| ((u32
) fxsave
->fop
<< 16);
392 env
->foo
= fxsave
->foo
;
393 env
->fos
= fxsave
->fos
;
396 for (i
= 0; i
< 8; ++i
)
397 memcpy(&to
[i
], &from
[i
], sizeof(to
[0]));
400 static void convert_to_fxsr(struct task_struct
*tsk
,
401 const struct user_i387_ia32_struct
*env
)
404 struct i387_fxsave_struct
*fxsave
= &tsk
->thread
.fpu
.state
->fxsave
;
405 struct _fpreg
*from
= (struct _fpreg
*) &env
->st_space
[0];
406 struct _fpxreg
*to
= (struct _fpxreg
*) &fxsave
->st_space
[0];
409 fxsave
->cwd
= env
->cwd
;
410 fxsave
->swd
= env
->swd
;
411 fxsave
->twd
= twd_i387_to_fxsr(env
->twd
);
412 fxsave
->fop
= (u16
) ((u32
) env
->fcs
>> 16);
414 fxsave
->rip
= env
->fip
;
415 fxsave
->rdp
= env
->foo
;
416 /* cs and ds ignored */
418 fxsave
->fip
= env
->fip
;
419 fxsave
->fcs
= (env
->fcs
& 0xffff);
420 fxsave
->foo
= env
->foo
;
421 fxsave
->fos
= env
->fos
;
424 for (i
= 0; i
< 8; ++i
)
425 memcpy(&to
[i
], &from
[i
], sizeof(from
[0]));
428 int fpregs_get(struct task_struct
*target
, const struct user_regset
*regset
,
429 unsigned int pos
, unsigned int count
,
430 void *kbuf
, void __user
*ubuf
)
432 struct user_i387_ia32_struct env
;
435 ret
= init_fpu(target
);
440 return fpregs_soft_get(target
, regset
, pos
, count
, kbuf
, ubuf
);
443 return user_regset_copyout(&pos
, &count
, &kbuf
, &ubuf
,
444 &target
->thread
.fpu
.state
->fsave
, 0,
448 sanitize_i387_state(target
);
450 if (kbuf
&& pos
== 0 && count
== sizeof(env
)) {
451 convert_from_fxsr(kbuf
, target
);
455 convert_from_fxsr(&env
, target
);
457 return user_regset_copyout(&pos
, &count
, &kbuf
, &ubuf
, &env
, 0, -1);
460 int fpregs_set(struct task_struct
*target
, const struct user_regset
*regset
,
461 unsigned int pos
, unsigned int count
,
462 const void *kbuf
, const void __user
*ubuf
)
464 struct user_i387_ia32_struct env
;
467 ret
= init_fpu(target
);
471 sanitize_i387_state(target
);
474 return fpregs_soft_set(target
, regset
, pos
, count
, kbuf
, ubuf
);
477 return user_regset_copyin(&pos
, &count
, &kbuf
, &ubuf
,
478 &target
->thread
.fpu
.state
->fsave
, 0, -1);
481 if (pos
> 0 || count
< sizeof(env
))
482 convert_from_fxsr(&env
, target
);
484 ret
= user_regset_copyin(&pos
, &count
, &kbuf
, &ubuf
, &env
, 0, -1);
486 convert_to_fxsr(target
, &env
);
489 * update the header bit in the xsave header, indicating the
493 target
->thread
.fpu
.state
->xsave
.xsave_hdr
.xstate_bv
|= XSTATE_FP
;
498 * Signal frame handlers.
501 static inline int save_i387_fsave(struct _fpstate_ia32 __user
*buf
)
503 struct task_struct
*tsk
= current
;
504 struct i387_fsave_struct
*fp
= &tsk
->thread
.fpu
.state
->fsave
;
506 fp
->status
= fp
->swd
;
507 if (__copy_to_user(buf
, fp
, sizeof(struct i387_fsave_struct
)))
512 static int save_i387_fxsave(struct _fpstate_ia32 __user
*buf
)
514 struct task_struct
*tsk
= current
;
515 struct i387_fxsave_struct
*fx
= &tsk
->thread
.fpu
.state
->fxsave
;
516 struct user_i387_ia32_struct env
;
519 convert_from_fxsr(&env
, tsk
);
520 if (__copy_to_user(buf
, &env
, sizeof(env
)))
523 err
|= __put_user(fx
->swd
, &buf
->status
);
524 err
|= __put_user(X86_FXSR_MAGIC
, &buf
->magic
);
528 if (__copy_to_user(&buf
->_fxsr_env
[0], fx
, xstate_size
))
533 static int save_i387_xsave(void __user
*buf
)
535 struct task_struct
*tsk
= current
;
536 struct _fpstate_ia32 __user
*fx
= buf
;
540 sanitize_i387_state(tsk
);
543 * For legacy compatible, we always set FP/SSE bits in the bit
544 * vector while saving the state to the user context.
545 * This will enable us capturing any changes(during sigreturn) to
546 * the FP/SSE bits by the legacy applications which don't touch
547 * xstate_bv in the xsave header.
549 * xsave aware applications can change the xstate_bv in the xsave
550 * header as well as change any contents in the memory layout.
551 * xrestore as part of sigreturn will capture all the changes.
553 tsk
->thread
.fpu
.state
->xsave
.xsave_hdr
.xstate_bv
|= XSTATE_FPSSE
;
555 if (save_i387_fxsave(fx
) < 0)
558 err
= __copy_to_user(&fx
->sw_reserved
, &fx_sw_reserved_ia32
,
559 sizeof(struct _fpx_sw_bytes
));
560 err
|= __put_user(FP_XSTATE_MAGIC2
,
561 (__u32 __user
*) (buf
+ sig_xstate_ia32_size
562 - FP_XSTATE_MAGIC2_SIZE
));
569 int save_i387_xstate_ia32(void __user
*buf
)
571 struct _fpstate_ia32 __user
*fp
= (struct _fpstate_ia32 __user
*) buf
;
572 struct task_struct
*tsk
= current
;
577 if (!access_ok(VERIFY_WRITE
, buf
, sig_xstate_ia32_size
))
580 * This will cause a "finit" to be triggered by the next
581 * attempted FPU operation by the 'current' process.
586 return fpregs_soft_get(current
, NULL
,
587 0, sizeof(struct user_i387_ia32_struct
),
594 return save_i387_xsave(fp
);
596 return save_i387_fxsave(fp
);
598 return save_i387_fsave(fp
);
601 static inline int restore_i387_fsave(struct _fpstate_ia32 __user
*buf
)
603 struct task_struct
*tsk
= current
;
605 return __copy_from_user(&tsk
->thread
.fpu
.state
->fsave
, buf
,
606 sizeof(struct i387_fsave_struct
));
609 static int restore_i387_fxsave(struct _fpstate_ia32 __user
*buf
,
612 struct task_struct
*tsk
= current
;
613 struct user_i387_ia32_struct env
;
616 err
= __copy_from_user(&tsk
->thread
.fpu
.state
->fxsave
, &buf
->_fxsr_env
[0],
618 /* mxcsr reserved bits must be masked to zero for security reasons */
619 tsk
->thread
.fpu
.state
->fxsave
.mxcsr
&= mxcsr_feature_mask
;
620 if (err
|| __copy_from_user(&env
, buf
, sizeof(env
)))
622 convert_to_fxsr(tsk
, &env
);
627 static int restore_i387_xsave(void __user
*buf
)
629 struct _fpx_sw_bytes fx_sw_user
;
630 struct _fpstate_ia32 __user
*fx_user
=
631 ((struct _fpstate_ia32 __user
*) buf
);
632 struct i387_fxsave_struct __user
*fx
=
633 (struct i387_fxsave_struct __user
*) &fx_user
->_fxsr_env
[0];
634 struct xsave_hdr_struct
*xsave_hdr
=
635 ¤t
->thread
.fpu
.state
->xsave
.xsave_hdr
;
639 if (check_for_xstate(fx
, buf
, &fx_sw_user
))
642 mask
= fx_sw_user
.xstate_bv
;
644 err
= restore_i387_fxsave(buf
, fx_sw_user
.xstate_size
);
646 xsave_hdr
->xstate_bv
&= pcntxt_mask
;
648 * These bits must be zero.
650 xsave_hdr
->reserved1
[0] = xsave_hdr
->reserved1
[1] = 0;
653 * Init the state that is not present in the memory layout
654 * and enabled by the OS.
656 mask
= ~(pcntxt_mask
& ~mask
);
657 xsave_hdr
->xstate_bv
&= mask
;
662 * Couldn't find the extended state information in the memory
663 * layout. Restore the FP/SSE and init the other extended state
666 xsave_hdr
->xstate_bv
= XSTATE_FPSSE
;
667 return restore_i387_fxsave(buf
, sizeof(struct i387_fxsave_struct
));
670 int restore_i387_xstate_ia32(void __user
*buf
)
673 struct task_struct
*tsk
= current
;
674 struct _fpstate_ia32 __user
*fp
= (struct _fpstate_ia32 __user
*) buf
;
687 if (!access_ok(VERIFY_READ
, buf
, sig_xstate_ia32_size
))
698 err
= restore_i387_xsave(buf
);
699 else if (cpu_has_fxsr
)
700 err
= restore_i387_fxsave(fp
, sizeof(struct
701 i387_fxsave_struct
));
703 err
= restore_i387_fsave(fp
);
705 err
= fpregs_soft_set(current
, NULL
,
706 0, sizeof(struct user_i387_ia32_struct
),
715 * FPU state for core dumps.
716 * This is only used for a.out dumps now.
717 * It is declared generically using elf_fpregset_t (which is
718 * struct user_i387_struct) but is in fact only used for 32-bit
719 * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
721 int dump_fpu(struct pt_regs
*regs
, struct user_i387_struct
*fpu
)
723 struct task_struct
*tsk
= current
;
726 fpvalid
= !!used_math();
728 fpvalid
= !fpregs_get(tsk
, NULL
,
729 0, sizeof(struct user_i387_ia32_struct
),
734 EXPORT_SYMBOL(dump_fpu
);
736 #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */