/*-
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1982, 1987, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $
 * $DragonFly: src/sys/platform/vkernel/i386/cpu_regs.c,v 1.17 2007/07/01 01:11:36 dillon Exp $
 */
#include "use_ether.h"
#include "opt_atalk.h"
#include "opt_compat.h"
#include "opt_directio.h"
#include "opt_msgbuf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/reboot.h>
#include <sys/msgbuf.h>
#include <sys/sysent.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/upcall.h>
#include <sys/usched.h>

#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>

#include <machine/cpu.h>
#include <machine/clock.h>
#include <machine/specialreg.h>
#include <machine/md_var.h>
#include <machine/pcb_ext.h>		/* pcb.h included via sys/user.h */
#include <machine/globaldata.h>		/* CPU_prvspace */
#include <machine/smp.h>
#include <machine/perfmon.h>
#include <machine/cputypes.h>

#include <bus/isa/rtc.h>
#include <machine/vm86.h>
#include <sys/random.h>
#include <sys/ptrace.h>
#include <machine/sigframe.h>
extern void dblfault_handler (void);

#ifndef CPU_DISABLE_SSE
static void set_fpregs_xmm (struct save87 *, struct savexmm *);
static void fill_fpregs_xmm (struct savexmm *, struct save87 *);
#endif /* CPU_DISABLE_SSE */

#ifdef DIRECTIO
extern void ffs_rawread_setup(void);
#endif /* DIRECTIO */
#ifdef SMP
int64_t tsc_offsets[MAXCPU];
#else
int64_t tsc_offsets[1];
#endif
#if defined(SWTCH_OPTIM_STATS)
extern int swtch_optim_stats;
SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
	CTLFLAG_RD, &swtch_optim_stats, 0, "");
SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
	CTLFLAG_RD, &tlb_flush_count, 0, "");
#endif
static int
sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
{
	int error = sysctl_handle_int(oidp, 0, ctob((int)Maxmem), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_physmem, "IU", "");
static int
sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
{
	int error = sysctl_handle_int(oidp, 0,
		ctob((int)Maxmem - vmstats.v_wire_count), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_usermem, "IU", "");
SYSCTL_ULONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &Maxmem, NULL, "");
static int
sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS)
{
	int error;

	/* Unwind the buffer, so that it's linear (possibly starting with
	 * some initial nulls).
	 */
	error = sysctl_handle_opaque(oidp, msgbufp->msg_ptr + msgbufp->msg_bufr,
	    msgbufp->msg_size - msgbufp->msg_bufr, req);
	if (error)
		return (error);
	if (msgbufp->msg_bufr > 0) {
		error = sysctl_handle_opaque(oidp, msgbufp->msg_ptr,
		    msgbufp->msg_bufr, req);
	}
	return (error);
}
SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD,
	0, 0, sysctl_machdep_msgbuf, "A", "Contents of kernel message buffer");
static int msgbuf_clear;

static int
sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS)
{
	int error;

	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
	    req);
	if (!error && req->newptr) {
		/* Clear the buffer and reset write pointer */
		bzero(msgbufp->msg_ptr, msgbufp->msg_size);
		msgbufp->msg_bufr = msgbufp->msg_bufx = 0;
		msgbuf_clear = 0;
	}
	return (error);
}

SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW,
	&msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I",
	"Clear kernel message buffer");
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * at top to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 */
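/*
 * Rough sketch (not authoritative) of the resulting user stack, per the
 * code below:
 *
 *	high addresses
 *	    sfp -> struct sigframe   (signal number, siginfo pointer,
 *	                              ucontext with the saved trapframe)
 *	    tf_esp = sfp
 *	low addresses
 *
 * tf_eip is pointed at the signal trampoline just below PS_STRINGS; the
 * trampoline calls the handler and then issues sigreturn(2) to restore
 * the saved context.
 */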
void
sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct lwp *lp = curthread->td_lwp;
	struct proc *p = lp->lwp_proc;
	struct trapframe *regs;
	struct sigacts *psp = p->p_sigacts;
	struct sigframe sf, *sfp;
	int oonstack;

	regs = lp->lwp_md.md_regs;
	oonstack = (lp->lwp_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0;
	/* save user context */
	bzero(&sf, sizeof(struct sigframe));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = lp->lwp_sigstk;
	sf.sf_uc.uc_mcontext.mc_onstack = oonstack;
	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_gs, sizeof(struct trapframe));

	/* make the size of the saved context visible to userland */
	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext);

	/* save mailbox pending state for syscall interlock semantics */
	if (p->p_flag & P_MAILBOX)
		sf.sf_uc.uc_mcontext.mc_xflags |= PGEX_MAILBOX;
	/* Allocate and validate space for the signal handler context. */
	if ((lp->lwp_flag & LWP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sfp = (struct sigframe *)(lp->lwp_sigstk.ss_sp +
		    lp->lwp_sigstk.ss_size - sizeof(struct sigframe));
		lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
	} else {
		sfp = (struct sigframe *)regs->tf_esp - 1;
	}
	/* Translate the signal if appropriate */
	if (p->p_sysent->sv_sigtbl) {
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
	}
	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_ucontext = (register_t)&sfp->sf_uc;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_siginfo = (register_t)&sfp->sf_si;
		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

		/* fill siginfo structure */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = code;
		sf.sf_si.si_addr = (void *)regs->tf_err;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_siginfo = code;
		sf.sf_addr = regs->tf_err;
		sf.sf_ahu.sf_handler = catcher;
	}
	/*
	 * If we're a vm86 process, we want to save the segment registers.
	 * We also change eflags to be our emulated eflags, not the actual
	 * eflags.
	 */
	if (regs->tf_eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;

		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;

		if (vm86->vm86_has_vme == 0)
			sf.sf_uc.uc_mcontext.mc_eflags =
			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

		/*
		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
		 * syscalls made by the signal handler.  This just avoids
		 * wasting time for our lazy fixup of such faults.  PSL_NT
		 * does nothing in vm86 mode, but vm86 programs can set it
		 * almost legitimately in probes for old cpu types.
		 */
		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
	}
	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		sigexit(lp, SIGILL);
	}

	regs->tf_esp = (int)sfp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_eflags &= ~PSL_T;
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	if (regs->tf_trapno == T_PROTFLT) {
		regs->tf_fs = _udatasel;
		regs->tf_gs = _udatasel;
	}
	regs->tf_ss = _udatasel;
}
/*
 * Sanitize the trapframe for a virtual kernel passing control to a custom
 * VM context.
 *
 * Allow userland to set or maintain PSL_RF, the resume flag.  This flag
 * basically controls whether the return PC should skip the first instruction
 * (as in an explicit system call) or re-execute it (as in an exception).
 */
int
cpu_sanitize_frame(struct trapframe *frame)
{
	frame->tf_cs = _ucodesel;
	frame->tf_ds = _udatasel;
	frame->tf_es = _udatasel;
	frame->tf_fs = _udatasel;
	frame->tf_gs = _udatasel;
	frame->tf_ss = _udatasel;
	frame->tf_eflags &= (PSL_RF | PSL_USERCHANGE);
	frame->tf_eflags |= PSL_RESERVED_DEFAULT | PSL_I;
	return(0);
}
int
cpu_sanitize_tls(struct savetls *tls)
{
	struct segment_descriptor *desc;
	int i;

	for (i = 0; i < NGTLS; ++i) {
		desc = &tls->tls[i];
		if (desc->sd_dpl == 0 && desc->sd_type == 0)
			continue;
		if (desc->sd_def32 == 0)
			return(ENXIO);
		if (desc->sd_type != SDT_MEMRWA)
			return(ENXIO);
		if (desc->sd_dpl != SEL_UPL)
			return(ENXIO);
		if (desc->sd_xx != 0 || desc->sd_p != 1)
			return(ENXIO);
	}
	return(0);
}
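/*
 * Illustration: a descriptor that passes the checks above is a present,
 * 32-bit, user-privilege read/write data segment, roughly
 *
 *	desc->sd_type  = SDT_MEMRWA;
 *	desc->sd_dpl   = SEL_UPL;
 *	desc->sd_def32 = 1;
 *	desc->sd_p     = 1;
 *
 * Anything else, other than an all-clear unused slot, is rejected.
 */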
/*
 * sigreturn(ucontext_t *sigcntxp)
 *
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig.  Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 */
#define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
#define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
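/*
 * Illustrative check (not from the original source): EFL_SECURE() only
 * accepts eflags images whose privileged bits match the current frame.
 * A handler that tries to raise IOPL, e.g.
 *
 *	ef = oef | PSL_IOPL;
 *
 * leaves (ef ^ oef) & ~PSL_USERCHANGE non-zero and is rejected, while
 * edits confined to PSL_USERCHANGE (arithmetic flags, PSL_T, ...) pass.
 */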
int
sys_sigreturn(struct sigreturn_args *uap)
{
	struct lwp *lp = curthread->td_lwp;
	struct proc *p = lp->lwp_proc;
	struct trapframe *regs;
	ucontext_t ucp;
	int cs;
	int eflags;
	int error;

	error = copyin(uap->sigcntxp, &ucp, sizeof(ucp));
	if (error)
		return (error);
	regs = lp->lwp_md.md_regs;
	eflags = ucp.uc_mcontext.mc_eflags;

	if (eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;
414 struct vm86_kernel
*vm86
;
417 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
418 * set up the vm86 area, and we can't enter vm86 mode.
420 if (lp
->lwp_thread
->td_pcb
->pcb_ext
== 0)
422 vm86
= &lp
->lwp_thread
->td_pcb
->pcb_ext
->ext_vm86
;
423 if (vm86
->vm86_inited
== 0)
426 /* go back to user mode if both flags are set */
427 if ((eflags
& PSL_VIP
) && (eflags
& PSL_VIF
))
428 trapsignal(lp
->lwp_proc
, SIGBUS
, 0);
		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		bcopy(&ucp.uc_mcontext.mc_gs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = tf->tf_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
		tf->tf_gs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 *
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			kprintf("sigreturn: eflags = 0x%x\n", eflags);
			return(EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp.uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			kprintf("sigreturn: cs = 0x%x\n", cs);
			trapsignal(lp, SIGBUS, T_PROTFLT);
			return(EINVAL);
		}
		bcopy(&ucp.uc_mcontext.mc_gs, regs, sizeof(struct trapframe));
	}
	/*
	 * Merge saved signal mailbox pending flag to maintain interlock
	 * semantics against system calls.
	 */
	if (ucp.uc_mcontext.mc_xflags & PGEX_MAILBOX)
		p->p_flag |= P_MAILBOX;

	if (ucp.uc_mcontext.mc_onstack & 1)
		lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
	else
		lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;

	lp->lwp_sigmask = ucp.uc_sigmask;
	SIG_CANTMASK(lp->lwp_sigmask);
	return(EJUSTRETURN);
}
/*
 * Stack frame on entry to function.  %eax will contain the function vector,
 * %ecx will contain the function data.  flags, ecx, and eax will have
 * already been pushed on the stack.
 */
struct upc_frame {
	register_t	eax;
	register_t	ecx;
	register_t	edx;
	register_t	flags;
	register_t	oldip;
};
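/*
 * Illustrative only (not from this file): a userland upcall entry stub
 * receives the function vector in %eax and its data in %ecx, with the
 * interrupted eax/ecx/edx/eflags/eip preserved in the struct upc_frame
 * that sendupcall() pushes below the old %esp.  Roughly:
 *
 *	upc_entry:
 *		call	*%eax		; vu_func(vu_data in %ecx)
 *		...			; then upc_dispatch(-1), which ends
 *					; in fetchupcall() restoring upc_frame
 */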
void
sendupcall(struct vmupcall *vu, int morepending)
{
	struct lwp *lp = curthread->td_lwp;
	struct trapframe *regs;
	struct upcall upcall;
	struct upc_frame upc_frame;
	int	crit_count = 0;
	/*
	 * If we are a virtual kernel running an emulated user process
	 * context, switch back to the virtual kernel context before
	 * trying to post the signal.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		lp->lwp_md.md_regs->tf_trapno = 0;
		vkernel_trap(lp, lp->lwp_md.md_regs);
	}

	/*
	 * Get the upcall data structure
	 */
	if (copyin(lp->lwp_upcall, &upcall, sizeof(upcall)) ||
	    copyin((char *)upcall.upc_uthread + upcall.upc_critoff,
		&crit_count, sizeof(int))
	) {
		vu->vu_pending = 0;
		kprintf("bad upcall address\n");
		return;
	}
	/*
	 * If the data structure is already marked pending or has a critical
	 * section count, mark the data structure as pending and return
	 * without doing an upcall.  vu_pending is left set.
	 */
	if (upcall.upc_pending || crit_count >= vu->vu_pending) {
		if (upcall.upc_pending < vu->vu_pending) {
			upcall.upc_pending = vu->vu_pending;
			copyout(&upcall.upc_pending,
			    &lp->lwp_upcall->upc_pending,
			    sizeof(upcall.upc_pending));
		}
		return;
	}
	/*
	 * We can run this upcall now, clear vu_pending.
	 *
	 * Bump our critical section count and set or clear the
	 * user pending flag depending on whether more upcalls are
	 * pending.  The user will be responsible for calling
	 * upc_dispatch(-1) to process remaining upcalls.
	 */
	vu->vu_pending = 0;
	upcall.upc_pending = morepending;
	crit_count += TDPRI_CRIT;
	copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending,
	    sizeof(upcall.upc_pending));
	copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff,
	    sizeof(int));
	/*
	 * Construct a stack frame and issue the upcall
	 */
	regs = lp->lwp_md.md_regs;
	upc_frame.eax = regs->tf_eax;
	upc_frame.ecx = regs->tf_ecx;
	upc_frame.edx = regs->tf_edx;
	upc_frame.flags = regs->tf_eflags;
	upc_frame.oldip = regs->tf_eip;
	if (copyout(&upc_frame, (void *)(regs->tf_esp - sizeof(upc_frame)),
		    sizeof(upc_frame)) != 0) {
		kprintf("bad stack on upcall\n");
	} else {
		regs->tf_eax = (register_t)vu->vu_func;
		regs->tf_ecx = (register_t)vu->vu_data;
		regs->tf_edx = (register_t)lp->lwp_upcall;
		regs->tf_eip = (register_t)vu->vu_ctx;
		regs->tf_esp -= sizeof(upc_frame);
	}
}
/*
 * fetchupcall occurs in the context of a system call, which means that
 * we have to return EJUSTRETURN in order to prevent eax and edx from
 * being overwritten by the syscall return value.
 *
 * if vu is not NULL we return the new context in %edx, the new data in %ecx,
 * and the function pointer in %eax.
 */
int
fetchupcall (struct vmupcall *vu, int morepending, void *rsp)
{
	struct upc_frame upc_frame;
	struct lwp *lp = curthread->td_lwp;
	struct trapframe *regs;
	int error;
	struct upcall upcall;
	int crit_count;
= lp
->lwp_md
.md_regs
;
615 error
= copyout(&morepending
, &lp
->lwp_upcall
->upc_pending
, sizeof(int));
619 * This jumps us to the next ready context.
622 error
= copyin(lp
->lwp_upcall
, &upcall
, sizeof(upcall
));
625 error
= copyin((char *)upcall
.upc_uthread
+ upcall
.upc_critoff
, &crit_count
, sizeof(int));
626 crit_count
+= TDPRI_CRIT
;
628 error
= copyout(&crit_count
, (char *)upcall
.upc_uthread
+ upcall
.upc_critoff
, sizeof(int));
629 regs
->tf_eax
= (register_t
)vu
->vu_func
;
630 regs
->tf_ecx
= (register_t
)vu
->vu_data
;
631 regs
->tf_edx
= (register_t
)lp
->lwp_upcall
;
632 regs
->tf_eip
= (register_t
)vu
->vu_ctx
;
633 regs
->tf_esp
= (register_t
)rsp
;
636 * This returns us to the originally interrupted code.
638 error
= copyin(rsp
, &upc_frame
, sizeof(upc_frame
));
639 regs
->tf_eax
= upc_frame
.eax
;
640 regs
->tf_ecx
= upc_frame
.ecx
;
641 regs
->tf_edx
= upc_frame
.edx
;
642 regs
->tf_eflags
= (regs
->tf_eflags
& ~PSL_USERCHANGE
) |
643 (upc_frame
.flags
& PSL_USERCHANGE
);
644 regs
->tf_eip
= upc_frame
.oldip
;
645 regs
->tf_esp
= (register_t
)((char *)rsp
+ sizeof(upc_frame
));
/*
 * cpu_idle() represents the idle LWKT.  You cannot return from this function
 * (unless you want to blow things up!).  Instead we look for runnable threads
 * and loop or halt as appropriate.  Giant is not held on entry to the thread.
 *
 * The main loop is entered with a critical section held, we must release
 * the critical section before doing anything else.  lwkt_switch() will
 * check for pending interrupts due to entering and exiting its own
 * critical section.
 *
 * Note on cpu_idle_hlt:  On an SMP system we rely on a scheduler IPI
 * to wake a HLTed cpu up.  However, there are cases where the idlethread
 * will be entered with the possibility that no IPI will occur and in such
 * cases lwkt_switch() sets TDF_IDLE_NOHLT.
 */
static int	cpu_idle_hlt = 1;
static int	cpu_idle_hltcnt;
static int	cpu_idle_spincnt;
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
    &cpu_idle_hlt, 0, "Idle loop HLT enable");
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW,
    &cpu_idle_hltcnt, 0, "Idle loop entry halts");
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW,
    &cpu_idle_spincnt, 0, "Idle loop entry spins");
void
cpu_idle(void)
{
	struct thread *td = curthread;

	crit_exit();
	KKASSERT(td->td_pri < TDPRI_CRIT);
	for (;;) {
		/*
		 * See if there are any LWKTs ready to go.
		 */
		lwkt_switch();

		/*
		 * If we are going to halt call splz unconditionally after
		 * CLIing to catch any interrupt races.  Note that we are
		 * at SPL0 and interrupts are enabled.
		 *
		 * We must poll our mailbox signals prior to calling
		 * sigpause() in order to properly interlock with them.
		 */
		if (cpu_idle_hlt && !lwkt_runnable() &&
		    (td->td_flags & TDF_IDLE_NOHLT) == 0) {
			splz();
			if (!lwkt_runnable()) {
				__asm __volatile("pause");
			}
			td->td_flags &= ~TDF_IDLE_NOHLT;
			++cpu_idle_hltcnt;
		} else {
			splz();
			/*__asm __volatile("sti; pause");*/
			__asm __volatile("pause");
			/*__asm __volatile("sti");*/
			++cpu_idle_spincnt;
		}
	}
}
/*
 * Clear registers on exec
 */
void
exec_setregs(u_long entry, u_long stack, u_long ps_strings)
{
	struct thread *td = curthread;
	struct lwp *lp = td->td_lwp;
	struct trapframe *regs = lp->lwp_md.md_regs;
	struct pcb *pcb = lp->lwp_thread->td_pcb;

	/* was i386_user_cleanup() in NetBSD */
	user_ldt_free(pcb);

	bzero((char *)regs, sizeof(struct trapframe));
	regs->tf_eip = entry;
	regs->tf_esp = stack;
	regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_gs = _udatasel;
	regs->tf_cs = _ucodesel;

	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
	regs->tf_ebx = ps_strings;
	/*
	 * Reset the hardware debug registers if they were in use.
	 * They won't have any meaning for the newly exec'd process.
	 */
	if (pcb->pcb_flags & PCB_DBREGS) {
		pcb->pcb_dr0 = 0;
		pcb->pcb_dr1 = 0;
		pcb->pcb_dr2 = 0;
		pcb->pcb_dr3 = 0;
		pcb->pcb_dr6 = 0;
		pcb->pcb_dr7 = 0;
		if (pcb == td->td_pcb) {
			/*
			 * Clear the debug registers on the running
			 * CPU, otherwise they will end up affecting
			 * the next process we switch to.
			 */
			reset_dbregs();
		}
		pcb->pcb_flags &= ~PCB_DBREGS;
	}
	/*
	 * Initialize the math emulator (if any) for the current process.
	 * Actually, just clear the bit that says that the emulator has
	 * been initialized.  Initialization is delayed until the process
	 * traps to the emulator (if it is done at all) mainly because
	 * emulators don't provide an entry point for initialization.
	 */
	pcb->pcb_flags &= ~FP_SOFTFP;

	/*
	 * note: do not set CR0_TS here.  npxinit() must do it after clearing
	 * gd_npxthread.  Otherwise a preemptive interrupt thread may panic
	 * in npxdna().
	 */
	load_cr0(rcr0() | CR0_MP);

	/* Initialize the npx (if any) for the current process. */
	npxinit(__INITIAL_NPXCW__);
	/*
	 * note: linux emulator needs edx to be 0x0 on entry, which is
	 * handled in execve simply by setting the 64 bit syscall
	 * return value to 0.
	 */
}

void
cpu_setregs(void)
{
	unsigned int cr0;

	cr0 = rcr0();
	cr0 |= CR0_NE;			/* Done by npxinit() */
	cr0 |= CR0_MP | CR0_TS;		/* Done at every execve() too. */
	if (cpu_class != CPUCLASS_386)
		cr0 |= CR0_WP | CR0_AM;
	load_cr0(cr0);
}
static int
sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
{
	int error;

	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
	    req);
	if (!error && req->newptr)
		resettodr();
	return (error);
}

SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

extern u_long bootdev;		/* not a cdev_t - encoding is different */
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
	CTLFLAG_RD, &bootdev, 0, "Boot device (not in cdev_t format)");
/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

extern struct user *proc0paddr;

extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm), IDTVEC(syscall),
	IDTVEC(int0x80_syscall);

#ifdef DEBUG_INTERRUPTS
extern inthand_t *Xrsvdary[256];
#endif
int
ptrace_set_pc(struct lwp *lp, unsigned long addr)
{
	lp->lwp_md.md_regs->tf_eip = addr;
	return (0);
}

int
ptrace_single_step(struct lwp *lp)
{
	lp->lwp_md.md_regs->tf_eflags |= PSL_T;
	return (0);
}
int
fill_regs(struct lwp *lp, struct reg *regs)
{
	struct trapframe *tp;

	tp = lp->lwp_md.md_regs;
	regs->r_gs = tp->tf_gs;
	regs->r_fs = tp->tf_fs;
	regs->r_es = tp->tf_es;
	regs->r_ds = tp->tf_ds;
	regs->r_edi = tp->tf_edi;
	regs->r_esi = tp->tf_esi;
	regs->r_ebp = tp->tf_ebp;
	regs->r_ebx = tp->tf_ebx;
	regs->r_edx = tp->tf_edx;
	regs->r_ecx = tp->tf_ecx;
	regs->r_eax = tp->tf_eax;
	regs->r_eip = tp->tf_eip;
	regs->r_cs = tp->tf_cs;
	regs->r_eflags = tp->tf_eflags;
	regs->r_esp = tp->tf_esp;
	regs->r_ss = tp->tf_ss;
	return (0);
}
int
set_regs(struct lwp *lp, struct reg *regs)
{
	struct trapframe *tp;

	tp = lp->lwp_md.md_regs;
	if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
	    !CS_SECURE(regs->r_cs))
		return (EINVAL);
	tp->tf_gs = regs->r_gs;
	tp->tf_fs = regs->r_fs;
	tp->tf_es = regs->r_es;
	tp->tf_ds = regs->r_ds;
	tp->tf_edi = regs->r_edi;
	tp->tf_esi = regs->r_esi;
	tp->tf_ebp = regs->r_ebp;
	tp->tf_ebx = regs->r_ebx;
	tp->tf_edx = regs->r_edx;
	tp->tf_ecx = regs->r_ecx;
	tp->tf_eax = regs->r_eax;
	tp->tf_eip = regs->r_eip;
	tp->tf_cs = regs->r_cs;
	tp->tf_eflags = regs->r_eflags;
	tp->tf_esp = regs->r_esp;
	tp->tf_ss = regs->r_ss;
	return (0);
}
#ifndef CPU_DISABLE_SSE
static void
fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87)
{
	struct env87 *penv_87 = &sv_87->sv_env;
	struct envxmm *penv_xmm = &sv_xmm->sv_env;
	int i;

	/* FPU control/status */
	penv_87->en_cw = penv_xmm->en_cw;
	penv_87->en_sw = penv_xmm->en_sw;
	penv_87->en_tw = penv_xmm->en_tw;
	penv_87->en_fip = penv_xmm->en_fip;
	penv_87->en_fcs = penv_xmm->en_fcs;
	penv_87->en_opcode = penv_xmm->en_opcode;
	penv_87->en_foo = penv_xmm->en_foo;
	penv_87->en_fos = penv_xmm->en_fos;

	/* FPU registers */
	for (i = 0; i < 8; ++i)
		sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;

	sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
}
static void
set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm)
{
	struct env87 *penv_87 = &sv_87->sv_env;
	struct envxmm *penv_xmm = &sv_xmm->sv_env;
	int i;

	/* FPU control/status */
	penv_xmm->en_cw = penv_87->en_cw;
	penv_xmm->en_sw = penv_87->en_sw;
	penv_xmm->en_tw = penv_87->en_tw;
	penv_xmm->en_fip = penv_87->en_fip;
	penv_xmm->en_fcs = penv_87->en_fcs;
	penv_xmm->en_opcode = penv_87->en_opcode;
	penv_xmm->en_foo = penv_87->en_foo;
	penv_xmm->en_fos = penv_87->en_fos;

	/* FPU registers */
	for (i = 0; i < 8; ++i)
		sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];

	sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
}
#endif /* CPU_DISABLE_SSE */
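/*
 * Note (general x86 background, not from the original comments): the two
 * converters above exist because FXSAVE state (struct savexmm) and legacy
 * FNSAVE state (struct save87) lay out the FPU environment differently,
 * while ptrace and core dumps always speak the save87 format.
 */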
int
fill_fpregs(struct lwp *lp, struct fpreg *fpregs)
{
#ifndef CPU_DISABLE_SSE
	if (cpu_fxsr) {
		fill_fpregs_xmm(&lp->lwp_thread->td_pcb->pcb_save.sv_xmm,
				(struct save87 *)fpregs);
		return (0);
	}
#endif /* CPU_DISABLE_SSE */
	bcopy(&lp->lwp_thread->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
	return (0);
}
int
set_fpregs(struct lwp *lp, struct fpreg *fpregs)
{
#ifndef CPU_DISABLE_SSE
	if (cpu_fxsr) {
		set_fpregs_xmm((struct save87 *)fpregs,
			       &lp->lwp_thread->td_pcb->pcb_save.sv_xmm);
		return (0);
	}
#endif /* CPU_DISABLE_SSE */
	bcopy(fpregs, &lp->lwp_thread->td_pcb->pcb_save.sv_87, sizeof *fpregs);
	return (0);
}
int
fill_dbregs(struct lwp *lp, struct dbreg *dbregs)
{
	struct pcb *pcb;

	if (lp == NULL) {
		dbregs->dr0 = rdr0();
		dbregs->dr1 = rdr1();
		dbregs->dr2 = rdr2();
		dbregs->dr3 = rdr3();
		dbregs->dr4 = rdr4();
		dbregs->dr5 = rdr5();
		dbregs->dr6 = rdr6();
		dbregs->dr7 = rdr7();
	} else {
		pcb = lp->lwp_thread->td_pcb;
		dbregs->dr0 = pcb->pcb_dr0;
		dbregs->dr1 = pcb->pcb_dr1;
		dbregs->dr2 = pcb->pcb_dr2;
		dbregs->dr3 = pcb->pcb_dr3;
		dbregs->dr4 = 0;
		dbregs->dr5 = 0;
		dbregs->dr6 = pcb->pcb_dr6;
		dbregs->dr7 = pcb->pcb_dr7;
	}
	return (0);
}
int
set_dbregs(struct lwp *lp, struct dbreg *dbregs)
{
	if (lp == NULL) {
		load_dr0(dbregs->dr0);
		load_dr1(dbregs->dr1);
		load_dr2(dbregs->dr2);
		load_dr3(dbregs->dr3);
		load_dr4(dbregs->dr4);
		load_dr5(dbregs->dr5);
		load_dr6(dbregs->dr6);
		load_dr7(dbregs->dr7);
	} else {
		struct pcb *pcb;
		struct ucred *ucred;
		int i;
		uint32_t mask1, mask2;

		/*
		 * Don't let an illegal value for dr7 get set.  Specifically,
		 * check for undefined settings.  Setting these bit patterns
		 * results in undefined behaviour and can lead to an unexpected
		 * kernel panic.
		 */
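		/*
		 * Illustration (dr7 layout, per the IA-32 manuals): bits
		 * 16..31 hold four (R/Wn, LENn) two-bit field pairs, one
		 * pair per debug register.  The loop below slides mask1
		 * across all eight fields and rejects the undefined
		 * encoding 10b, e.g.
		 *
		 *	dbregs->dr7 = 0x2 << 16;	R/W0 == 10b -> EINVAL
		 */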
		for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8;
		     i++, mask1 <<= 2, mask2 <<= 2)
			if ((dbregs->dr7 & mask1) == mask2)
				return (EINVAL);

		pcb = lp->lwp_thread->td_pcb;
		ucred = lp->lwp_proc->p_ucred;

		/*
		 * Don't let a process set a breakpoint that is not within the
		 * process's address space.  If a process could do this, it
		 * could halt the system by setting a breakpoint in the kernel
		 * (if ddb was enabled).  Thus, we need to check to make sure
		 * that no breakpoints are being enabled for addresses outside
		 * process's address space, unless, perhaps, we were called by
		 * kernel code.
		 *
		 * XXX - what about when the watched area of the user's
		 * address space is written into from within the kernel
		 * ... wouldn't that still cause a breakpoint to be generated
		 * from within kernel mode?
		 */
		if (suser_cred(ucred, 0) != 0) {
			if (dbregs->dr7 & 0x3) {
				/* dr0 is enabled */
				if (dbregs->dr0 >= VM_MAX_USER_ADDRESS)
					return (EINVAL);
			}
			if (dbregs->dr7 & (0x3<<2)) {
				/* dr1 is enabled */
				if (dbregs->dr1 >= VM_MAX_USER_ADDRESS)
					return (EINVAL);
			}
			if (dbregs->dr7 & (0x3<<4)) {
				/* dr2 is enabled */
				if (dbregs->dr2 >= VM_MAX_USER_ADDRESS)
					return (EINVAL);
			}
			if (dbregs->dr7 & (0x3<<6)) {
				/* dr3 is enabled */
				if (dbregs->dr3 >= VM_MAX_USER_ADDRESS)
					return (EINVAL);
			}
		}

		pcb->pcb_dr0 = dbregs->dr0;
		pcb->pcb_dr1 = dbregs->dr1;
		pcb->pcb_dr2 = dbregs->dr2;
		pcb->pcb_dr3 = dbregs->dr3;
		pcb->pcb_dr6 = dbregs->dr6;
		pcb->pcb_dr7 = dbregs->dr7;

		pcb->pcb_flags |= PCB_DBREGS;
	}
	return (0);
}
/*
 * Return > 0 if a hardware breakpoint has been hit, and the
 * breakpoint was in user space.  Return 0, otherwise.
 */
int
user_dbreg_trap(void)
{
	u_int32_t dr7, dr6;	/* debug registers dr6 and dr7 */
	u_int32_t bp;		/* breakpoint bits extracted from dr6 */
	int nbp;		/* number of breakpoints that triggered */
	caddr_t addr[4];	/* breakpoint addresses */
	int i;

	dr7 = rdr7();
	if ((dr7 & 0x000000ff) == 0) {
		/*
		 * all GE and LE bits in the dr7 register are zero,
		 * thus the trap couldn't have been caused by the
		 * hardware debug registers
		 */
		return 0;
	}

	nbp = 0;
	dr6 = rdr6();
	bp = dr6 & 0x0000000f;
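	/*
	 * Note (IA-32 background): the low four bits of dr6 (B0..B3) record
	 * which of dr0..dr3 triggered; e.g. a hit on the dr1 breakpoint sets
	 * bit 1, so the (bp & 0x02) test below collects rdr1() as a
	 * candidate address.
	 */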
	if (!bp) {
		/*
		 * None of the breakpoint bits are set meaning this
		 * trap was not caused by any of the debug registers
		 */
		return 0;
	}

	/*
	 * at least one of the breakpoints was hit, check to see
	 * which ones and if any of them are user space addresses
	 */
	if (bp & 0x01) {
		addr[nbp++] = (caddr_t)rdr0();
	}
	if (bp & 0x02) {
		addr[nbp++] = (caddr_t)rdr1();
	}
	if (bp & 0x04) {
		addr[nbp++] = (caddr_t)rdr2();
	}
	if (bp & 0x08) {
		addr[nbp++] = (caddr_t)rdr3();
	}

	for (i = 0; i < nbp; i++) {
		if (addr[i] < (caddr_t)VM_MAX_USER_ADDRESS) {
			/*
			 * addr[i] is in user space
			 */
			return nbp;
		}
	}

	/*
	 * None of the breakpoints are in user space.
	 */
	return 0;
}
void
Debugger(const char *msg)
{
	kprintf("Debugger(\"%s\") called.\n", msg);
}