2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (C) 1994, David Greenman
4 * Copyright (c) 1982, 1987, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
39 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $
40 * $DragonFly: src/sys/platform/vkernel/i386/cpu_regs.c,v 1.16 2007/06/29 21:54:11 dillon Exp $
43 #include "use_ether.h"
46 #include "opt_atalk.h"
47 #include "opt_compat.h"
49 #include "opt_directio.h"
52 #include "opt_msgbuf.h"
55 #include <sys/param.h>
56 #include <sys/systm.h>
57 #include <sys/sysproto.h>
58 #include <sys/signalvar.h>
59 #include <sys/kernel.h>
60 #include <sys/linker.h>
61 #include <sys/malloc.h>
64 #include <sys/reboot.h>
66 #include <sys/msgbuf.h>
67 #include <sys/sysent.h>
68 #include <sys/sysctl.h>
69 #include <sys/vmmeter.h>
71 #include <sys/upcall.h>
72 #include <sys/usched.h>
76 #include <vm/vm_param.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_page.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_pager.h>
83 #include <vm/vm_extern.h>
85 #include <sys/thread2.h>
93 #include <machine/cpu.h>
94 #include <machine/clock.h>
95 #include <machine/specialreg.h>
96 #include <machine/md_var.h>
97 #include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
98 #include <machine/globaldata.h> /* CPU_prvspace */
99 #include <machine/smp.h>
101 #include <machine/perfmon.h>
103 #include <machine/cputypes.h>
105 #include <bus/isa/rtc.h>
106 #include <machine/vm86.h>
107 #include <sys/random.h>
108 #include <sys/ptrace.h>
109 #include <machine/sigframe.h>
/* Handler implemented outside this file. */
extern void dblfault_handler(void);

#ifndef CPU_DISABLE_SSE
/*
 * Helpers that copy FPU state between struct save87 and struct savexmm;
 * both are defined later in this file.
 */
static void set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm);
static void fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87);
#endif /* CPU_DISABLE_SSE */
118 extern void ffs_rawread_setup(void);
119 #endif /* DIRECTIO */
122 int64_t tsc_offsets
[MAXCPU
];
124 int64_t tsc_offsets
[1];
#if defined(SWTCH_OPTIM_STATS)
/*
 * Read-only debug.* sysctl nodes, compiled only when SWTCH_OPTIM_STATS
 * is defined.
 */
extern int swtch_optim_stats;
SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, CTLFLAG_RD,
    &swtch_optim_stats, 0, "");
SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD,
    &tlb_flush_count, 0, "");
#endif /* SWTCH_OPTIM_STATS */
136 sysctl_hw_physmem(SYSCTL_HANDLER_ARGS
)
138 int error
= sysctl_handle_int(oidp
, 0, ctob((int)Maxmem
), req
);
142 SYSCTL_PROC(_hw
, HW_PHYSMEM
, physmem
, CTLTYPE_INT
|CTLFLAG_RD
,
143 0, 0, sysctl_hw_physmem
, "IU", "");
146 sysctl_hw_usermem(SYSCTL_HANDLER_ARGS
)
148 int error
= sysctl_handle_int(oidp
, 0,
149 ctob((int)Maxmem
- vmstats
.v_wire_count
), req
);
153 SYSCTL_PROC(_hw
, HW_USERMEM
, usermem
, CTLTYPE_INT
|CTLFLAG_RD
,
154 0, 0, sysctl_hw_usermem
, "IU", "");
156 SYSCTL_ULONG(_hw
, OID_AUTO
, availpages
, CTLFLAG_RD
, &Maxmem
, NULL
, "");
161 sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS
)
165 /* Unwind the buffer, so that it's linear (possibly starting with
166 * some initial nulls).
168 error
=sysctl_handle_opaque(oidp
,msgbufp
->msg_ptr
+msgbufp
->msg_bufr
,
169 msgbufp
->msg_size
-msgbufp
->msg_bufr
,req
);
170 if(error
) return(error
);
171 if(msgbufp
->msg_bufr
>0) {
172 error
=sysctl_handle_opaque(oidp
,msgbufp
->msg_ptr
,
173 msgbufp
->msg_bufr
,req
);
178 SYSCTL_PROC(_machdep
, OID_AUTO
, msgbuf
, CTLTYPE_STRING
|CTLFLAG_RD
,
179 0, 0, sysctl_machdep_msgbuf
, "A","Contents of kernel message buffer");
181 static int msgbuf_clear
;
184 sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS
)
187 error
= sysctl_handle_int(oidp
, oidp
->oid_arg1
, oidp
->oid_arg2
,
189 if (!error
&& req
->newptr
) {
190 /* Clear the buffer and reset write pointer */
191 bzero(msgbufp
->msg_ptr
,msgbufp
->msg_size
);
192 msgbufp
->msg_bufr
=msgbufp
->msg_bufx
=0;
198 SYSCTL_PROC(_machdep
, OID_AUTO
, msgbuf_clear
, CTLTYPE_INT
|CTLFLAG_RW
,
199 &msgbuf_clear
, 0, sysctl_machdep_msgbuf_clear
, "I",
200 "Clear kernel message buffer");
205 * Send an interrupt to process.
207 * Stack is set up to allow sigcode stored
208 * at top to call routine, followed by kcall
209 * to sigreturn routine below. After sigreturn
210 * resets the signal mask, the stack, and the
211 * frame pointer, it returns to the user
215 sendsig(sig_t catcher
, int sig
, sigset_t
*mask
, u_long code
)
217 struct lwp
*lp
= curthread
->td_lwp
;
218 struct proc
*p
= lp
->lwp_proc
;
219 struct trapframe
*regs
;
220 struct sigacts
*psp
= p
->p_sigacts
;
221 struct sigframe sf
, *sfp
;
224 regs
= lp
->lwp_md
.md_regs
;
225 oonstack
= (lp
->lwp_sigstk
.ss_flags
& SS_ONSTACK
) ? 1 : 0;
227 /* save user context */
228 bzero(&sf
, sizeof(struct sigframe
));
229 sf
.sf_uc
.uc_sigmask
= *mask
;
230 sf
.sf_uc
.uc_stack
= lp
->lwp_sigstk
;
231 sf
.sf_uc
.uc_mcontext
.mc_onstack
= oonstack
;
232 bcopy(regs
, &sf
.sf_uc
.uc_mcontext
.mc_gs
, sizeof(struct trapframe
));
234 /* make the size of the saved context visible to userland */
235 sf
.sf_uc
.uc_mcontext
.mc_len
= sizeof(sf
.sf_uc
.uc_mcontext
);
237 /* save mailbox pending state for syscall interlock semantics */
238 if (p
->p_flag
& P_MAILBOX
)
239 sf
.sf_uc
.uc_mcontext
.mc_xflags
|= PGEX_MAILBOX
;
242 /* Allocate and validate space for the signal handler context. */
243 if ((lp
->lwp_flag
& LWP_ALTSTACK
) != 0 && !oonstack
&&
244 SIGISMEMBER(psp
->ps_sigonstack
, sig
)) {
245 sfp
= (struct sigframe
*)(lp
->lwp_sigstk
.ss_sp
+
246 lp
->lwp_sigstk
.ss_size
- sizeof(struct sigframe
));
247 lp
->lwp_sigstk
.ss_flags
|= SS_ONSTACK
;
250 sfp
= (struct sigframe
*)regs
->tf_esp
- 1;
252 /* Translate the signal if appropriate */
253 if (p
->p_sysent
->sv_sigtbl
) {
254 if (sig
<= p
->p_sysent
->sv_sigsize
)
255 sig
= p
->p_sysent
->sv_sigtbl
[_SIG_IDX(sig
)];
258 /* Build the argument list for the signal handler. */
260 sf
.sf_ucontext
= (register_t
)&sfp
->sf_uc
;
261 if (SIGISMEMBER(psp
->ps_siginfo
, sig
)) {
262 /* Signal handler installed with SA_SIGINFO. */
263 sf
.sf_siginfo
= (register_t
)&sfp
->sf_si
;
264 sf
.sf_ahu
.sf_action
= (__siginfohandler_t
*)catcher
;
266 /* fill siginfo structure */
267 sf
.sf_si
.si_signo
= sig
;
268 sf
.sf_si
.si_code
= code
;
269 sf
.sf_si
.si_addr
= (void*)regs
->tf_err
;
272 /* Old FreeBSD-style arguments. */
273 sf
.sf_siginfo
= code
;
274 sf
.sf_addr
= regs
->tf_err
;
275 sf
.sf_ahu
.sf_handler
= catcher
;
280 * If we're a vm86 process, we want to save the segment registers.
281 * We also change eflags to be our emulated eflags, not the actual
284 if (regs
->tf_eflags
& PSL_VM
) {
285 struct trapframe_vm86
*tf
= (struct trapframe_vm86
*)regs
;
286 struct vm86_kernel
*vm86
= &lp
->lwp_thread
->td_pcb
->pcb_ext
->ext_vm86
;
288 sf
.sf_uc
.uc_mcontext
.mc_gs
= tf
->tf_vm86_gs
;
289 sf
.sf_uc
.uc_mcontext
.mc_fs
= tf
->tf_vm86_fs
;
290 sf
.sf_uc
.uc_mcontext
.mc_es
= tf
->tf_vm86_es
;
291 sf
.sf_uc
.uc_mcontext
.mc_ds
= tf
->tf_vm86_ds
;
293 if (vm86
->vm86_has_vme
== 0)
294 sf
.sf_uc
.uc_mcontext
.mc_eflags
=
295 (tf
->tf_eflags
& ~(PSL_VIF
| PSL_VIP
)) |
296 (vm86
->vm86_eflags
& (PSL_VIF
| PSL_VIP
));
299 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
300 * syscalls made by the signal handler. This just avoids
301 * wasting time for our lazy fixup of such faults. PSL_NT
302 * does nothing in vm86 mode, but vm86 programs can set it
303 * almost legitimately in probes for old cpu types.
305 tf
->tf_eflags
&= ~(PSL_VM
| PSL_NT
| PSL_VIF
| PSL_VIP
);
310 * Copy the sigframe out to the user's stack.
312 if (copyout(&sf
, sfp
, sizeof(struct sigframe
)) != 0) {
314 * Something is wrong with the stack pointer.
315 * ...Kill the process.
320 regs
->tf_esp
= (int)sfp
;
321 regs
->tf_eip
= PS_STRINGS
- *(p
->p_sysent
->sv_szsigcode
);
322 regs
->tf_eflags
&= ~PSL_T
;
323 regs
->tf_cs
= _ucodesel
;
324 regs
->tf_ds
= _udatasel
;
325 regs
->tf_es
= _udatasel
;
326 if (regs
->tf_trapno
== T_PROTFLT
) {
327 regs
->tf_fs
= _udatasel
;
328 regs
->tf_gs
= _udatasel
;
330 regs
->tf_ss
= _udatasel
;
334 * Sanitize the trapframe for a virtual kernel passing control to a custom
337 * Allow userland to set or maintain PSL_RF, the resume flag. This flag
338 * basically controls whether the return PC should skip the first instruction
339 * (as in an explicit system call) or re-execute it (as in an exception).
342 cpu_sanitize_frame(struct trapframe
*frame
)
344 frame
->tf_cs
= _ucodesel
;
345 frame
->tf_ds
= _udatasel
;
346 frame
->tf_es
= _udatasel
;
348 frame
->tf_fs
= _udatasel
;
349 frame
->tf_gs
= _udatasel
;
351 frame
->tf_ss
= _udatasel
;
352 frame
->tf_eflags
&= (PSL_RF
| PSL_USERCHANGE
);
353 frame
->tf_eflags
|= PSL_RESERVED_DEFAULT
| PSL_I
;
358 cpu_sanitize_tls(struct savetls
*tls
)
360 struct segment_descriptor
*desc
;
363 for (i
= 0; i
< NGTLS
; ++i
) {
365 if (desc
->sd_dpl
== 0 && desc
->sd_type
== 0)
367 if (desc
->sd_def32
== 0)
369 if (desc
->sd_type
!= SDT_MEMRWA
)
371 if (desc
->sd_dpl
!= SEL_UPL
)
373 if (desc
->sd_xx
!= 0 || desc
->sd_p
!= 1)
380 * sigreturn(ucontext_t *sigcntxp)
382 * System call to cleanup state after a signal
383 * has been taken. Reset signal mask and
384 * stack state from context left by sendsig (above).
385 * Return to previous pc and psl as specified by
386 * context left by sendsig. Check carefully to
387 * make sure that the user has not modified the
388 * state to gain improper privileges.
390 #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
391 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
394 sys_sigreturn(struct sigreturn_args
*uap
)
396 struct lwp
*lp
= curthread
->td_lwp
;
397 struct proc
*p
= lp
->lwp_proc
;
398 struct trapframe
*regs
;
404 error
= copyin(uap
->sigcntxp
, &ucp
, sizeof(ucp
));
408 regs
= lp
->lwp_md
.md_regs
;
409 eflags
= ucp
.uc_mcontext
.mc_eflags
;
412 if (eflags
& PSL_VM
) {
413 struct trapframe_vm86
*tf
= (struct trapframe_vm86
*)regs
;
414 struct vm86_kernel
*vm86
;
417 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
418 * set up the vm86 area, and we can't enter vm86 mode.
420 if (lp
->lwp_thread
->td_pcb
->pcb_ext
== 0)
422 vm86
= &lp
->lwp_thread
->td_pcb
->pcb_ext
->ext_vm86
;
423 if (vm86
->vm86_inited
== 0)
426 /* go back to user mode if both flags are set */
427 if ((eflags
& PSL_VIP
) && (eflags
& PSL_VIF
))
428 trapsignal(lp
->lwp_proc
, SIGBUS
, 0);
430 if (vm86
->vm86_has_vme
) {
431 eflags
= (tf
->tf_eflags
& ~VME_USERCHANGE
) |
432 (eflags
& VME_USERCHANGE
) | PSL_VM
;
434 vm86
->vm86_eflags
= eflags
; /* save VIF, VIP */
435 eflags
= (tf
->tf_eflags
& ~VM_USERCHANGE
) | (eflags
& VM_USERCHANGE
) | PSL_VM
;
437 bcopy(&ucp
.uc_mcontext
.mc_gs
, tf
, sizeof(struct trapframe
));
438 tf
->tf_eflags
= eflags
;
439 tf
->tf_vm86_ds
= tf
->tf_ds
;
440 tf
->tf_vm86_es
= tf
->tf_es
;
441 tf
->tf_vm86_fs
= tf
->tf_fs
;
442 tf
->tf_vm86_gs
= tf
->tf_gs
;
443 tf
->tf_ds
= _udatasel
;
444 tf
->tf_es
= _udatasel
;
446 tf
->tf_fs
= _udatasel
;
447 tf
->tf_gs
= _udatasel
;
453 * Don't allow users to change privileged or reserved flags.
456 * XXX do allow users to change the privileged flag PSL_RF.
457 * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
458 * should sometimes set it there too. tf_eflags is kept in
459 * the signal context during signal handling and there is no
460 * other place to remember it, so the PSL_RF bit may be
461 * corrupted by the signal handler without us knowing.
462 * Corruption of the PSL_RF bit at worst causes one more or
463 * one less debugger trap, so allowing it is fairly harmless.
465 if (!EFL_SECURE(eflags
& ~PSL_RF
, regs
->tf_eflags
& ~PSL_RF
)) {
466 kprintf("sigreturn: eflags = 0x%x\n", eflags
);
471 * Don't allow users to load a valid privileged %cs. Let the
472 * hardware check for invalid selectors, excess privilege in
473 * other selectors, invalid %eip's and invalid %esp's.
475 cs
= ucp
.uc_mcontext
.mc_cs
;
476 if (!CS_SECURE(cs
)) {
477 kprintf("sigreturn: cs = 0x%x\n", cs
);
478 trapsignal(lp
, SIGBUS
, T_PROTFLT
);
481 bcopy(&ucp
.uc_mcontext
.mc_gs
, regs
, sizeof(struct trapframe
));
485 * Merge saved signal mailbox pending flag to maintain interlock
486 * semantics against system calls.
488 if (ucp
.uc_mcontext
.mc_xflags
& PGEX_MAILBOX
)
489 p
->p_flag
|= P_MAILBOX
;
491 if (ucp
.uc_mcontext
.mc_onstack
& 1)
492 lp
->lwp_sigstk
.ss_flags
|= SS_ONSTACK
;
494 lp
->lwp_sigstk
.ss_flags
&= ~SS_ONSTACK
;
496 lp
->lwp_sigmask
= ucp
.uc_sigmask
;
497 SIG_CANTMASK(lp
->lwp_sigmask
);
502 * Stack frame on entry to function. %eax will contain the function vector,
503 * %ecx will contain the function data. flags, ecx, and eax will have
504 * already been pushed on the stack.
515 sendupcall(struct vmupcall
*vu
, int morepending
)
517 struct lwp
*lp
= curthread
->td_lwp
;
518 struct trapframe
*regs
;
519 struct upcall upcall
;
520 struct upc_frame upc_frame
;
524 * If we are a virtual kernel running an emulated user process
525 * context, switch back to the virtual kernel context before
526 * trying to post the signal.
529 lp
->lwp_md
.md_regs
->tf_trapno
= 0;
530 vkernel_trap(lp
, lp
->lwp_md
.md_regs
);
534 * Get the upcall data structure
536 if (copyin(lp
->lwp_upcall
, &upcall
, sizeof(upcall
)) ||
537 copyin((char *)upcall
.upc_uthread
+ upcall
.upc_critoff
, &crit_count
, sizeof(int))
540 kprintf("bad upcall address\n");
545 * If the data structure is already marked pending or has a critical
546 * section count, mark the data structure as pending and return
547 * without doing an upcall. vu_pending is left set.
549 if (upcall
.upc_pending
|| crit_count
>= vu
->vu_pending
) {
550 if (upcall
.upc_pending
< vu
->vu_pending
) {
551 upcall
.upc_pending
= vu
->vu_pending
;
552 copyout(&upcall
.upc_pending
, &lp
->lwp_upcall
->upc_pending
,
553 sizeof(upcall
.upc_pending
));
559 * We can run this upcall now, clear vu_pending.
561 * Bump our critical section count and set or clear the
562 * user pending flag depending on whether more upcalls are
563 * pending. The user will be responsible for calling
564 * upc_dispatch(-1) to process remaining upcalls.
567 upcall
.upc_pending
= morepending
;
568 crit_count
+= TDPRI_CRIT
;
569 copyout(&upcall
.upc_pending
, &lp
->lwp_upcall
->upc_pending
,
570 sizeof(upcall
.upc_pending
));
571 copyout(&crit_count
, (char *)upcall
.upc_uthread
+ upcall
.upc_critoff
,
575 * Construct a stack frame and issue the upcall
577 regs
= lp
->lwp_md
.md_regs
;
578 upc_frame
.eax
= regs
->tf_eax
;
579 upc_frame
.ecx
= regs
->tf_ecx
;
580 upc_frame
.edx
= regs
->tf_edx
;
581 upc_frame
.flags
= regs
->tf_eflags
;
582 upc_frame
.oldip
= regs
->tf_eip
;
583 if (copyout(&upc_frame
, (void *)(regs
->tf_esp
- sizeof(upc_frame
)),
584 sizeof(upc_frame
)) != 0) {
585 kprintf("bad stack on upcall\n");
587 regs
->tf_eax
= (register_t
)vu
->vu_func
;
588 regs
->tf_ecx
= (register_t
)vu
->vu_data
;
589 regs
->tf_edx
= (register_t
)lp
->lwp_upcall
;
590 regs
->tf_eip
= (register_t
)vu
->vu_ctx
;
591 regs
->tf_esp
-= sizeof(upc_frame
);
596 * fetchupcall occurs in the context of a system call, which means that
597 * we have to return EJUSTRETURN in order to prevent eax and edx from
598 * being overwritten by the syscall return value.
600 * if vu is not NULL we return the new context in %edx, the new data in %ecx,
601 * and the function pointer in %eax.
604 fetchupcall (struct vmupcall
*vu
, int morepending
, void *rsp
)
606 struct upc_frame upc_frame
;
607 struct lwp
*lp
= curthread
->td_lwp
;
608 struct trapframe
*regs
;
610 struct upcall upcall
;
613 regs
= lp
->lwp_md
.md_regs
;
615 error
= copyout(&morepending
, &lp
->lwp_upcall
->upc_pending
, sizeof(int));
619 * This jumps us to the next ready context.
622 error
= copyin(lp
->lwp_upcall
, &upcall
, sizeof(upcall
));
625 error
= copyin((char *)upcall
.upc_uthread
+ upcall
.upc_critoff
, &crit_count
, sizeof(int));
626 crit_count
+= TDPRI_CRIT
;
628 error
= copyout(&crit_count
, (char *)upcall
.upc_uthread
+ upcall
.upc_critoff
, sizeof(int));
629 regs
->tf_eax
= (register_t
)vu
->vu_func
;
630 regs
->tf_ecx
= (register_t
)vu
->vu_data
;
631 regs
->tf_edx
= (register_t
)lp
->lwp_upcall
;
632 regs
->tf_eip
= (register_t
)vu
->vu_ctx
;
633 regs
->tf_esp
= (register_t
)rsp
;
636 * This returns us to the originally interrupted code.
638 error
= copyin(rsp
, &upc_frame
, sizeof(upc_frame
));
639 regs
->tf_eax
= upc_frame
.eax
;
640 regs
->tf_ecx
= upc_frame
.ecx
;
641 regs
->tf_edx
= upc_frame
.edx
;
642 regs
->tf_eflags
= (regs
->tf_eflags
& ~PSL_USERCHANGE
) |
643 (upc_frame
.flags
& PSL_USERCHANGE
);
644 regs
->tf_eip
= upc_frame
.oldip
;
645 regs
->tf_esp
= (register_t
)((char *)rsp
+ sizeof(upc_frame
));
654 * cpu_idle() represents the idle LWKT. You cannot return from this function
655 * (unless you want to blow things up!). Instead we look for runnable threads
656 * and loop or halt as appropriate. Giant is not held on entry to the thread.
658 * The main loop is entered with a critical section held, we must release
659 * the critical section before doing anything else. lwkt_switch() will
660 * check for pending interrupts due to entering and exiting its own
663 * Note on cpu_idle_hlt: On an SMP system we rely on a scheduler IPI
664 * to wake a HLTed cpu up. However, there are cases where the idlethread
665 * will be entered with the possibility that no IPI will occur and in such
666 * cases lwkt_switch() sets TDF_IDLE_NOHLT.
669 /* XXX since we don't have IPIs implemented don't let the CPU halt */
670 static int cpu_idle_hlt
= 1;
671 static int cpu_idle_hltcnt
;
672 static int cpu_idle_spincnt
;
673 SYSCTL_INT(_machdep
, OID_AUTO
, cpu_idle_hlt
, CTLFLAG_RW
,
674 &cpu_idle_hlt
, 0, "Idle loop HLT enable");
675 SYSCTL_INT(_machdep
, OID_AUTO
, cpu_idle_hltcnt
, CTLFLAG_RW
,
676 &cpu_idle_hltcnt
, 0, "Idle loop entry halts");
677 SYSCTL_INT(_machdep
, OID_AUTO
, cpu_idle_spincnt
, CTLFLAG_RW
,
678 &cpu_idle_spincnt
, 0, "Idle loop entry spins");
683 struct thread
*td
= curthread
;
686 KKASSERT(td
->td_pri
< TDPRI_CRIT
);
689 * See if there are any LWKTs ready to go.
694 * If we are going to halt call splz unconditionally after
695 * CLIing to catch any interrupt races. Note that we are
696 * at SPL0 and interrupts are enabled.
698 * We must poll our mailbox signals prior to calling
699 * sigpause() in order to properly interlock with them.
701 if (cpu_idle_hlt
&& !lwkt_runnable() &&
702 (td
->td_flags
& TDF_IDLE_NOHLT
) == 0) {
705 if (!lwkt_runnable()) {
710 __asm
__volatile("pause");
715 td
->td_flags
&= ~TDF_IDLE_NOHLT
;
719 /*__asm __volatile("sti; pause");*/
720 __asm
__volatile("pause");
722 /*__asm __volatile("sti");*/
730 * Clear registers on exec
733 exec_setregs(u_long entry
, u_long stack
, u_long ps_strings
)
735 struct thread
*td
= curthread
;
736 struct lwp
*lp
= td
->td_lwp
;
737 struct trapframe
*regs
= lp
->lwp_md
.md_regs
;
738 struct pcb
*pcb
= lp
->lwp_thread
->td_pcb
;
740 /* was i386_user_cleanup() in NetBSD */
743 bzero((char *)regs
, sizeof(struct trapframe
));
744 regs
->tf_eip
= entry
;
745 regs
->tf_esp
= stack
;
746 regs
->tf_eflags
= PSL_USER
| (regs
->tf_eflags
& PSL_T
);
754 /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */
755 regs
->tf_ebx
= ps_strings
;
758 * Reset the hardware debug registers if they were in use.
759 * They won't have any meaning for the newly exec'd process.
761 if (pcb
->pcb_flags
& PCB_DBREGS
) {
768 if (pcb
== td
->td_pcb
) {
770 * Clear the debug registers on the running
771 * CPU, otherwise they will end up affecting
772 * the next process we switch to.
776 pcb
->pcb_flags
&= ~PCB_DBREGS
;
780 * Initialize the math emulator (if any) for the current process.
781 * Actually, just clear the bit that says that the emulator has
782 * been initialized. Initialization is delayed until the process
783 * traps to the emulator (if it is done at all) mainly because
784 * emulators don't provide an entry point for initialization.
786 pcb
->pcb_flags
&= ~FP_SOFTFP
;
789 * note: do not set CR0_TS here. npxinit() must do it after clearing
790 * gd_npxthread. Otherwise a preemptive interrupt thread may panic
795 load_cr0(rcr0() | CR0_MP
);
799 /* Initialize the npx (if any) for the current process. */
800 npxinit(__INITIAL_NPXCW__
);
805 * note: linux emulator needs edx to be 0x0 on entry, which is
806 * handled in execve simply by setting the 64 bit syscall
818 cr0
|= CR0_NE
; /* Done by npxinit() */
819 cr0
|= CR0_MP
| CR0_TS
; /* Done at every execve() too. */
821 if (cpu_class
!= CPUCLASS_386
)
823 cr0
|= CR0_WP
| CR0_AM
;
830 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS
)
833 error
= sysctl_handle_int(oidp
, oidp
->oid_arg1
, oidp
->oid_arg2
,
835 if (!error
&& req
->newptr
)
840 SYSCTL_PROC(_machdep
, CPU_ADJKERNTZ
, adjkerntz
, CTLTYPE_INT
|CTLFLAG_RW
,
841 &adjkerntz
, 0, sysctl_machdep_adjkerntz
, "I", "");
843 extern u_long bootdev
; /* not a cdev_t - encoding is different */
844 SYSCTL_ULONG(_machdep
, OID_AUTO
, guessed_bootdev
,
845 CTLFLAG_RD
, &bootdev
, 0, "Boot device (not in cdev_t format)");
848 * Initialize 386 and configure to run kernel
852 * Initialize segments & interrupt table
855 extern struct user
*proc0paddr
;
860 IDTVEC(div
), IDTVEC(dbg
), IDTVEC(nmi
), IDTVEC(bpt
), IDTVEC(ofl
),
861 IDTVEC(bnd
), IDTVEC(ill
), IDTVEC(dna
), IDTVEC(fpusegm
),
862 IDTVEC(tss
), IDTVEC(missing
), IDTVEC(stk
), IDTVEC(prot
),
863 IDTVEC(page
), IDTVEC(mchk
), IDTVEC(fpu
), IDTVEC(align
),
864 IDTVEC(xmm
), IDTVEC(syscall
),
867 IDTVEC(int0x80_syscall
);
871 #ifdef DEBUG_INTERRUPTS
872 extern inthand_t
*Xrsvdary
[256];
876 ptrace_set_pc(struct lwp
*lp
, unsigned long addr
)
878 lp
->lwp_md
.md_regs
->tf_eip
= addr
;
883 ptrace_single_step(struct lwp
*lp
)
885 lp
->lwp_md
.md_regs
->tf_eflags
|= PSL_T
;
890 fill_regs(struct lwp
*lp
, struct reg
*regs
)
892 struct trapframe
*tp
;
894 tp
= lp
->lwp_md
.md_regs
;
895 regs
->r_gs
= tp
->tf_gs
;
896 regs
->r_fs
= tp
->tf_fs
;
897 regs
->r_es
= tp
->tf_es
;
898 regs
->r_ds
= tp
->tf_ds
;
899 regs
->r_edi
= tp
->tf_edi
;
900 regs
->r_esi
= tp
->tf_esi
;
901 regs
->r_ebp
= tp
->tf_ebp
;
902 regs
->r_ebx
= tp
->tf_ebx
;
903 regs
->r_edx
= tp
->tf_edx
;
904 regs
->r_ecx
= tp
->tf_ecx
;
905 regs
->r_eax
= tp
->tf_eax
;
906 regs
->r_eip
= tp
->tf_eip
;
907 regs
->r_cs
= tp
->tf_cs
;
908 regs
->r_eflags
= tp
->tf_eflags
;
909 regs
->r_esp
= tp
->tf_esp
;
910 regs
->r_ss
= tp
->tf_ss
;
915 set_regs(struct lwp
*lp
, struct reg
*regs
)
917 struct trapframe
*tp
;
919 tp
= lp
->lwp_md
.md_regs
;
920 if (!EFL_SECURE(regs
->r_eflags
, tp
->tf_eflags
) ||
921 !CS_SECURE(regs
->r_cs
))
923 tp
->tf_gs
= regs
->r_gs
;
924 tp
->tf_fs
= regs
->r_fs
;
925 tp
->tf_es
= regs
->r_es
;
926 tp
->tf_ds
= regs
->r_ds
;
927 tp
->tf_edi
= regs
->r_edi
;
928 tp
->tf_esi
= regs
->r_esi
;
929 tp
->tf_ebp
= regs
->r_ebp
;
930 tp
->tf_ebx
= regs
->r_ebx
;
931 tp
->tf_edx
= regs
->r_edx
;
932 tp
->tf_ecx
= regs
->r_ecx
;
933 tp
->tf_eax
= regs
->r_eax
;
934 tp
->tf_eip
= regs
->r_eip
;
935 tp
->tf_cs
= regs
->r_cs
;
936 tp
->tf_eflags
= regs
->r_eflags
;
937 tp
->tf_esp
= regs
->r_esp
;
938 tp
->tf_ss
= regs
->r_ss
;
942 #ifndef CPU_DISABLE_SSE
944 fill_fpregs_xmm(struct savexmm
*sv_xmm
, struct save87
*sv_87
)
946 struct env87
*penv_87
= &sv_87
->sv_env
;
947 struct envxmm
*penv_xmm
= &sv_xmm
->sv_env
;
950 /* FPU control/status */
951 penv_87
->en_cw
= penv_xmm
->en_cw
;
952 penv_87
->en_sw
= penv_xmm
->en_sw
;
953 penv_87
->en_tw
= penv_xmm
->en_tw
;
954 penv_87
->en_fip
= penv_xmm
->en_fip
;
955 penv_87
->en_fcs
= penv_xmm
->en_fcs
;
956 penv_87
->en_opcode
= penv_xmm
->en_opcode
;
957 penv_87
->en_foo
= penv_xmm
->en_foo
;
958 penv_87
->en_fos
= penv_xmm
->en_fos
;
961 for (i
= 0; i
< 8; ++i
)
962 sv_87
->sv_ac
[i
] = sv_xmm
->sv_fp
[i
].fp_acc
;
964 sv_87
->sv_ex_sw
= sv_xmm
->sv_ex_sw
;
968 set_fpregs_xmm(struct save87
*sv_87
, struct savexmm
*sv_xmm
)
970 struct env87
*penv_87
= &sv_87
->sv_env
;
971 struct envxmm
*penv_xmm
= &sv_xmm
->sv_env
;
974 /* FPU control/status */
975 penv_xmm
->en_cw
= penv_87
->en_cw
;
976 penv_xmm
->en_sw
= penv_87
->en_sw
;
977 penv_xmm
->en_tw
= penv_87
->en_tw
;
978 penv_xmm
->en_fip
= penv_87
->en_fip
;
979 penv_xmm
->en_fcs
= penv_87
->en_fcs
;
980 penv_xmm
->en_opcode
= penv_87
->en_opcode
;
981 penv_xmm
->en_foo
= penv_87
->en_foo
;
982 penv_xmm
->en_fos
= penv_87
->en_fos
;
985 for (i
= 0; i
< 8; ++i
)
986 sv_xmm
->sv_fp
[i
].fp_acc
= sv_87
->sv_ac
[i
];
988 sv_xmm
->sv_ex_sw
= sv_87
->sv_ex_sw
;
990 #endif /* CPU_DISABLE_SSE */
993 fill_fpregs(struct lwp
*lp
, struct fpreg
*fpregs
)
995 #ifndef CPU_DISABLE_SSE
997 fill_fpregs_xmm(&lp
->lwp_thread
->td_pcb
->pcb_save
.sv_xmm
,
998 (struct save87
*)fpregs
);
1001 #endif /* CPU_DISABLE_SSE */
1002 bcopy(&lp
->lwp_thread
->td_pcb
->pcb_save
.sv_87
, fpregs
, sizeof *fpregs
);
1007 set_fpregs(struct lwp
*lp
, struct fpreg
*fpregs
)
1009 #ifndef CPU_DISABLE_SSE
1011 set_fpregs_xmm((struct save87
*)fpregs
,
1012 &lp
->lwp_thread
->td_pcb
->pcb_save
.sv_xmm
);
1015 #endif /* CPU_DISABLE_SSE */
1016 bcopy(fpregs
, &lp
->lwp_thread
->td_pcb
->pcb_save
.sv_87
, sizeof *fpregs
);
1021 fill_dbregs(struct lwp
*lp
, struct dbreg
*dbregs
)
1024 dbregs
->dr0
= rdr0();
1025 dbregs
->dr1
= rdr1();
1026 dbregs
->dr2
= rdr2();
1027 dbregs
->dr3
= rdr3();
1028 dbregs
->dr4
= rdr4();
1029 dbregs
->dr5
= rdr5();
1030 dbregs
->dr6
= rdr6();
1031 dbregs
->dr7
= rdr7();
1035 pcb
= lp
->lwp_thread
->td_pcb
;
1036 dbregs
->dr0
= pcb
->pcb_dr0
;
1037 dbregs
->dr1
= pcb
->pcb_dr1
;
1038 dbregs
->dr2
= pcb
->pcb_dr2
;
1039 dbregs
->dr3
= pcb
->pcb_dr3
;
1042 dbregs
->dr6
= pcb
->pcb_dr6
;
1043 dbregs
->dr7
= pcb
->pcb_dr7
;
1049 set_dbregs(struct lwp
*lp
, struct dbreg
*dbregs
)
1052 load_dr0(dbregs
->dr0
);
1053 load_dr1(dbregs
->dr1
);
1054 load_dr2(dbregs
->dr2
);
1055 load_dr3(dbregs
->dr3
);
1056 load_dr4(dbregs
->dr4
);
1057 load_dr5(dbregs
->dr5
);
1058 load_dr6(dbregs
->dr6
);
1059 load_dr7(dbregs
->dr7
);
1062 struct ucred
*ucred
;
1064 uint32_t mask1
, mask2
;
1067 * Don't let an illegal value for dr7 get set. Specifically,
1068 * check for undefined settings. Setting these bit patterns
1069 * results in undefined behaviour and can lead to an unexpected
1072 for (i
= 0, mask1
= 0x3<<16, mask2
= 0x2<<16; i
< 8;
1073 i
++, mask1
<<= 2, mask2
<<= 2)
1074 if ((dbregs
->dr7
& mask1
) == mask2
)
1077 pcb
= lp
->lwp_thread
->td_pcb
;
1078 ucred
= lp
->lwp_proc
->p_ucred
;
1081 * Don't let a process set a breakpoint that is not within the
1082 * process's address space. If a process could do this, it
1083 * could halt the system by setting a breakpoint in the kernel
1084 * (if ddb was enabled). Thus, we need to check to make sure
1085 * that no breakpoints are being enabled for addresses outside
1086 * process's address space, unless, perhaps, we were called by
1089 * XXX - what about when the watched area of the user's
1090 * address space is written into from within the kernel
1091 * ... wouldn't that still cause a breakpoint to be generated
1092 * from within kernel mode?
1095 if (suser_cred(ucred
, 0) != 0) {
1096 if (dbregs
->dr7
& 0x3) {
1097 /* dr0 is enabled */
1098 if (dbregs
->dr0
>= VM_MAX_USER_ADDRESS
)
1102 if (dbregs
->dr7
& (0x3<<2)) {
1103 /* dr1 is enabled */
1104 if (dbregs
->dr1
>= VM_MAX_USER_ADDRESS
)
1108 if (dbregs
->dr7
& (0x3<<4)) {
1109 /* dr2 is enabled */
1110 if (dbregs
->dr2
>= VM_MAX_USER_ADDRESS
)
1114 if (dbregs
->dr7
& (0x3<<6)) {
1115 /* dr3 is enabled */
1116 if (dbregs
->dr3
>= VM_MAX_USER_ADDRESS
)
1121 pcb
->pcb_dr0
= dbregs
->dr0
;
1122 pcb
->pcb_dr1
= dbregs
->dr1
;
1123 pcb
->pcb_dr2
= dbregs
->dr2
;
1124 pcb
->pcb_dr3
= dbregs
->dr3
;
1125 pcb
->pcb_dr6
= dbregs
->dr6
;
1126 pcb
->pcb_dr7
= dbregs
->dr7
;
1128 pcb
->pcb_flags
|= PCB_DBREGS
;
1136 * Return > 0 if a hardware breakpoint has been hit, and the
1137 * breakpoint was in user space. Return 0, otherwise.
1140 user_dbreg_trap(void)
1142 u_int32_t dr7
, dr6
; /* debug registers dr6 and dr7 */
1143 u_int32_t bp
; /* breakpoint bits extracted from dr6 */
1144 int nbp
; /* number of breakpoints that triggered */
1145 caddr_t addr
[4]; /* breakpoint addresses */
1149 if ((dr7
& 0x000000ff) == 0) {
1151 * all GE and LE bits in the dr7 register are zero,
1152 * thus the trap couldn't have been caused by the
1153 * hardware debug registers
1160 bp
= dr6
& 0x0000000f;
1164 * None of the breakpoint bits are set meaning this
1165 * trap was not caused by any of the debug registers
1171 * at least one of the breakpoints was hit; check to see
1172 * which ones and whether any of them are user space addresses
1176 addr
[nbp
++] = (caddr_t
)rdr0();
1179 addr
[nbp
++] = (caddr_t
)rdr1();
1182 addr
[nbp
++] = (caddr_t
)rdr2();
1185 addr
[nbp
++] = (caddr_t
)rdr3();
1188 for (i
=0; i
<nbp
; i
++) {
1190 (caddr_t
)VM_MAX_USER_ADDRESS
) {
1192 * addr[i] is in user space
1199 * None of the breakpoints are in user space.
1209 Debugger(const char *msg
)
1211 kprintf("Debugger(\"%s\") called.\n", msg
);