/*
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 2008 The DragonFly Project.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $
 */
#include "use_ether.h"
//#include "use_npx.h"
#include "opt_atalk.h"
#include "opt_compat.h"
#include "opt_directio.h"
#include "opt_msgbuf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/reboot.h>
#include <sys/msgbuf.h>
#include <sys/sysent.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/upcall.h>
#include <sys/usched.h>

#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

#include <machine/cpu.h>
#include <machine/clock.h>
#include <machine/specialreg.h>
#include <machine/bootinfo.h>
#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/pc/bios.h>
#include <machine/pcb_ext.h>		/* pcb.h included via sys/user.h */
#include <machine/globaldata.h>		/* CPU_prvspace */
#include <machine/smp.h>
#include <machine/perfmon.h>
#include <machine/cputypes.h>

#include <bus/isa/isa_device.h>
#include <machine_base/isa/intr_machdep.h>
#include <bus/isa/rtc.h>
#include <sys/random.h>
#include <sys/ptrace.h>
#include <machine/sigframe.h>
#define PHYSMAP_ENTRIES		10

extern void init386(int first);
extern void dblfault_handler(void);
extern u_int64_t hammer_time(u_int64_t, u_int64_t);

extern void printcpuinfo(void);	/* XXX header file */
extern void identify_cpu(void);
extern void finishidentcpu(void);
extern void panicifcpuunsupported(void);
static void cpu_startup(void *);
#ifndef CPU_DISABLE_SSE
static void set_fpregs_xmm(struct save87 *, struct savexmm *);
static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
#endif /* CPU_DISABLE_SSE */
#ifdef DIRECTIO
extern void ffs_rawread_setup(void);
#endif /* DIRECTIO */
static void init_locks(void);
SYSINIT(cpu, SI_BOOT2_SMP, SI_ORDER_FIRST, cpu_startup, NULL)

extern vm_offset_t ksym_start, ksym_end;
struct privatespace CPU_prvspace[MAXCPU];

int	_udatasel, _ucodesel, _ucode32sel;

#ifdef SMP
int64_t tsc_offsets[MAXCPU];
#else
int64_t tsc_offsets[1];
#endif
#if defined(SWTCH_OPTIM_STATS)
extern int swtch_optim_stats;
SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
	CTLFLAG_RD, &swtch_optim_stats, 0, "");
SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
	CTLFLAG_RD, &tlb_flush_count, 0, "");
#endif
static int
sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
{
	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_physmem, "IU", "");
static int
sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
{
	int error = sysctl_handle_int(oidp, 0,
		ctob(physmem - vmstats.v_wire_count), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_usermem, "IU", "");
static int
sysctl_hw_availpages(SYSCTL_HANDLER_ARGS)
{
	int error = sysctl_handle_int(oidp, 0,
		x86_64_btop(avail_end - avail_start), req);
	return (error);
}

SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_availpages, "I", "");

vm_paddr_t Maxmem = 0;
/*
 * The number of PHYSMAP entries must be one less than the number of
 * PHYSSEG entries because the PHYSMAP entry that spans the largest
 * physical address that is accessible by ISA DMA is split into two
 * PHYSSEG entries.
 */
#define PHYSMAP_SIZE	(2 * (VM_PHYSSEG_MAX - 1))

vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
#define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2)
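
/*
 * Layout sketch (illustrative; the addresses below are made-up examples,
 * not values from this file): phys_avail[] holds base/end pairs and is
 * terminated by a 0,0 pair, e.g.
 *
 *	phys_avail[0] = 0x0000000000001000;	start of chunk 0
 *	phys_avail[1] = 0x000000000009f000;	end of chunk 0
 *	phys_avail[2] = 0x0000000000100000;	start of chunk 1
 *	...
 *	phys_avail[n] = phys_avail[n+1] = 0;	terminator
 *
 * which is why the _ARRAY_END macros above reserve two trailing slots.
 */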
static vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
static struct trapframe proc0_tf;
static void
cpu_startup(void *dummy)
{
	caddr_t v;
	vm_size_t size = 0;
	vm_offset_t firstaddr;

	if (boothowto & RB_VERBOSE)
		bootverbose++;

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	kprintf("%s", version);
	startrtclock();
	printcpuinfo();
	panicifcpuunsupported();
	kprintf("real memory = %ju (%juK bytes)\n",
		(intmax_t)ptoa(Maxmem),
		(intmax_t)ptoa(Maxmem) / 1024);
	/*
	 * Display any holes after the first chunk of extended memory.
	 */
	if (bootverbose) {
		int indx;

		kprintf("Physical memory chunk(s):\n");
		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
			vm_paddr_t size1 = phys_avail[indx + 1] - phys_avail[indx];

			kprintf("0x%08jx - 0x%08jx, %ju bytes (%ju pages)\n",
				(intmax_t)phys_avail[indx],
				(intmax_t)phys_avail[indx + 1] - 1,
				(intmax_t)size1,
				(intmax_t)(size1 / PAGE_SIZE));
		}
	}
	/*
	 * Allocate space for system data structures.
	 * The first available kernel virtual address is in "v".
	 * As pages of kernel virtual memory are allocated, "v" is incremented.
	 * As pages of memory are allocated and cleared,
	 * "firstaddr" is incremented.
	 * An index into the kernel page table corresponding to the
	 * virtual memory address maintained in "v" is kept in "mapaddr".
	 */

	/*
	 * Make two passes.  The first pass calculates how much memory is
	 * needed and allocates it.  The second pass assigns virtual
	 * addresses to the various data structures.
	 */
	firstaddr = 0;
again:
	v = (caddr_t)firstaddr;

#define	valloc(name, type, num) \
	    (name) = (type *)v; v = (caddr_t)((name)+(num))
#define	valloclim(name, type, num, lim) \
	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
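
/*
 * Usage sketch (illustrative, not part of the original source): valloc()
 * is just a bump allocator over the cursor "v".  For example,
 *
 *	valloc(buf, struct buf, nbuf);
 *
 * expands to "(buf) = (struct buf *)v; v = (caddr_t)((buf)+(nbuf));", so
 * on the first pass (v based at 0) the final v *is* the total size needed,
 * and on the second pass (v based at the kmem_alloc()'d block) the same
 * expansions hand out real addresses.
 */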
	/*
	 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
	 * For the first 64MB of ram nominally allocate sufficient buffers to
	 * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
	 * buffers to cover 1/20 of our ram over 64MB.  When auto-sizing
	 * the buffer cache we limit the eventual kva reservation to
	 * maxbcache bytes.
	 *
	 * factor represents the 1/4 x ram conversion.
	 */
	if (nbuf == 0) {
		int factor = 4 * BKVASIZE / 1024;
		int kbytes = physmem * (PAGE_SIZE / 1024);

		nbuf = 50;
		if (kbytes > 4096)
			nbuf += min((kbytes - 4096) / factor, 65536 / factor);
		if (kbytes > 65536)
			nbuf += (kbytes - 65536) * 2 / (factor * 5);
		if (maxbcache && nbuf > maxbcache / BKVASIZE)
			nbuf = maxbcache / BKVASIZE;
	}
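
	/*
	 * Worked example (illustrative, assuming BKVASIZE = 16384 and 4KB
	 * pages; the numbers are not from this file): with 256MB of ram,
	 * kbytes = 262144 and factor = 64.  The first 64MB adds
	 * min((262144 - 4096) / 64, 65536 / 64) = 1024 buffers, the rest
	 * adds (262144 - 65536) * 2 / (64 * 5) = 1228 more, and only then
	 * is the maxbcache cap applied.
	 */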
	/*
	 * Do not allow the buffer_map to be more than 1/2 the size of the
	 * kernel_map.
	 */
	if (nbuf > (virtual_end - virtual_start) / (BKVASIZE * 2)) {
		nbuf = (virtual_end - virtual_start) / (BKVASIZE * 2);
		kprintf("Warning: nbufs capped at %d\n", nbuf);
	}
	nswbuf = max(min(nbuf/4, 256), 16);
#ifdef NSWBUF_MIN
	if (nswbuf < NSWBUF_MIN)
		nswbuf = NSWBUF_MIN;
#endif

	valloc(swbuf, struct buf, nswbuf);
	valloc(buf, struct buf, nbuf);
	/*
	 * End of first pass, size has been calculated so allocate memory
	 */
	if (firstaddr == 0) {
		size = (vm_size_t)(v - firstaddr);
		firstaddr = kmem_alloc(&kernel_map, round_page(size));
		if (firstaddr == 0)
			panic("startup: no room for tables");
		goto again;
	}
	/*
	 * End of second pass, addresses have been assigned
	 */
	if ((vm_size_t)(v - firstaddr) != size)
		panic("startup: table size inconsistency");
	kmem_suballoc(&kernel_map, &clean_map, &clean_sva, &clean_eva,
		      (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
	kmem_suballoc(&clean_map, &buffer_map, &buffer_sva, &buffer_eva,
		      (nbuf*BKVASIZE));
	buffer_map.system_map = 1;
	kmem_suballoc(&clean_map, &pager_map, &pager_sva, &pager_eva,
		      (nswbuf*MAXPHYS) + pager_map_size);
	pager_map.system_map = 1;
#if defined(USERCONFIG)
	userconfig();
	cninit();		/* the preferred console may have changed */
#endif
370 kprintf("avail memory = %lu (%luK bytes)\n",
371 ptoa(vmstats
.v_free_count
),
372 ptoa(vmstats
.v_free_count
) / 1024);
	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
	vm_pager_bufferinit();
	/*
	 * OK, enough kmem_alloc/malloc state should be up, let's get on with it!
	 */
	mp_start();			/* fire up the APs and APICs */
}
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * at top to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 */
void
sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct lwp *lp = curthread->td_lwp;
	struct proc *p = lp->lwp_proc;
	struct trapframe *regs;
	struct sigacts *psp = p->p_sigacts;
	struct sigframe sf, *sfp;
	int oonstack;
	char *sp;

	regs = lp->lwp_md.md_regs;
	oonstack = (lp->lwp_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0;
	/* Save user context */
	bzero(&sf, sizeof(struct sigframe));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = lp->lwp_sigstk;
	sf.sf_uc.uc_mcontext.mc_onstack = oonstack;
	KKASSERT(__offsetof(struct trapframe, tf_rdi) == 0);
	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(struct trapframe));

	/* Make the size of the saved context visible to userland */
	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext);

	/* Save mailbox pending state for syscall interlock semantics */
	if (p->p_flag & P_MAILBOX)
		sf.sf_uc.uc_mcontext.mc_xflags |= PGEX_MAILBOX;
	/* Allocate and validate space for the signal handler context. */
	if ((lp->lwp_flag & LWP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sp = (char *)(lp->lwp_sigstk.ss_sp + lp->lwp_sigstk.ss_size -
			      sizeof(struct sigframe));
		lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
	} else {
		/* We take red zone into account */
		sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128;
	}
	/* Align to 16 bytes */
	sfp = (struct sigframe *)((intptr_t)sp & ~0xFUL);
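
	/*
	 * Editor's note (illustrative, not from the original source): the
	 * "- 128" above skips the x86_64 ABI red zone below %rsp, which
	 * leaf code in the interrupted frame may still be using, and the
	 * ~0xFUL mask keeps the handler's entry stack on the 16 byte
	 * alignment the ABI requires.
	 */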
	/* Translate the signal if appropriate */
	if (p->p_sysent->sv_sigtbl) {
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
	}
	/*
	 * Build the argument list for the signal handler.
	 *
	 * Arguments are in registers (%rdi, %rsi, %rdx, %rcx)
	 */
	regs->tf_rdi = sig;				/* argument 1 */
	regs->tf_rdx = (register_t)&sfp->sf_uc;		/* argument 3 */
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/*
		 * Signal handler installed with SA_SIGINFO.
		 *
		 * action(signo, siginfo, ucontext)
		 */
		regs->tf_rsi = (register_t)&sfp->sf_si;	/* argument 2 */
		regs->tf_rcx = (register_t)regs->tf_err; /* argument 4 */
		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

		/* fill siginfo structure */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = code;
		sf.sf_si.si_addr = (void *)regs->tf_err;
	} else {
		/*
		 * Old FreeBSD-style arguments.
		 *
		 * handler (signo, code, [uc], addr)
		 */
		regs->tf_rsi = (register_t)code;	/* argument 2 */
		regs->tf_rcx = (register_t)regs->tf_err; /* argument 4 */
		sf.sf_ahu.sf_handler = catcher;
	}
	/*
	 * If we're a vm86 process, we want to save the segment registers.
	 * We also change eflags to be our emulated eflags, not the actual
	 * eflags.
	 */
	if (regs->tf_eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86 =
			&lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;

		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;

		if (vm86->vm86_has_vme == 0)
			sf.sf_uc.uc_mcontext.mc_eflags =
			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

		/*
		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
		 * syscalls made by the signal handler.  This just avoids
		 * wasting time for our lazy fixup of such faults.  PSL_NT
		 * does nothing in vm86 mode, but vm86 programs can set it
		 * almost legitimately in probes for old cpu types.
		 */
		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
	}
	/*
	 * Save the FPU state and reinit the FP unit
	 */
	npxpush(&sf.sf_uc.uc_mcontext);
	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		sigexit(lp, SIGILL);
	}

	regs->tf_rsp = (register_t)sfp;
	regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	/*
	 * i386 abi specifies that the direction flag must be cleared
	 * on function entry
	 */
	regs->tf_rflags &= ~(PSL_T|PSL_D);
	/*
	 * 64 bit mode has a code and stack selector but
	 * no data or extra selector.  %fs and %gs are not
	 * stored in the trapframe.
	 */
	regs->tf_cs = _ucodesel;
	regs->tf_ss = _udatasel;
}
/*
 * Sanitize the trapframe for a virtual kernel passing control to a custom
 * VM context.  Remove any items that would otherwise create a privilege
 * issue.
 *
 * XXX at the moment we allow userland to set the resume flag.  Is this a
 * bad idea?
 */
int
cpu_sanitize_frame(struct trapframe *frame)
{
	frame->tf_cs = _ucodesel;
	frame->tf_ss = _udatasel;
	/* XXX VM (8086) mode not supported? */
	frame->tf_rflags &= (PSL_RF | PSL_USERCHANGE | PSL_VM_UNSUPP);
	frame->tf_rflags |= PSL_RESERVED_DEFAULT | PSL_I;
	return(0);
}
/*
 * Sanitize the tls so loading the descriptor does not blow up
 * on us.  For x86_64 we don't have to do anything.
 */
int
cpu_sanitize_tls(struct savetls *tls)
{
	return(0);
}
/*
 * sigreturn(ucontext_t *sigcntxp)
 *
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig.  Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 */
#define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
#define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
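
/*
 * How these checks read (illustrative note, not from the original
 * source): EFL_SECURE() XORs the proposed rflags against the current
 * ones, so every bit the user tried to flip shows up as a 1; masking
 * with ~PSL_USERCHANGE keeps only the privileged bits, and the result
 * must be 0.  Flipping PSL_C (carry) therefore passes, while flipping
 * the IOPL bits does not.  CS_SECURE() simply insists the requested
 * %cs selector has user privilege (ISPL(cs) == SEL_UPL).
 */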
int
sys_sigreturn(struct sigreturn_args *uap)
{
	struct lwp *lp = curthread->td_lwp;
	struct proc *p = lp->lwp_proc;
	struct trapframe *regs;
	ucontext_t uc;
	ucontext_t *ucp;
	register_t rflags;
	int cs;
	int error;

	/*
	 * We have to copy the information into kernel space so userland
	 * can't modify it while we are sniffing it.
	 */
	regs = lp->lwp_md.md_regs;
	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error)
		return (error);
	ucp = &uc;
	rflags = ucp->uc_mcontext.mc_rflags;

	/* VM (8086) mode not supported */
	rflags &= ~PSL_VM_UNSUPP;
	if (eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (lp->lwp_thread->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* go back to user mode if both flags are set */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
			trapsignal(lp, SIGBUS, 0);

		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		bcopy(&ucp->uc_mcontext.mc_gs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = tf->tf_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
		tf->tf_gs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 *
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
			kprintf("sigreturn: rflags = 0x%lx\n", (long)rflags);
			return(EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			kprintf("sigreturn: cs = 0x%x\n", cs);
			trapsignal(lp, SIGBUS, T_PROTFLT);
			return(EINVAL);
		}
		bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(struct trapframe));
	}
	/*
	 * Restore the FPU state from the frame
	 */
	npxpop(&ucp->uc_mcontext);

	/*
	 * Merge saved signal mailbox pending flag to maintain interlock
	 * semantics against system calls.
	 */
	if (ucp->uc_mcontext.mc_xflags & PGEX_MAILBOX)
		p->p_flag |= P_MAILBOX;

	if (ucp->uc_mcontext.mc_onstack & 1)
		lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
	else
		lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;

	lp->lwp_sigmask = ucp->uc_sigmask;
	SIG_CANTMASK(lp->lwp_sigmask);
	return(EJUSTRETURN);
}
/*
 * Stack frame on entry to function.  %rax will contain the function vector,
 * %rcx will contain the function data.  flags, rcx, and rax will have
 * already been pushed on the stack.
 */
struct upc_frame {
	register_t	rax;
	register_t	rcx;
	register_t	rdx;
	register_t	flags;
	register_t	oldip;
};

void
sendupcall(struct vmupcall *vu, int morepending)
{
	struct lwp *lp = curthread->td_lwp;
	struct trapframe *regs;
	struct upcall upcall;
	struct upc_frame upc_frame;
	int	crit_count = 0;
	/*
	 * If we are a virtual kernel running an emulated user process
	 * context, switch back to the virtual kernel context before
	 * trying to post the signal.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		lp->lwp_md.md_regs->tf_trapno = 0;
		vkernel_trap(lp, lp->lwp_md.md_regs);
	}
	/*
	 * Get the upcall data structure
	 */
	if (copyin(lp->lwp_upcall, &upcall, sizeof(upcall)) ||
	    copyin((char *)upcall.upc_uthread + upcall.upc_critoff,
		   &crit_count, sizeof(int))
	) {
		vu->vu_pending = 0;
		kprintf("bad upcall address\n");
		return;
	}
	/*
	 * If the data structure is already marked pending or has a critical
	 * section count, mark the data structure as pending and return
	 * without doing an upcall.  vu_pending is left set.
	 */
	if (upcall.upc_pending || crit_count >= vu->vu_pending) {
		if (upcall.upc_pending < vu->vu_pending) {
			upcall.upc_pending = vu->vu_pending;
			copyout(&upcall.upc_pending,
				&lp->lwp_upcall->upc_pending,
				sizeof(upcall.upc_pending));
		}
		return;
	}
	/*
	 * We can run this upcall now, clear vu_pending.
	 *
	 * Bump our critical section count and set or clear the
	 * user pending flag depending on whether more upcalls are
	 * pending.  The user will be responsible for calling
	 * upc_dispatch(-1) to process remaining upcalls.
	 */
	vu->vu_pending = 0;
	upcall.upc_pending = morepending;
	crit_count += TDPRI_CRIT;
	copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending,
		sizeof(upcall.upc_pending));
	copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff,
		sizeof(int));
	/*
	 * Construct a stack frame and issue the upcall
	 */
	regs = lp->lwp_md.md_regs;
	upc_frame.rax = regs->tf_rax;
	upc_frame.rcx = regs->tf_rcx;
	upc_frame.rdx = regs->tf_rdx;
	upc_frame.flags = regs->tf_rflags;
	upc_frame.oldip = regs->tf_rip;
	if (copyout(&upc_frame, (void *)(regs->tf_rsp - sizeof(upc_frame)),
		    sizeof(upc_frame)) != 0) {
		kprintf("bad stack on upcall\n");
	} else {
		regs->tf_rax = (register_t)vu->vu_func;
		regs->tf_rcx = (register_t)vu->vu_data;
		regs->tf_rdx = (register_t)lp->lwp_upcall;
		regs->tf_rip = (register_t)vu->vu_ctx;
		regs->tf_rsp -= sizeof(upc_frame);
	}
}
/*
 * fetchupcall occurs in the context of a system call, which means that
 * we have to return EJUSTRETURN in order to prevent eax and edx from
 * being overwritten by the syscall return value.
 *
 * if vu is not NULL we return the new context in %edx, the new data in %ecx,
 * and the function pointer in %eax.
 */
int
fetchupcall(struct vmupcall *vu, int morepending, void *rsp)
{
	struct upc_frame upc_frame;
	struct lwp *lp = curthread->td_lwp;
	struct trapframe *regs;
	int error;
	struct upcall upcall;
	int crit_count;

	regs = lp->lwp_md.md_regs;
	error = copyout(&morepending, &lp->lwp_upcall->upc_pending, sizeof(int));
	if (error == 0) {
	    if (vu) {
		/*
		 * This jumps us to the next ready context.
		 */
		vu->vu_pending = 0;
		error = copyin(lp->lwp_upcall, &upcall, sizeof(upcall));
		crit_count = 0;
		if (error == 0)
			error = copyin((char *)upcall.upc_uthread +
				       upcall.upc_critoff,
				       &crit_count, sizeof(int));
		crit_count += TDPRI_CRIT;
		if (error == 0)
			error = copyout(&crit_count,
					(char *)upcall.upc_uthread +
					upcall.upc_critoff, sizeof(int));
		regs->tf_rax = (register_t)vu->vu_func;
		regs->tf_rcx = (register_t)vu->vu_data;
		regs->tf_rdx = (register_t)lp->lwp_upcall;
		regs->tf_rip = (register_t)vu->vu_ctx;
		regs->tf_rsp = (register_t)rsp;
	    } else {
		/*
		 * This returns us to the originally interrupted code.
		 */
		error = copyin(rsp, &upc_frame, sizeof(upc_frame));
		regs->tf_rax = upc_frame.rax;
		regs->tf_rcx = upc_frame.rcx;
		regs->tf_rdx = upc_frame.rdx;
		regs->tf_rflags = (regs->tf_rflags & ~PSL_USERCHANGE) |
				(upc_frame.flags & PSL_USERCHANGE);
		regs->tf_rip = upc_frame.oldip;
		regs->tf_rsp = (register_t)((char *)rsp + sizeof(upc_frame));
	    }
	}
	if (error == 0)
		error = EJUSTRETURN;
	return(error);
}
/*
 * Machine dependent boot() routine
 *
 * I haven't seen anything to put here yet
 * Possibly some stuff might be grafted back here from boot()
 */
void
cpu_boot(int howto)
{
}

/*
 * Shutdown the CPU as much as possible
 */
void
cpu_halt(void)
{
	for (;;)
		__asm__ __volatile("hlt");
}
/*
 * cpu_idle() represents the idle LWKT.  You cannot return from this function
 * (unless you want to blow things up!).  Instead we look for runnable threads
 * and loop or halt as appropriate.  Giant is not held on entry to the thread.
 *
 * The main loop is entered with a critical section held, we must release
 * the critical section before doing anything else.  lwkt_switch() will
 * check for pending interrupts due to entering and exiting its own
 * critical section.
 *
 * Note on cpu_idle_hlt:  On an SMP system we rely on a scheduler IPI
 * to wake a HLTed cpu up.  However, there are cases where the idlethread
 * will be entered with the possibility that no IPI will occur and in such
 * cases lwkt_switch() sets TDF_IDLE_NOHLT.
 */
static int	cpu_idle_hlt = 1;
static int	cpu_idle_hltcnt;
static int	cpu_idle_spincnt;
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
    &cpu_idle_hlt, 0, "Idle loop HLT enable");
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW,
    &cpu_idle_hltcnt, 0, "Idle loop entry halts");
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW,
    &cpu_idle_spincnt, 0, "Idle loop entry spins");
static void
cpu_idle_default_hook(void)
{
	/*
	 * We must guarantee that hlt is exactly the instruction
	 * following the sti.
	 */
	__asm __volatile("sti; hlt");
}

/* Other subsystems (e.g., ACPI) can hook this later. */
void (*cpu_idle_hook)(void) = cpu_idle_default_hook;
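
/*
 * Editor's note (illustrative, not from the original source): "sti; hlt"
 * must be exactly adjacent because STI only takes effect after the
 * following instruction retires.  An interrupt that becomes pending in
 * that one-instruction shadow is delivered once HLT begins and wakes it;
 * if any other instruction sat between the two, the wakeup could be
 * taken there and the cpu would then HLT with no event left to wake it.
 */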
void
cpu_idle(void)
{
	struct thread *td = curthread;

	crit_exit();
	KKASSERT(td->td_pri < TDPRI_CRIT);
	for (;;) {
		/*
		 * See if there are any LWKTs ready to go.
		 */
		lwkt_switch();

		/*
		 * If we are going to halt call splz unconditionally after
		 * CLIing to catch any interrupt races.  Note that we are
		 * at SPL0 and interrupts are enabled.
		 */
		if (cpu_idle_hlt && !lwkt_runnable() &&
		    (td->td_flags & TDF_IDLE_NOHLT) == 0) {
			__asm __volatile("cli");
			splz();
			if (!lwkt_runnable())
				cpu_idle_hook();
#ifdef SMP
			else
				__asm __volatile("pause");
#endif
			++cpu_idle_hltcnt;
		} else {
			td->td_flags &= ~TDF_IDLE_NOHLT;
			splz();
#ifdef SMP
			__asm __volatile("sti; pause");
#else
			__asm __volatile("sti");
#endif
			++cpu_idle_spincnt;
		}
	}
}
/*
 * This routine is called when the only runnable threads require
 * the MP lock, and the scheduler couldn't get it.  On a real cpu
 * we let the scheduler spin.
 */
void
cpu_mplock_contested(void)
{
	cpu_pause();
}

/*
 * This routine is called if a spinlock has been held through the
 * exponential backoff period and is seriously contested.  On a real cpu
 * we let it spin.
 */
void
cpu_spinlock_contested(void)
{
	cpu_pause();
}
/*
 * Clear registers on exec
 */
void
exec_setregs(u_long entry, u_long stack, u_long ps_strings)
{
	struct thread *td = curthread;
	struct lwp *lp = td->td_lwp;
	struct pcb *pcb = td->td_pcb;
	struct trapframe *regs = lp->lwp_md.md_regs;

	/* was i386_user_cleanup() in NetBSD */
	user_ldt_free(pcb);

	bzero((char *)regs, sizeof(struct trapframe));
	regs->tf_rip = entry;
	regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; /* align the stack */
	regs->tf_rdi = stack;		/* argv */
	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
	regs->tf_ss = _udatasel;
	regs->tf_cs = _ucodesel;
	regs->tf_rbx = ps_strings;
	/*
	 * Reset the hardware debug registers if they were in use.
	 * They won't have any meaning for the newly exec'd process.
	 */
	if (pcb->pcb_flags & PCB_DBREGS) {
		pcb->pcb_dr0 = 0;
		pcb->pcb_dr1 = 0;
		pcb->pcb_dr2 = 0;
		pcb->pcb_dr3 = 0;
		pcb->pcb_dr6 = 0;
		pcb->pcb_dr7 = 0;	/* JG set bit 10? */
		if (pcb == td->td_pcb) {
			/*
			 * Clear the debug registers on the running
			 * CPU, otherwise they will end up affecting
			 * the next process we switch to.
			 */
			reset_dbregs();
		}
		pcb->pcb_flags &= ~PCB_DBREGS;
	}
	/*
	 * Initialize the math emulator (if any) for the current process.
	 * Actually, just clear the bit that says that the emulator has
	 * been initialized.  Initialization is delayed until the process
	 * traps to the emulator (if it is done at all) mainly because
	 * emulators don't provide an entry point for initialization.
	 */
	pcb->pcb_flags &= ~FP_SOFTFP;

	/*
	 * NOTE: do not set CR0_TS here.  npxinit() must do it after clearing
	 *	 gd_npxthread.  Otherwise a preemptive interrupt thread
	 *	 may panic in npxdna().
	 */
	load_cr0(rcr0() | CR0_MP);
	/*
	 * NOTE: The MSR values must be correct so we can return to
	 *	 userland.  gd_user_fs/gs must be correct so the switch
	 *	 code knows what the current MSR values are.
	 */
	pcb->pcb_fsbase = 0;	/* Values loaded from PCB on switch */
	pcb->pcb_gsbase = 0;
	mdcpu->gd_user_fs = 0;	/* Cache of current MSR values */
	mdcpu->gd_user_gs = 0;
	wrmsr(MSR_FSBASE, 0);	/* Set MSR values for return to userland */
	wrmsr(MSR_KGSBASE, 0);

	/* Initialize the npx (if any) for the current process. */
	npxinit(__INITIAL_NPXCW__);
	pcb->pcb_ds = _udatasel;
	pcb->pcb_es = _udatasel;
	pcb->pcb_fs = _udatasel;
	pcb->pcb_gs = _udatasel;
}
void
cpu_setregs(void)
{
	register_t cr0;

	cr0 = rcr0();
	cr0 |= CR0_NE;			/* Done by npxinit() */
	cr0 |= CR0_MP | CR0_TS;		/* Done at every execve() too. */
	cr0 |= CR0_WP | CR0_AM;
	load_cr0(cr0);
}
static int
sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
{
	int error;

	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
		req);
	if (!error && req->newptr)
		resettodr();
	return (error);
}

SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
	CTLFLAG_RD, &bootinfo, bootinfo, "");

SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
	CTLFLAG_RW, &wall_cmos_clock, 0, "");

extern u_long bootdev;		/* not a cdev_t - encoding is different */
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
	CTLFLAG_RD, &bootdev, 0, "Boot device (not in cdev_t format)");
/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

struct user_segment_descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */

/* table descriptors - used to load tables by cpu */
struct region_descriptor r_gdt, r_idt;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif

static char dblfault_stack[PAGE_SIZE] __aligned(16);
/* JG proc0paddr is a virtual address */
void *proc0paddr;
char proc0paddr_buff[LWKT_THREAD_STACK];
/* software prototypes -- in more palatable form */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0,			/* long */
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GCODE_SEL	1 Code Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_KPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	1,			/* long */
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GDATA_SEL	2 Data Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_KPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	1,			/* long */
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUCODE32_SEL	3 32 bit Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0,			/* long */
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUDATA_SEL	4 32/64 bit Data Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0,			/* long */
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUCODE_SEL	5 64 bit Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	1,			/* long */
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
{
	0x0,			/* segment base address */
	sizeof(struct x86_64tss)-1,/* length - all address space */
	SDT_SYSTSS,		/* segment type */
	SEL_KPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0,			/* long */
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* Actually, the TSS is a system descriptor which is double size */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0,			/* long */
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GUGS32_SEL	8 32 bit GS Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0,			/* long */
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
};
void
setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
{
	struct gate_descriptor *ip;

	ip = idt + idx;
	ip->gd_looffset = (uintptr_t)func;
	ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
	ip->gd_ist = ist;
	ip->gd_xx = 0;
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;
	ip->gd_hioffset = ((uintptr_t)func)>>16 ;
}
#define	IDTVEC(name)	__CONCAT(X,name)

extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm), IDTVEC(dblfault),
	IDTVEC(fast_syscall), IDTVEC(fast_syscall32);

#ifdef DEBUG_INTERRUPTS
extern inthand_t *Xrsvdary[256];
#endif
void
sdtossd(struct user_segment_descriptor *sd, struct soft_segment_descriptor *ssd)
{
	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
	ssd->ssd_type  = sd->sd_type;
	ssd->ssd_dpl   = sd->sd_dpl;
	ssd->ssd_p     = sd->sd_p;
	ssd->ssd_def32 = sd->sd_def32;
	ssd->ssd_gran  = sd->sd_gran;
}
void
ssdtosd(struct soft_segment_descriptor *ssd, struct user_segment_descriptor *sd)
{
	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
	sd->sd_type  = ssd->ssd_type;
	sd->sd_dpl   = ssd->ssd_dpl;
	sd->sd_p     = ssd->ssd_p;
	sd->sd_long  = ssd->ssd_long;
	sd->sd_def32 = ssd->ssd_def32;
	sd->sd_gran  = ssd->ssd_gran;
}
void
ssdtosyssd(struct soft_segment_descriptor *ssd,
    struct system_segment_descriptor *sd)
{
	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
	sd->sd_type  = ssd->ssd_type;
	sd->sd_dpl   = ssd->ssd_dpl;
	sd->sd_p     = ssd->ssd_p;
	sd->sd_gran  = ssd->ssd_gran;
}
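
/*
 * Packing sketch (illustrative, not from the original source): a 20 bit
 * limit of 0xfffff with page granularity splits into sd_lolimit = 0xffff
 * and sd_hilimit = 0xf, describing a full 4GB segment, and a base splits
 * the same way into its low 24 bits (sd_lobase) and the remaining high
 * bits (sd_hibase).
 */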
/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * If we cannot accurately determine the physical memory map, then use
 * value from the 0xE801 call, and failing that, the RTC.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
 */
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
	int i, off, physmap_idx, pa_indx, da_indx;
	vm_paddr_t pa, physmap[PHYSMAP_SIZE];
	u_long physmem_tunable;
	pt_entry_t *pte;
	struct bios_smap *smapbase, *smap, *smapend;
	u_int32_t smapsize;
	quad_t dcons_addr, dcons_size;

	bzero(physmap, sizeof(physmap));
	physmap_idx = 0;
	/*
	 * get memory map from INT 15:E820, kindly supplied by the loader.
	 *
	 * subr_module.c says:
	 * "Consumer may safely assume that size value precedes data."
	 * ie: an int32_t immediately precedes smap.
	 */
	smapbase = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase == NULL)
		panic("No BIOS smap info from loader!");

	smapsize = *((u_int32_t *)smapbase - 1);
	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

	for (smap = smapbase; smap < smapend; smap++) {
		if (boothowto & RB_VERBOSE)
			kprintf("SMAP type=%02x base=%016lx len=%016lx\n",
			    smap->type, smap->base, smap->length);

		if (smap->type != SMAP_TYPE_MEMORY)
			continue;

		if (smap->length == 0)
			continue;

		for (i = 0; i <= physmap_idx; i += 2) {
			if (smap->base < physmap[i + 1]) {
				if (boothowto & RB_VERBOSE)
					kprintf(
	"Overlapping or non-monotonic memory region, ignoring second region\n");
				goto next_run;
			}
		}

		if (smap->base == physmap[physmap_idx + 1]) {
			physmap[physmap_idx + 1] += smap->length;
			goto next_run;
		}

		physmap_idx += 2;
		if (physmap_idx == PHYSMAP_SIZE) {
			kprintf(
		"Too many segments in the physical address map, giving up\n");
			break;
		}
		physmap[physmap_idx] = smap->base;
		physmap[physmap_idx + 1] = smap->base + smap->length;
next_run:
		;
	}
	/*
	 * Find the 'base memory' segment for SMP
	 */
	basemem = 0;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (physmap[i] == 0x00000000) {
			basemem = physmap[i + 1] / 1024;
			break;
		}
	}
	if (basemem == 0)
		panic("BIOS smap did not include a basemem segment!");

#ifdef SMP
	/* make hole for AP bootstrap code */
	physmap[1] = mp_bootaddress(physmap[1] / 1024);

	/* look for the MP hardware - needed for apic addresses */
	mp_probe();
#endif
	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	Maxmem = MAXMEM / 4;
#endif

	if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
		Maxmem = atop(physmem_tunable);
	/*
	 * Don't allow MAXMEM or hw.physmem to extend the amount of memory
	 * in the system.
	 */
	if (Maxmem > atop(physmap[physmap_idx + 1]))
		Maxmem = atop(physmap[physmap_idx + 1]);

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		kprintf("Physical memory use set to %ldK\n", Maxmem * 4);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(&first);
	/*
	 * Size up each available chunk of physical memory.
	 */
	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
	pa_indx = 0;
	da_indx = 1;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	dump_avail[da_indx] = physmap[0];
	pte = CMAP1;

	/*
	 * Get dcons buffer address
	 */
	if (kgetenv_quad("dcons.addr", &dcons_addr) == 0 ||
	    kgetenv_quad("dcons.size", &dcons_size) == 0)
		dcons_addr = 0;
	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad, full;
			int *ptr = (int *)CADDR1;

			full = FALSE;
			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= 0x100000 && pa < first)
				goto do_dump_avail;

			/*
			 * block out dcons buffer
			 */
			if (dcons_addr > 0
			    && pa >= trunc_page(dcons_addr)
			    && pa < dcons_addr + dcons_size)
				goto do_dump_avail;

			page_bad = FALSE;

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			*pte = pa | PG_V | PG_RW | PG_N;
			cpu_invltlb();

			tmp = *(int *)ptr;
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa)
				page_bad = TRUE;
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555)
				page_bad = TRUE;
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff)
				page_bad = TRUE;
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0)
				page_bad = TRUE;
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;
			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == TRUE)
				continue;

			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer.  Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we are at or past the end, bump up Maxmem
			 * so that we keep going.  The first bad page
			 * will terminate the loop.
			 */
			if (phys_avail[pa_indx] == pa) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					kprintf(
		"Too many holes in the physical address space, giving up\n");
					pa_indx--;
					full = TRUE;
					goto do_dump_avail;
				}
				phys_avail[pa_indx++] = pa;	/* start */
				phys_avail[pa_indx] = pa + PAGE_SIZE;	/* end */
			}
			physmem++;
do_dump_avail:
			if (dump_avail[da_indx] == pa) {
				dump_avail[da_indx] += PAGE_SIZE;
			} else {
				da_indx++;
				if (da_indx == DUMP_AVAIL_ARRAY_END) {
					da_indx--;
					goto do_next;
				}
				dump_avail[da_indx++] = pa;	/* start */
				dump_avail[da_indx] = pa + PAGE_SIZE;	/* end */
			}
do_next:
			if (full)
				break;
		}
	}
	*pte = 0;
	cpu_invltlb();
	/*
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);
	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);

	avail_end = phys_avail[pa_indx];

	/* Map the message buffer. */
	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
		pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
		    off);
}
/*
 * IDT VECTORS:
 *	0	Divide by zero
 *	1	Debug
 *	2	NMI
 *	3	BreakPoint
 *	4	OverFlow
 *	5	Bound-Range
 *	6	Invalid OpCode
 *	7	Device Not Available (x87)
 *	8	Double-Fault
 *	9	Coprocessor Segment overrun (unsupported, reserved)
 *	10	Invalid-TSS
 *	11	Segment not present
 *	12	Stack
 *	13	General Protection
 *	14	Page Fault
 *	15	Reserved
 *	16	x87 FP Exception pending
 *	17	Alignment Check
 *	18	Machine Check
 *	19	SIMD floating point
 *	20-31	reserved
 *	32-255	INTn/external sources
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
	caddr_t kmdp;
	int gsel_tss, x;
	struct mdglobaldata *gd;
	int metadata_missing, off;
	u_int64_t msr;
	char *env;

	/*
	 * This must be done before the first references
	 * to CPU_prvspace[0] are made.
	 */
	init_paging(&physfree);

	/*
	 * Prevent lowering of the ipl if we call tsleep() early.
	 */
	gd = &CPU_prvspace[0].mdglobaldata;
	bzero(gd, sizeof(*gd));
	/*
	 * Note: on both UP and SMP curthread must be set non-NULL
	 * early in the boot sequence because the system assumes
	 * that 'curthread' is never NULL.
	 */
	gd->mi.gd_curthread = &thread0;
	thread0.td_gd = &gd->mi;

	atdevbase = ISA_HOLE_START + PTOV_OFFSET;
	metadata_missing = 0;
	if (bootinfo.bi_modulep) {
		preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
		preload_bootstrap_relocate(KERNBASE);
	} else {
		metadata_missing = 1;
	}
	if (bootinfo.bi_envp)
		kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;

	preload_metadata = (caddr_t)(uintptr_t)(modulep + PTOV_OFFSET);
	preload_bootstrap_relocate(PTOV_OFFSET);
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
	kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + PTOV_OFFSET;

	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
	/*
	 * start with one cpu.  Note: with one cpu, ncpus2_shift, ncpus2_mask,
	 * and ncpus_fit_mask remain 0.
	 */
	ncpus = 1;
	ncpus2 = 1;
	ncpus_fit = 1;
	/* Init basic tunables, hz etc */
	init_param1();
	/*
	 * make gdt memory segments
	 */
	gdt_segs[GPROC0_SEL].ssd_base =
		(uintptr_t) &CPU_prvspace[0].mdglobaldata.gd_common_tss;

	gd->mi.gd_prvspace = &CPU_prvspace[0];

	for (x = 0; x < NGDT; x++) {
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
			ssdtosd(&gdt_segs[x], &gdt[x]);
	}
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (long) gdt;
	lgdt(&r_gdt);
	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)&gd->mi);
	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

	mi_gdinit(&gd->mi, 0);
	cpu_gdinit(gd, 0);
	proc0paddr = proc0paddr_buff;
	mi_proc0init(&gd->mi, proc0paddr);
	safepri = TDPRI_MAX;
	/* spinlocks and the BGL */
	init_locks();
= 0; x
< NIDT
; x
++)
1734 setidt(x
, &IDTVEC(rsvd
), SDT_SYSIGT
, SEL_KPL
, 0);
1735 setidt(IDT_DE
, &IDTVEC(div
), SDT_SYSIGT
, SEL_KPL
, 0);
1736 setidt(IDT_DB
, &IDTVEC(dbg
), SDT_SYSIGT
, SEL_KPL
, 0);
1737 setidt(IDT_NMI
, &IDTVEC(nmi
), SDT_SYSIGT
, SEL_KPL
, 1);
1738 setidt(IDT_BP
, &IDTVEC(bpt
), SDT_SYSIGT
, SEL_UPL
, 0);
1739 setidt(IDT_OF
, &IDTVEC(ofl
), SDT_SYSIGT
, SEL_KPL
, 0);
1740 setidt(IDT_BR
, &IDTVEC(bnd
), SDT_SYSIGT
, SEL_KPL
, 0);
1741 setidt(IDT_UD
, &IDTVEC(ill
), SDT_SYSIGT
, SEL_KPL
, 0);
1742 setidt(IDT_NM
, &IDTVEC(dna
), SDT_SYSIGT
, SEL_KPL
, 0);
1743 setidt(IDT_DF
, &IDTVEC(dblfault
), SDT_SYSIGT
, SEL_KPL
, 1);
1744 setidt(IDT_FPUGP
, &IDTVEC(fpusegm
), SDT_SYSIGT
, SEL_KPL
, 0);
1745 setidt(IDT_TS
, &IDTVEC(tss
), SDT_SYSIGT
, SEL_KPL
, 0);
1746 setidt(IDT_NP
, &IDTVEC(missing
), SDT_SYSIGT
, SEL_KPL
, 0);
1747 setidt(IDT_SS
, &IDTVEC(stk
), SDT_SYSIGT
, SEL_KPL
, 0);
1748 setidt(IDT_GP
, &IDTVEC(prot
), SDT_SYSIGT
, SEL_KPL
, 0);
1749 setidt(IDT_PF
, &IDTVEC(page
), SDT_SYSIGT
, SEL_KPL
, 0);
1750 setidt(IDT_MF
, &IDTVEC(fpu
), SDT_SYSIGT
, SEL_KPL
, 0);
1751 setidt(IDT_AC
, &IDTVEC(align
), SDT_SYSIGT
, SEL_KPL
, 0);
1752 setidt(IDT_MC
, &IDTVEC(mchk
), SDT_SYSIGT
, SEL_KPL
, 0);
1753 setidt(IDT_XF
, &IDTVEC(xmm
), SDT_SYSIGT
, SEL_KPL
, 0);
1755 r_idt
.rd_limit
= sizeof(idt0
) - 1;
1756 r_idt
.rd_base
= (long) idt
;
	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

	if (metadata_missing)
		kprintf("WARNING: loader(8) metadata is missing!\n");
	if (boothowto & RB_KDB)
		Debugger("Boot flags requested debugger");
	finishidentcpu();	/* Final stage of CPU initialization */
	setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	identify_cpu();		/* Final stage of CPU initialization */
	initializecpu();	/* Initialize CPU registers */
	/* make an initial tss so cpu can get interrupt stack on syscall! */
	gd->gd_common_tss.tss_rsp0 =
	    (register_t)(thread0.td_kstack +
			 KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb));
	/* Ensure the stack is aligned to 16 bytes */
	gd->gd_common_tss.tss_rsp0 &= ~0xFul;
	gd->gd_rsp0 = gd->gd_common_tss.tss_rsp0;
	/* doublefault stack space, runs on ist1 */
	gd->gd_common_tss.tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];

	/* Set the IO permission bitmap (empty due to tss seg limit) */
	gd->gd_common_tss.tss_iobase = sizeof(struct x86_64tss);
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gd->gd_tss_gdt = &gdt[GPROC0_SEL];
	gd->gd_common_tssd = *gd->gd_tss_gdt;
	ltr(gsel_tss);
	/* Set up the fast syscall stuff */
	msr = rdmsr(MSR_EFER) | EFER_SCE;
	wrmsr(MSR_EFER, msr);
	wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
	wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
	      ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
	wrmsr(MSR_STAR, msr);
	wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
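
	/*
	 * Layout note (illustrative, not from the original source):
	 * MSR_STAR packs the kernel SYSCALL base selector into bits 47:32
	 * and the SYSRET user base selector into bits 63:48.  SYSRET
	 * derives the 64 bit user %cs as base+16 and %ss as base+8, which
	 * is why the 32 bit GUCODE32_SEL is the value loaded into bits
	 * 63:48 above.  MSR_SF_MASK lists the rflags bits cleared on
	 * SYSCALL entry.
	 */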
	getmemsize(kmdp, physfree);
	init_param2(physmem);

	/* now running on new page tables, configured, and u/iom is accessible */
	/* Map the message buffer. */
	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
		pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
	msgbufinit(msgbufp, MSGBUF_SIZE);
	/* transfer to user mode */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
	_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_cr3 = KPML4phys;
	thread0.td_pcb->pcb_ext = 0;
	lwp0.lwp_md.md_regs = &proc0_tf;
	env = kgetenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

	/* Location of kernel stack for locore */
	return ((u_int64_t)thread0.td_pcb);
}
/*
 * Initialize machine-dependent portions of the global data structure.
 * Note that the global data area and cpu0's idlestack in the private
 * data space were allocated in locore.
 *
 * Note: the idlethread's cpl is 0
 *
 * WARNING!  Called from early boot, 'mycpu' may not work yet.
 */
void
cpu_gdinit(struct mdglobaldata *gd, int cpu)
{
	if (cpu)
		gd->mi.gd_curthread = &gd->mi.gd_idlethread;

	lwkt_init_thread(&gd->mi.gd_idlethread,
			gd->mi.gd_prvspace->idlestack,
			sizeof(gd->mi.gd_prvspace->idlestack),
			TDF_MPSAFE, &gd->mi);
	lwkt_set_comm(&gd->mi.gd_idlethread, "idle_%d", cpu);
	gd->mi.gd_idlethread.td_switch = cpu_lwkt_switch;
	gd->mi.gd_idlethread.td_sp -= sizeof(void *);
	*(void **)gd->mi.gd_idlethread.td_sp = cpu_idle_restore;
}
int
is_globaldata_space(vm_offset_t saddr, vm_offset_t eaddr)
{
	if (saddr >= (vm_offset_t)&CPU_prvspace[0] &&
	    eaddr <= (vm_offset_t)&CPU_prvspace[MAXCPU]) {
		return (TRUE);
	}
	return (FALSE);
}
struct globaldata *
globaldata_find(int cpu)
{
	KKASSERT(cpu >= 0 && cpu < ncpus);
	return(&CPU_prvspace[cpu].mdglobaldata.mi);
}
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
static void f00f_hack(void *unused);
SYSINIT(f00f_hack, SI_BOOT2_BIOS, SI_ORDER_ANY, f00f_hack, NULL);

static void
f00f_hack(void *unused)
{
	struct gate_descriptor *new_idt;
	vm_offset_t tmp;

	if (!has_f00f_bug)
		return;

	kprintf("Intel Pentium detected, installing workaround for F00F bug\n");

	r_idt.rd_limit = sizeof(idt0) - 1;

	tmp = kmem_alloc(&kernel_map, PAGE_SIZE * 2);
	if (tmp == 0)
		panic("kmem_alloc returned 0");
	if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0)
		panic("kmem_alloc returned non-page-aligned memory");
	/* Put the first seven entries in the lower page */
	new_idt = (struct gate_descriptor *)(tmp + PAGE_SIZE - (7*8));
	bcopy(idt, new_idt, sizeof(idt0));
	r_idt.rd_base = (int)new_idt;
	lidt(&r_idt);
	idt = new_idt;
	if (vm_map_protect(&kernel_map, tmp, tmp + PAGE_SIZE,
			   VM_PROT_READ, FALSE) != KERN_SUCCESS)
		panic("vm_map_protect failed");
}
#endif /* defined(I586_CPU) && !NO_F00F_HACK */
int
ptrace_set_pc(struct lwp *lp, unsigned long addr)
{
	lp->lwp_md.md_regs->tf_rip = addr;
	return (0);
}

int
ptrace_single_step(struct lwp *lp)
{
	lp->lwp_md.md_regs->tf_rflags |= PSL_T;
	return (0);
}
int
fill_regs(struct lwp *lp, struct reg *regs)
{
	struct pcb *pcb;
	struct trapframe *tp;

	tp = lp->lwp_md.md_regs;
	bcopy(&tp->tf_rdi, &regs->r_rdi, sizeof(*regs));

	pcb = lp->lwp_thread->td_pcb;
	return (0);
}
int
set_regs(struct lwp *lp, struct reg *regs)
{
	struct pcb *pcb;
	struct trapframe *tp;

	tp = lp->lwp_md.md_regs;
	if (!EFL_SECURE(regs->r_rflags, tp->tf_rflags) ||
	    !CS_SECURE(regs->r_cs))
		return (EINVAL);
	bcopy(&regs->r_rdi, &tp->tf_rdi, sizeof(*regs));
	pcb = lp->lwp_thread->td_pcb;
	return (0);
}
#ifndef CPU_DISABLE_SSE
static void
fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87)
{
	struct env87 *penv_87 = &sv_87->sv_env;
	struct envxmm *penv_xmm = &sv_xmm->sv_env;
	int i;

	/* FPU control/status */
	penv_87->en_cw = penv_xmm->en_cw;
	penv_87->en_sw = penv_xmm->en_sw;
	penv_87->en_tw = penv_xmm->en_tw;
	penv_87->en_fip = penv_xmm->en_fip;
	penv_87->en_fcs = penv_xmm->en_fcs;
	penv_87->en_opcode = penv_xmm->en_opcode;
	penv_87->en_foo = penv_xmm->en_foo;
	penv_87->en_fos = penv_xmm->en_fos;

	/* FPU registers */
	for (i = 0; i < 8; ++i)
		sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;

	sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
}
static void
set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm)
{
	struct env87 *penv_87 = &sv_87->sv_env;
	struct envxmm *penv_xmm = &sv_xmm->sv_env;
	int i;

	/* FPU control/status */
	penv_xmm->en_cw = penv_87->en_cw;
	penv_xmm->en_sw = penv_87->en_sw;
	penv_xmm->en_tw = penv_87->en_tw;
	penv_xmm->en_fip = penv_87->en_fip;
	penv_xmm->en_fcs = penv_87->en_fcs;
	penv_xmm->en_opcode = penv_87->en_opcode;
	penv_xmm->en_foo = penv_87->en_foo;
	penv_xmm->en_fos = penv_87->en_fos;

	/* FPU registers */
	for (i = 0; i < 8; ++i)
		sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];

	sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
}
#endif /* CPU_DISABLE_SSE */
int
fill_fpregs(struct lwp *lp, struct fpreg *fpregs)
{
#ifndef CPU_DISABLE_SSE
	if (cpu_fxsr) {
		fill_fpregs_xmm(&lp->lwp_thread->td_pcb->pcb_save.sv_xmm,
				(struct save87 *)fpregs);
		return (0);
	}
#endif /* CPU_DISABLE_SSE */
	bcopy(&lp->lwp_thread->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
	return (0);
}
int
set_fpregs(struct lwp *lp, struct fpreg *fpregs)
{
#ifndef CPU_DISABLE_SSE
	if (cpu_fxsr) {
		set_fpregs_xmm((struct save87 *)fpregs,
			       &lp->lwp_thread->td_pcb->pcb_save.sv_xmm);
		return (0);
	}
#endif /* CPU_DISABLE_SSE */
	bcopy(fpregs, &lp->lwp_thread->td_pcb->pcb_save.sv_87, sizeof *fpregs);
	return (0);
}
int
fill_dbregs(struct lwp *lp, struct dbreg *dbregs)
{
	struct pcb *pcb;

	if (lp == NULL) {
		dbregs->dr[0] = rdr0();
		dbregs->dr[1] = rdr1();
		dbregs->dr[2] = rdr2();
		dbregs->dr[3] = rdr3();
		dbregs->dr[4] = rdr4();
		dbregs->dr[5] = rdr5();
		dbregs->dr[6] = rdr6();
		dbregs->dr[7] = rdr7();
	} else {
		pcb = lp->lwp_thread->td_pcb;
		dbregs->dr[0] = pcb->pcb_dr0;
		dbregs->dr[1] = pcb->pcb_dr1;
		dbregs->dr[2] = pcb->pcb_dr2;
		dbregs->dr[3] = pcb->pcb_dr3;
		dbregs->dr[4] = 0;
		dbregs->dr[5] = 0;
		dbregs->dr[6] = pcb->pcb_dr6;
		dbregs->dr[7] = pcb->pcb_dr7;
	}
	return (0);
}
int
set_dbregs(struct lwp *lp, struct dbreg *dbregs)
{
	if (lp == NULL) {
		load_dr0(dbregs->dr[0]);
		load_dr1(dbregs->dr[1]);
		load_dr2(dbregs->dr[2]);
		load_dr3(dbregs->dr[3]);
		load_dr4(dbregs->dr[4]);
		load_dr5(dbregs->dr[5]);
		load_dr6(dbregs->dr[6]);
		load_dr7(dbregs->dr[7]);
	} else {
		struct pcb *pcb;
		struct ucred *ucred;
		int i;
		uint64_t mask1, mask2;

		/*
		 * Don't let an illegal value for dr7 get set.  Specifically,
		 * check for undefined settings.  Setting these bit patterns
		 * result in undefined behaviour and can lead to an unexpected
		 * TRCTRAP.
		 */
		/* JG this loop looks unreadable */
		/* Check 4 2-bit fields for invalid patterns.
		 * These fields are R/Wi, for i = 0..3
		 */
		/* Is 10 in LENi allowed when running in compatibility mode? */
		/* Pattern 10 in R/Wi might be used to indicate
		 * breakpoint on I/O. Further analysis should be
		 * carried to decide if it is safe and useful to
		 * provide access to that capability
		 */
		for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 4;
		     i++, mask1 <<= 4, mask2 <<= 4)
			if ((dbregs->dr[7] & mask1) == mask2)
				return (EINVAL);

		pcb = lp->lwp_thread->td_pcb;
		ucred = lp->lwp_proc->p_ucred;

		/*
		 * Don't let a process set a breakpoint that is not within the
		 * process's address space.  If a process could do this, it
		 * could halt the system by setting a breakpoint in the kernel
		 * (if ddb was enabled).  Thus, we need to check to make sure
		 * that no breakpoints are being enabled for addresses outside
		 * the process's address space, unless, perhaps, we were
		 * called by uid 0.
		 *
		 * XXX - what about when the watched area of the user's
		 * address space is written into from within the kernel
		 * ... wouldn't that still cause a breakpoint to be generated
		 * from within kernel mode?
		 */
		if (priv_check_cred(ucred, PRIV_ROOT, 0) != 0) {
			if (dbregs->dr[7] & 0x3) {
				/* dr0 is enabled */
				if (dbregs->dr[0] >= VM_MAX_USER_ADDRESS)
					return (EINVAL);
			}

			if (dbregs->dr[7] & (0x3<<2)) {
				/* dr1 is enabled */
				if (dbregs->dr[1] >= VM_MAX_USER_ADDRESS)
					return (EINVAL);
			}

			if (dbregs->dr[7] & (0x3<<4)) {
				/* dr2 is enabled */
				if (dbregs->dr[2] >= VM_MAX_USER_ADDRESS)
					return (EINVAL);
			}

			if (dbregs->dr[7] & (0x3<<6)) {
				/* dr3 is enabled */
				if (dbregs->dr[3] >= VM_MAX_USER_ADDRESS)
					return (EINVAL);
			}
		}

		pcb->pcb_dr0 = dbregs->dr[0];
		pcb->pcb_dr1 = dbregs->dr[1];
		pcb->pcb_dr2 = dbregs->dr[2];
		pcb->pcb_dr3 = dbregs->dr[3];
		pcb->pcb_dr6 = dbregs->dr[6];
		pcb->pcb_dr7 = dbregs->dr[7];

		pcb->pcb_flags |= PCB_DBREGS;
	}
	return (0);
}
/*
 * Return > 0 if a hardware breakpoint has been hit, and the
 * breakpoint was in user space.  Return 0, otherwise.
 */
int
user_dbreg_trap(void)
{
	u_int64_t dr7, dr6;	/* debug registers dr6 and dr7 */
	u_int64_t bp;		/* breakpoint bits extracted from dr6 */
	int nbp;		/* number of breakpoints that triggered */
	caddr_t addr[4];	/* breakpoint addresses */
	int i;

	dr7 = rdr7();
	if ((dr7 & 0xff) == 0) {
		/*
		 * all GE and LE bits in the dr7 register are zero,
		 * thus the trap couldn't have been caused by the
		 * hardware debug registers
		 */
		return 0;
	}

	nbp = 0;
	dr6 = rdr6();
	bp = dr6 & 0xf;

	if (bp == 0) {
		/*
		 * None of the breakpoint bits are set meaning this
		 * trap was not caused by any of the debug registers
		 */
		return 0;
	}

	/*
	 * at least one of the breakpoints were hit, check to see
	 * which ones and if any of them are user space addresses
	 */

	if (bp & 0x01)
		addr[nbp++] = (caddr_t)rdr0();
	if (bp & 0x02)
		addr[nbp++] = (caddr_t)rdr1();
	if (bp & 0x04)
		addr[nbp++] = (caddr_t)rdr2();
	if (bp & 0x08)
		addr[nbp++] = (caddr_t)rdr3();

	for (i = 0; i < nbp; i++) {
		if (addr[i] < (caddr_t)VM_MAX_USER_ADDRESS) {
			/*
			 * addr[i] is in user space
			 */
			return nbp;
		}
	}

	/*
	 * None of the breakpoints are in user space.
	 */
	return 0;
}
void
Debugger(const char *msg)
{
	kprintf("Debugger(\"%s\") called.\n", msg);
}
/*
 * Provide inb() and outb() as functions.  They are normally only
 * available as macros calling inlined functions, thus cannot be
 * called inside DDB.
 *
 * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
 */

/* silence compiler warnings */
u_char inb(u_int);
void outb(u_int, u_char);

u_char
inb(u_int port)
{
	u_char	data;
	/*
	 * We use %%dx and not %1 here because i/o is done at %dx and not at
	 * %edx, while gcc generates inferior code (movw instead of movl)
	 * if we tell it to load (u_short) port.
	 */
	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
	return (data);
}

void
outb(u_int port, u_char data)
{
	u_char	al;
	/*
	 * Use an unnecessary assignment to help gcc's register allocator.
	 * This makes a large difference for gcc-1.40 and a tiny difference
	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
	 * best results.  gcc-2.6.0 can't handle this.
	 */
	al = data;
	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
}
#include "opt_cpu.h"

/*
 * initialize all the SMP locks
 */

/* critical region when masking or unmasking interrupts */
struct spinlock_deprecated imen_spinlock;

/* Make FAST_INTR() routines sequential */
struct spinlock_deprecated fast_intr_spinlock;

/* critical region for old style disable_intr/enable_intr */
struct spinlock_deprecated mpintr_spinlock;

/* critical region around INTR() routines */
struct spinlock_deprecated intr_spinlock;

/* lock region used by kernel profiling */
struct spinlock_deprecated mcount_spinlock;

/* locks com (tty) data/hardware accesses: a FASTINTR() */
struct spinlock_deprecated com_spinlock;

/* locks kernel kprintfs */
struct spinlock_deprecated cons_spinlock;

/* lock regions around the clock hardware */
struct spinlock_deprecated clock_spinlock;

/* lock around the MP rendezvous */
struct spinlock_deprecated smp_rv_spinlock;
static void
init_locks(void)
{
	/*
	 * mp_lock = 0;	BSP already owns the MP lock
	 */
	/*
	 * Get the initial mp_lock with a count of 1 for the BSP.
	 * This uses a LOGICAL cpu ID, ie BSP == 0.
	 */
	cpu_get_initial_mplock();

	/* DEPRECATED */
	spin_lock_init(&mcount_spinlock);
	spin_lock_init(&fast_intr_spinlock);
	spin_lock_init(&intr_spinlock);
	spin_lock_init(&mpintr_spinlock);
	spin_lock_init(&imen_spinlock);
	spin_lock_init(&smp_rv_spinlock);
	spin_lock_init(&com_spinlock);
	spin_lock_init(&clock_spinlock);
	spin_lock_init(&cons_spinlock);

	/* our token pool needs to work early */
	lwkt_token_pool_init();
}