2 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
3 * Copyright (c) 1992 Terrence R. Lambert.
4 * Copyright (c) 2003 Peter Wemm.
5 * Copyright (c) 2008 The DragonFly Project.
8 * This code is derived from software contributed to Berkeley by
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
40 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $
43 #include "use_ether.h"
44 //#include "use_npx.h"
46 #include "opt_atalk.h"
47 #include "opt_compat.h"
50 #include "opt_directio.h"
53 #include "opt_msgbuf.h"
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/sysproto.h>
59 #include <sys/signalvar.h>
60 #include <sys/kernel.h>
61 #include <sys/linker.h>
62 #include <sys/malloc.h>
66 #include <sys/reboot.h>
68 #include <sys/msgbuf.h>
69 #include <sys/sysent.h>
70 #include <sys/sysctl.h>
71 #include <sys/vmmeter.h>
73 #include <sys/upcall.h>
74 #include <sys/usched.h>
78 #include <vm/vm_param.h>
80 #include <vm/vm_kern.h>
81 #include <vm/vm_object.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_map.h>
84 #include <vm/vm_pager.h>
85 #include <vm/vm_extern.h>
87 #include <sys/thread2.h>
88 #include <sys/mplock2.h>
96 #include <machine/cpu.h>
97 #include <machine/clock.h>
98 #include <machine/specialreg.h>
100 #include <machine/bootinfo.h>
102 #include <machine/md_var.h>
103 #include <machine/metadata.h>
104 #include <machine/pc/bios.h>
105 #include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
106 #include <machine/globaldata.h> /* CPU_prvspace */
107 #include <machine/smp.h>
109 #include <machine/perfmon.h>
111 #include <machine/cputypes.h>
114 #include <bus/isa/isa_device.h>
116 #include <machine_base/isa/intr_machdep.h>
117 #include <bus/isa/rtc.h>
118 #include <sys/random.h>
119 #include <sys/ptrace.h>
120 #include <machine/sigframe.h>
122 #define PHYSMAP_ENTRIES 10
124 extern void init386(int first
);
125 extern void dblfault_handler(void);
126 extern u_int64_t
hammer_time(u_int64_t
, u_int64_t
);
128 extern void printcpuinfo(void); /* XXX header file */
129 extern void identify_cpu(void);
131 extern void finishidentcpu(void);
133 extern void panicifcpuunsupported(void);
135 static void cpu_startup(void *);
136 #ifndef CPU_DISABLE_SSE
137 static void set_fpregs_xmm(struct save87
*, struct savexmm
*);
138 static void fill_fpregs_xmm(struct savexmm
*, struct save87
*);
139 #endif /* CPU_DISABLE_SSE */
141 extern void ffs_rawread_setup(void);
142 #endif /* DIRECTIO */
143 static void init_locks(void);
145 SYSINIT(cpu
, SI_BOOT2_SMP
, SI_ORDER_FIRST
, cpu_startup
, NULL
)
148 extern vm_offset_t ksym_start
, ksym_end
;
156 struct privatespace CPU_prvspace
[MAXCPU
];
158 int _udatasel
, _ucodesel
, _ucode32sel
;
161 int64_t tsc_offsets
[MAXCPU
];
163 int64_t tsc_offsets
[1];
166 #if defined(SWTCH_OPTIM_STATS)
167 extern int swtch_optim_stats
;
168 SYSCTL_INT(_debug
, OID_AUTO
, swtch_optim_stats
,
169 CTLFLAG_RD
, &swtch_optim_stats
, 0, "");
170 SYSCTL_INT(_debug
, OID_AUTO
, tlb_flush_count
,
171 CTLFLAG_RD
, &tlb_flush_count
, 0, "");
177 sysctl_hw_physmem(SYSCTL_HANDLER_ARGS
)
179 int error
= sysctl_handle_int(oidp
, 0, ctob(physmem
), req
);
183 SYSCTL_PROC(_hw
, HW_PHYSMEM
, physmem
, CTLTYPE_INT
|CTLFLAG_RD
,
184 0, 0, sysctl_hw_physmem
, "IU", "");
187 sysctl_hw_usermem(SYSCTL_HANDLER_ARGS
)
189 int error
= sysctl_handle_int(oidp
, 0,
190 ctob(physmem
- vmstats
.v_wire_count
), req
);
194 SYSCTL_PROC(_hw
, HW_USERMEM
, usermem
, CTLTYPE_INT
|CTLFLAG_RD
,
195 0, 0, sysctl_hw_usermem
, "IU", "");
198 sysctl_hw_availpages(SYSCTL_HANDLER_ARGS
)
200 int error
= sysctl_handle_int(oidp
, 0,
201 x86_64_btop(avail_end
- avail_start
), req
);
205 SYSCTL_PROC(_hw
, OID_AUTO
, availpages
, CTLTYPE_INT
|CTLFLAG_RD
,
206 0, 0, sysctl_hw_availpages
, "I", "");
208 vm_paddr_t Maxmem
= 0;
211 * The number of PHYSMAP entries must be one less than the number of
212 * PHYSSEG entries because the PHYSMAP entry that spans the largest
213 * physical address that is accessible by ISA DMA is split into two
216 #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1))
218 vm_paddr_t phys_avail
[PHYSMAP_SIZE
+ 2];
219 vm_paddr_t dump_avail
[PHYSMAP_SIZE
+ 2];
221 /* must be 2 less so 0 0 can signal end of chunks */
222 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
223 #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2)
225 static vm_offset_t buffer_sva
, buffer_eva
;
226 vm_offset_t clean_sva
, clean_eva
;
227 static vm_offset_t pager_sva
, pager_eva
;
228 static struct trapframe proc0_tf
;
231 cpu_startup(void *dummy
)
235 vm_offset_t firstaddr
;
237 if (boothowto
& RB_VERBOSE
)
241 * Good {morning,afternoon,evening,night}.
243 kprintf("%s", version
);
246 panicifcpuunsupported();
250 kprintf("real memory = %ju (%juK bytes)\n",
251 (intmax_t)ptoa(Maxmem
),
252 (intmax_t)ptoa(Maxmem
) / 1024);
254 * Display any holes after the first chunk of extended memory.
259 kprintf("Physical memory chunk(s):\n");
260 for (indx
= 0; phys_avail
[indx
+ 1] != 0; indx
+= 2) {
261 vm_paddr_t size1
= phys_avail
[indx
+ 1] - phys_avail
[indx
];
263 kprintf("0x%08jx - 0x%08jx, %ju bytes (%ju pages)\n",
264 (intmax_t)phys_avail
[indx
],
265 (intmax_t)phys_avail
[indx
+ 1] - 1,
267 (intmax_t)(size1
/ PAGE_SIZE
));
272 * Allocate space for system data structures.
273 * The first available kernel virtual address is in "v".
274 * As pages of kernel virtual memory are allocated, "v" is incremented.
275 * As pages of memory are allocated and cleared,
276 * "firstaddr" is incremented.
277 * An index into the kernel page table corresponding to the
278 * virtual memory address maintained in "v" is kept in "mapaddr".
282 * Make two passes. The first pass calculates how much memory is
283 * needed and allocates it. The second pass assigns virtual
284 * addresses to the various data structures.
288 v
= (caddr_t
)firstaddr
;
290 #define valloc(name, type, num) \
291 (name) = (type *)v; v = (caddr_t)((name)+(num))
292 #define valloclim(name, type, num, lim) \
293 (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
296 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
297 * For the first 64MB of ram nominally allocate sufficient buffers to
298 * cover 1/4 of our ram. Beyond the first 64MB allocate additional
299 * buffers to cover 1/20 of our ram over 64MB. When auto-sizing
300 * the buffer cache we limit the eventual kva reservation to
303 * factor represents the 1/4 x ram conversion.
306 int factor
= 4 * BKVASIZE
/ 1024;
307 int kbytes
= physmem
* (PAGE_SIZE
/ 1024);
311 nbuf
+= min((kbytes
- 4096) / factor
, 65536 / factor
);
313 nbuf
+= (kbytes
- 65536) * 2 / (factor
* 5);
314 if (maxbcache
&& nbuf
> maxbcache
/ BKVASIZE
)
315 nbuf
= maxbcache
/ BKVASIZE
;
319 * Do not allow the buffer_map to be more than 1/2 the size of the
322 if (nbuf
> (virtual_end
- virtual_start
) / (BKVASIZE
* 2)) {
323 nbuf
= (virtual_end
- virtual_start
) / (BKVASIZE
* 2);
324 kprintf("Warning: nbufs capped at %d\n", nbuf
);
327 nswbuf
= max(min(nbuf
/4, 256), 16);
329 if (nswbuf
< NSWBUF_MIN
)
336 valloc(swbuf
, struct buf
, nswbuf
);
337 valloc(buf
, struct buf
, nbuf
);
340 * End of first pass, size has been calculated so allocate memory
342 if (firstaddr
== 0) {
343 size
= (vm_size_t
)(v
- firstaddr
);
344 firstaddr
= kmem_alloc(&kernel_map
, round_page(size
));
346 panic("startup: no room for tables");
351 * End of second pass, addresses have been assigned
353 if ((vm_size_t
)(v
- firstaddr
) != size
)
354 panic("startup: table size inconsistency");
356 kmem_suballoc(&kernel_map
, &clean_map
, &clean_sva
, &clean_eva
,
357 (nbuf
*BKVASIZE
) + (nswbuf
*MAXPHYS
) + pager_map_size
);
358 kmem_suballoc(&clean_map
, &buffer_map
, &buffer_sva
, &buffer_eva
,
360 buffer_map
.system_map
= 1;
361 kmem_suballoc(&clean_map
, &pager_map
, &pager_sva
, &pager_eva
,
362 (nswbuf
*MAXPHYS
) + pager_map_size
);
363 pager_map
.system_map
= 1;
365 #if defined(USERCONFIG)
367 cninit(); /* the preferred console may have changed */
370 kprintf("avail memory = %lu (%luK bytes)\n",
371 ptoa(vmstats
.v_free_count
),
372 ptoa(vmstats
.v_free_count
) / 1024);
375 * Set up buffers, so they can be used to read disk labels.
378 vm_pager_bufferinit();
382 * OK, enough kmem_alloc/malloc state should be up, let's get on with it!
384 mp_start(); /* fire up the APs and APICs */
391 * Send an interrupt to process.
393 * Stack is set up to allow sigcode stored
394 * at top to call routine, followed by kcall
395 * to sigreturn routine below. After sigreturn
396 * resets the signal mask, the stack, and the
397 * frame pointer, it returns to the user
401 sendsig(sig_t catcher
, int sig
, sigset_t
*mask
, u_long code
)
403 struct lwp
*lp
= curthread
->td_lwp
;
404 struct proc
*p
= lp
->lwp_proc
;
405 struct trapframe
*regs
;
406 struct sigacts
*psp
= p
->p_sigacts
;
407 struct sigframe sf
, *sfp
;
411 regs
= lp
->lwp_md
.md_regs
;
412 oonstack
= (lp
->lwp_sigstk
.ss_flags
& SS_ONSTACK
) ? 1 : 0;
414 /* Save user context */
415 bzero(&sf
, sizeof(struct sigframe
));
416 sf
.sf_uc
.uc_sigmask
= *mask
;
417 sf
.sf_uc
.uc_stack
= lp
->lwp_sigstk
;
418 sf
.sf_uc
.uc_mcontext
.mc_onstack
= oonstack
;
419 KKASSERT(__offsetof(struct trapframe
, tf_rdi
) == 0);
420 bcopy(regs
, &sf
.sf_uc
.uc_mcontext
.mc_rdi
, sizeof(struct trapframe
));
422 /* Make the size of the saved context visible to userland */
423 sf
.sf_uc
.uc_mcontext
.mc_len
= sizeof(sf
.sf_uc
.uc_mcontext
);
425 /* Save mailbox pending state for syscall interlock semantics */
426 if (p
->p_flag
& P_MAILBOX
)
427 sf
.sf_uc
.uc_mcontext
.mc_xflags
|= PGEX_MAILBOX
;
429 /* Allocate and validate space for the signal handler context. */
430 if ((lp
->lwp_flag
& LWP_ALTSTACK
) != 0 && !oonstack
&&
431 SIGISMEMBER(psp
->ps_sigonstack
, sig
)) {
432 sp
= (char *)(lp
->lwp_sigstk
.ss_sp
+ lp
->lwp_sigstk
.ss_size
-
433 sizeof(struct sigframe
));
434 lp
->lwp_sigstk
.ss_flags
|= SS_ONSTACK
;
436 /* We take red zone into account */
437 sp
= (char *)regs
->tf_rsp
- sizeof(struct sigframe
) - 128;
440 /* Align to 16 bytes */
441 sfp
= (struct sigframe
*)((intptr_t)sp
& ~0xFUL
);
443 /* Translate the signal if appropriate */
444 if (p
->p_sysent
->sv_sigtbl
) {
445 if (sig
<= p
->p_sysent
->sv_sigsize
)
446 sig
= p
->p_sysent
->sv_sigtbl
[_SIG_IDX(sig
)];
450 * Build the argument list for the signal handler.
452 * Arguments are in registers (%rdi, %rsi, %rdx, %rcx)
454 regs
->tf_rdi
= sig
; /* argument 1 */
455 regs
->tf_rdx
= (register_t
)&sfp
->sf_uc
; /* argument 3 */
457 if (SIGISMEMBER(psp
->ps_siginfo
, sig
)) {
459 * Signal handler installed with SA_SIGINFO.
461 * action(signo, siginfo, ucontext)
463 regs
->tf_rsi
= (register_t
)&sfp
->sf_si
; /* argument 2 */
464 regs
->tf_rcx
= (register_t
)regs
->tf_err
; /* argument 4 */
465 sf
.sf_ahu
.sf_action
= (__siginfohandler_t
*)catcher
;
467 /* fill siginfo structure */
468 sf
.sf_si
.si_signo
= sig
;
469 sf
.sf_si
.si_code
= code
;
470 sf
.sf_si
.si_addr
= (void *)regs
->tf_err
;
473 * Old FreeBSD-style arguments.
475 * handler (signo, code, [uc], addr)
477 regs
->tf_rsi
= (register_t
)code
; /* argument 2 */
478 regs
->tf_rcx
= (register_t
)regs
->tf_err
; /* argument 4 */
479 sf
.sf_ahu
.sf_handler
= catcher
;
483 * If we're a vm86 process, we want to save the segment registers.
484 * We also change eflags to be our emulated eflags, not the actual
488 if (regs
->tf_eflags
& PSL_VM
) {
489 struct trapframe_vm86
*tf
= (struct trapframe_vm86
*)regs
;
490 struct vm86_kernel
*vm86
= &lp
->lwp_thread
->td_pcb
->pcb_ext
->ext_vm86
;
492 sf
.sf_uc
.uc_mcontext
.mc_gs
= tf
->tf_vm86_gs
;
493 sf
.sf_uc
.uc_mcontext
.mc_fs
= tf
->tf_vm86_fs
;
494 sf
.sf_uc
.uc_mcontext
.mc_es
= tf
->tf_vm86_es
;
495 sf
.sf_uc
.uc_mcontext
.mc_ds
= tf
->tf_vm86_ds
;
497 if (vm86
->vm86_has_vme
== 0)
498 sf
.sf_uc
.uc_mcontext
.mc_eflags
=
499 (tf
->tf_eflags
& ~(PSL_VIF
| PSL_VIP
)) |
500 (vm86
->vm86_eflags
& (PSL_VIF
| PSL_VIP
));
503 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
504 * syscalls made by the signal handler. This just avoids
505 * wasting time for our lazy fixup of such faults. PSL_NT
506 * does nothing in vm86 mode, but vm86 programs can set it
507 * almost legitimately in probes for old cpu types.
509 tf
->tf_eflags
&= ~(PSL_VM
| PSL_NT
| PSL_VIF
| PSL_VIP
);
514 * Save the FPU state and reinit the FP unit
516 npxpush(&sf
.sf_uc
.uc_mcontext
);
519 * Copy the sigframe out to the user's stack.
521 if (copyout(&sf
, sfp
, sizeof(struct sigframe
)) != 0) {
523 * Something is wrong with the stack pointer.
524 * ...Kill the process.
529 regs
->tf_rsp
= (register_t
)sfp
;
530 regs
->tf_rip
= PS_STRINGS
- *(p
->p_sysent
->sv_szsigcode
);
533 * i386 abi specifies that the direction flag must be cleared
536 regs
->tf_rflags
&= ~(PSL_T
|PSL_D
);
539 * 64 bit mode has a code and stack selector but
540 * no data or extra selector. %fs and %gs are not
543 regs
->tf_cs
= _ucodesel
;
544 regs
->tf_ss
= _udatasel
;
548 * Sanitize the trapframe for a virtual kernel passing control to a custom
549 * VM context. Remove any items that would otherwise create a privilege
552 * XXX at the moment we allow userland to set the resume flag. Is this a
556 cpu_sanitize_frame(struct trapframe
*frame
)
558 frame
->tf_cs
= _ucodesel
;
559 frame
->tf_ss
= _udatasel
;
560 /* XXX VM (8086) mode not supported? */
561 frame
->tf_rflags
&= (PSL_RF
| PSL_USERCHANGE
| PSL_VM_UNSUPP
);
562 frame
->tf_rflags
|= PSL_RESERVED_DEFAULT
| PSL_I
;
568 * Sanitize the tls so loading the descriptor does not blow up
569 * on us. For x86_64 we don't have to do anything.
572 cpu_sanitize_tls(struct savetls
*tls
)
578 * sigreturn(ucontext_t *sigcntxp)
580 * System call to cleanup state after a signal
581 * has been taken. Reset signal mask and
582 * stack state from context left by sendsig (above).
583 * Return to previous pc and psl as specified by
584 * context left by sendsig. Check carefully to
585 * make sure that the user has not modified the
586 * state to gain improper privileges.
590 #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
591 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
594 sys_sigreturn(struct sigreturn_args
*uap
)
596 struct lwp
*lp
= curthread
->td_lwp
;
597 struct proc
*p
= lp
->lwp_proc
;
598 struct trapframe
*regs
;
606 * We have to copy the information into kernel space so userland
607 * can't modify it while we are sniffing it.
609 regs
= lp
->lwp_md
.md_regs
;
610 error
= copyin(uap
->sigcntxp
, &uc
, sizeof(uc
));
614 rflags
= ucp
->uc_mcontext
.mc_rflags
;
616 /* VM (8086) mode not supported */
617 rflags
&= ~PSL_VM_UNSUPP
;
620 if (eflags
& PSL_VM
) {
621 struct trapframe_vm86
*tf
= (struct trapframe_vm86
*)regs
;
622 struct vm86_kernel
*vm86
;
625 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
626 * set up the vm86 area, and we can't enter vm86 mode.
628 if (lp
->lwp_thread
->td_pcb
->pcb_ext
== 0)
630 vm86
= &lp
->lwp_thread
->td_pcb
->pcb_ext
->ext_vm86
;
631 if (vm86
->vm86_inited
== 0)
634 /* go back to user mode if both flags are set */
635 if ((eflags
& PSL_VIP
) && (eflags
& PSL_VIF
))
636 trapsignal(lp
, SIGBUS
, 0);
638 if (vm86
->vm86_has_vme
) {
639 eflags
= (tf
->tf_eflags
& ~VME_USERCHANGE
) |
640 (eflags
& VME_USERCHANGE
) | PSL_VM
;
642 vm86
->vm86_eflags
= eflags
; /* save VIF, VIP */
643 eflags
= (tf
->tf_eflags
& ~VM_USERCHANGE
) |
644 (eflags
& VM_USERCHANGE
) | PSL_VM
;
646 bcopy(&ucp
->uc_mcontext
.mc_gs
, tf
, sizeof(struct trapframe
));
647 tf
->tf_eflags
= eflags
;
648 tf
->tf_vm86_ds
= tf
->tf_ds
;
649 tf
->tf_vm86_es
= tf
->tf_es
;
650 tf
->tf_vm86_fs
= tf
->tf_fs
;
651 tf
->tf_vm86_gs
= tf
->tf_gs
;
652 tf
->tf_ds
= _udatasel
;
653 tf
->tf_es
= _udatasel
;
654 tf
->tf_fs
= _udatasel
;
655 tf
->tf_gs
= _udatasel
;
660 * Don't allow users to change privileged or reserved flags.
663 * XXX do allow users to change the privileged flag PSL_RF.
664 * The cpu sets PSL_RF in tf_eflags for faults. Debuggers
665 * should sometimes set it there too. tf_eflags is kept in
666 * the signal context during signal handling and there is no
667 * other place to remember it, so the PSL_RF bit may be
668 * corrupted by the signal handler without us knowing.
669 * Corruption of the PSL_RF bit at worst causes one more or
670 * one less debugger trap, so allowing it is fairly harmless.
672 if (!EFL_SECURE(rflags
& ~PSL_RF
, regs
->tf_rflags
& ~PSL_RF
)) {
673 kprintf("sigreturn: rflags = 0x%lx\n", (long)rflags
);
678 * Don't allow users to load a valid privileged %cs. Let the
679 * hardware check for invalid selectors, excess privilege in
680 * other selectors, invalid %eip's and invalid %esp's.
682 cs
= ucp
->uc_mcontext
.mc_cs
;
683 if (!CS_SECURE(cs
)) {
684 kprintf("sigreturn: cs = 0x%x\n", cs
);
685 trapsignal(lp
, SIGBUS
, T_PROTFLT
);
688 bcopy(&ucp
->uc_mcontext
.mc_rdi
, regs
, sizeof(struct trapframe
));
692 * Restore the FPU state from the frame
695 npxpop(&ucp
->uc_mcontext
);
698 * Merge saved signal mailbox pending flag to maintain interlock
699 * semantics against system calls.
701 if (ucp
->uc_mcontext
.mc_xflags
& PGEX_MAILBOX
)
702 p
->p_flag
|= P_MAILBOX
;
704 if (ucp
->uc_mcontext
.mc_onstack
& 1)
705 lp
->lwp_sigstk
.ss_flags
|= SS_ONSTACK
;
707 lp
->lwp_sigstk
.ss_flags
&= ~SS_ONSTACK
;
709 lp
->lwp_sigmask
= ucp
->uc_sigmask
;
710 SIG_CANTMASK(lp
->lwp_sigmask
);
716 * Stack frame on entry to function. %rax will contain the function vector,
717 * %rcx will contain the function data. flags, rcx, and rax will have
718 * already been pushed on the stack.
729 sendupcall(struct vmupcall
*vu
, int morepending
)
731 struct lwp
*lp
= curthread
->td_lwp
;
732 struct trapframe
*regs
;
733 struct upcall upcall
;
734 struct upc_frame upc_frame
;
738 * If we are a virtual kernel running an emulated user process
739 * context, switch back to the virtual kernel context before
740 * trying to post the signal.
742 if (lp
->lwp_vkernel
&& lp
->lwp_vkernel
->ve
) {
743 lp
->lwp_md
.md_regs
->tf_trapno
= 0;
744 vkernel_trap(lp
, lp
->lwp_md
.md_regs
);
748 * Get the upcall data structure
750 if (copyin(lp
->lwp_upcall
, &upcall
, sizeof(upcall
)) ||
751 copyin((char *)upcall
.upc_uthread
+ upcall
.upc_critoff
, &crit_count
, sizeof(int))
754 kprintf("bad upcall address\n");
759 * If the data structure is already marked pending or has a critical
760 * section count, mark the data structure as pending and return
761 * without doing an upcall. vu_pending is left set.
763 if (upcall
.upc_pending
|| crit_count
>= vu
->vu_pending
) {
764 if (upcall
.upc_pending
< vu
->vu_pending
) {
765 upcall
.upc_pending
= vu
->vu_pending
;
766 copyout(&upcall
.upc_pending
, &lp
->lwp_upcall
->upc_pending
,
767 sizeof(upcall
.upc_pending
));
773 * We can run this upcall now, clear vu_pending.
775 * Bump our critical section count and set or clear the
776 * user pending flag depending on whether more upcalls are
777 * pending. The user will be responsible for calling
778 * upc_dispatch(-1) to process remaining upcalls.
781 upcall
.upc_pending
= morepending
;
782 crit_count
+= TDPRI_CRIT
;
783 copyout(&upcall
.upc_pending
, &lp
->lwp_upcall
->upc_pending
,
784 sizeof(upcall
.upc_pending
));
785 copyout(&crit_count
, (char *)upcall
.upc_uthread
+ upcall
.upc_critoff
,
789 * Construct a stack frame and issue the upcall
791 regs
= lp
->lwp_md
.md_regs
;
792 upc_frame
.rax
= regs
->tf_rax
;
793 upc_frame
.rcx
= regs
->tf_rcx
;
794 upc_frame
.rdx
= regs
->tf_rdx
;
795 upc_frame
.flags
= regs
->tf_rflags
;
796 upc_frame
.oldip
= regs
->tf_rip
;
797 if (copyout(&upc_frame
, (void *)(regs
->tf_rsp
- sizeof(upc_frame
)),
798 sizeof(upc_frame
)) != 0) {
799 kprintf("bad stack on upcall\n");
801 regs
->tf_rax
= (register_t
)vu
->vu_func
;
802 regs
->tf_rcx
= (register_t
)vu
->vu_data
;
803 regs
->tf_rdx
= (register_t
)lp
->lwp_upcall
;
804 regs
->tf_rip
= (register_t
)vu
->vu_ctx
;
805 regs
->tf_rsp
-= sizeof(upc_frame
);
810 * fetchupcall occurs in the context of a system call, which means that
811 * we have to return EJUSTRETURN in order to prevent eax and edx from
812 * being overwritten by the syscall return value.
814 * if vu is not NULL we return the new context in %edx, the new data in %ecx,
815 * and the function pointer in %eax.
818 fetchupcall(struct vmupcall
*vu
, int morepending
, void *rsp
)
820 struct upc_frame upc_frame
;
821 struct lwp
*lp
= curthread
->td_lwp
;
822 struct trapframe
*regs
;
824 struct upcall upcall
;
827 regs
= lp
->lwp_md
.md_regs
;
829 error
= copyout(&morepending
, &lp
->lwp_upcall
->upc_pending
, sizeof(int));
833 * This jumps us to the next ready context.
836 error
= copyin(lp
->lwp_upcall
, &upcall
, sizeof(upcall
));
839 error
= copyin((char *)upcall
.upc_uthread
+ upcall
.upc_critoff
, &crit_count
, sizeof(int));
840 crit_count
+= TDPRI_CRIT
;
842 error
= copyout(&crit_count
, (char *)upcall
.upc_uthread
+ upcall
.upc_critoff
, sizeof(int));
843 regs
->tf_rax
= (register_t
)vu
->vu_func
;
844 regs
->tf_rcx
= (register_t
)vu
->vu_data
;
845 regs
->tf_rdx
= (register_t
)lp
->lwp_upcall
;
846 regs
->tf_rip
= (register_t
)vu
->vu_ctx
;
847 regs
->tf_rsp
= (register_t
)rsp
;
850 * This returns us to the originally interrupted code.
852 error
= copyin(rsp
, &upc_frame
, sizeof(upc_frame
));
853 regs
->tf_rax
= upc_frame
.rax
;
854 regs
->tf_rcx
= upc_frame
.rcx
;
855 regs
->tf_rdx
= upc_frame
.rdx
;
856 regs
->tf_rflags
= (regs
->tf_rflags
& ~PSL_USERCHANGE
) |
857 (upc_frame
.flags
& PSL_USERCHANGE
);
858 regs
->tf_rip
= upc_frame
.oldip
;
859 regs
->tf_rsp
= (register_t
)((char *)rsp
+ sizeof(upc_frame
));
868 * Machine dependent boot() routine
870 * I haven't seen anything to put here yet
871 * Possibly some stuff might be grafted back here from boot()
879 * Shutdown the CPU as much as possible
885 __asm__
__volatile("hlt");
889 * cpu_idle() represents the idle LWKT. You cannot return from this function
890 * (unless you want to blow things up!). Instead we look for runnable threads
891 * and loop or halt as appropriate. Giant is not held on entry to the thread.
893 * The main loop is entered with a critical section held, we must release
894 * the critical section before doing anything else. lwkt_switch() will
895 * check for pending interrupts due to entering and exiting its own
898 * Note on cpu_idle_hlt: On an SMP system we rely on a scheduler IPI
899 * to wake a HLTed cpu up. However, there are cases where the idlethread
900 * will be entered with the possibility that no IPI will occur and in such
901 * cases lwkt_switch() sets TDF_IDLE_NOHLT.
903 static int cpu_idle_hlt
= 1;
904 static int cpu_idle_hltcnt
;
905 static int cpu_idle_spincnt
;
906 SYSCTL_INT(_machdep
, OID_AUTO
, cpu_idle_hlt
, CTLFLAG_RW
,
907 &cpu_idle_hlt
, 0, "Idle loop HLT enable");
908 SYSCTL_INT(_machdep
, OID_AUTO
, cpu_idle_hltcnt
, CTLFLAG_RW
,
909 &cpu_idle_hltcnt
, 0, "Idle loop entry halts");
910 SYSCTL_INT(_machdep
, OID_AUTO
, cpu_idle_spincnt
, CTLFLAG_RW
,
911 &cpu_idle_spincnt
, 0, "Idle loop entry spins");
914 cpu_idle_default_hook(void)
917 * We must guarantee that hlt is exactly the instruction
920 __asm
__volatile("sti; hlt");
923 /* Other subsystems (e.g., ACPI) can hook this later. */
924 void (*cpu_idle_hook
)(void) = cpu_idle_default_hook
;
929 struct thread
*td
= curthread
;
932 KKASSERT(td
->td_pri
< TDPRI_CRIT
);
935 * See if there are any LWKTs ready to go.
940 * If we are going to halt call splz unconditionally after
941 * CLIing to catch any interrupt races. Note that we are
942 * at SPL0 and interrupts are enabled.
944 if (cpu_idle_hlt
&& !lwkt_runnable() &&
945 (td
->td_flags
& TDF_IDLE_NOHLT
) == 0) {
946 __asm
__volatile("cli");
948 if (!lwkt_runnable())
952 __asm
__volatile("pause");
956 td
->td_flags
&= ~TDF_IDLE_NOHLT
;
959 __asm
__volatile("sti; pause");
961 __asm
__volatile("sti");
971 * This routine is called when the only runnable threads require
972 * the MP lock, and the scheduler couldn't get it. On a real cpu
973 * we let the scheduler spin.
976 cpu_mplock_contested(void)
982 * This routine is called if a spinlock has been held through the
983 * exponential backoff period and is seriously contested. On a real cpu
987 cpu_spinlock_contested(void)
995 * Clear registers on exec
998 exec_setregs(u_long entry
, u_long stack
, u_long ps_strings
)
1000 struct thread
*td
= curthread
;
1001 struct lwp
*lp
= td
->td_lwp
;
1002 struct pcb
*pcb
= td
->td_pcb
;
1003 struct trapframe
*regs
= lp
->lwp_md
.md_regs
;
1005 /* was i386_user_cleanup() in NetBSD */
1008 bzero((char *)regs
, sizeof(struct trapframe
));
1009 regs
->tf_rip
= entry
;
1010 regs
->tf_rsp
= ((stack
- 8) & ~0xFul
) + 8; /* align the stack */
1011 regs
->tf_rdi
= stack
; /* argv */
1012 regs
->tf_rflags
= PSL_USER
| (regs
->tf_rflags
& PSL_T
);
1013 regs
->tf_ss
= _udatasel
;
1014 regs
->tf_cs
= _ucodesel
;
1015 regs
->tf_rbx
= ps_strings
;
1018 * Reset the hardware debug registers if they were in use.
1019 * They won't have any meaning for the newly exec'd process.
1021 if (pcb
->pcb_flags
& PCB_DBREGS
) {
1027 pcb
->pcb_dr7
= 0; /* JG set bit 10? */
1028 if (pcb
== td
->td_pcb
) {
1030 * Clear the debug registers on the running
1031 * CPU, otherwise they will end up affecting
1032 * the next process we switch to.
1036 pcb
->pcb_flags
&= ~PCB_DBREGS
;
1040 * Initialize the math emulator (if any) for the current process.
1041 * Actually, just clear the bit that says that the emulator has
1042 * been initialized. Initialization is delayed until the process
1043 * traps to the emulator (if it is done at all) mainly because
1044 * emulators don't provide an entry point for initialization.
1046 pcb
->pcb_flags
&= ~FP_SOFTFP
;
1049 * NOTE: do not set CR0_TS here. npxinit() must do it after clearing
1050 * gd_npxthread. Otherwise a preemptive interrupt thread
1051 * may panic in npxdna().
1054 load_cr0(rcr0() | CR0_MP
);
1057 * NOTE: The MSR values must be correct so we can return to
1058 * userland. gd_user_fs/gs must be correct so the switch
1059 * code knows what the current MSR values are.
1061 pcb
->pcb_fsbase
= 0; /* Values loaded from PCB on switch */
1062 pcb
->pcb_gsbase
= 0;
1063 mdcpu
->gd_user_fs
= 0; /* Cache of current MSR values */
1064 mdcpu
->gd_user_gs
= 0;
1065 wrmsr(MSR_FSBASE
, 0); /* Set MSR values for return to userland */
1066 wrmsr(MSR_KGSBASE
, 0);
1068 /* Initialize the npx (if any) for the current process. */
1069 npxinit(__INITIAL_NPXCW__
);
1072 pcb
->pcb_ds
= _udatasel
;
1073 pcb
->pcb_es
= _udatasel
;
1074 pcb
->pcb_fs
= _udatasel
;
1075 pcb
->pcb_gs
= _udatasel
;
1084 cr0
|= CR0_NE
; /* Done by npxinit() */
1085 cr0
|= CR0_MP
| CR0_TS
; /* Done at every execve() too. */
1086 cr0
|= CR0_WP
| CR0_AM
;
1092 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS
)
1095 error
= sysctl_handle_int(oidp
, oidp
->oid_arg1
, oidp
->oid_arg2
,
1097 if (!error
&& req
->newptr
)
1102 SYSCTL_PROC(_machdep
, CPU_ADJKERNTZ
, adjkerntz
, CTLTYPE_INT
|CTLFLAG_RW
,
1103 &adjkerntz
, 0, sysctl_machdep_adjkerntz
, "I", "");
1105 SYSCTL_INT(_machdep
, CPU_DISRTCSET
, disable_rtc_set
,
1106 CTLFLAG_RW
, &disable_rtc_set
, 0, "");
1109 SYSCTL_STRUCT(_machdep
, CPU_BOOTINFO
, bootinfo
,
1110 CTLFLAG_RD
, &bootinfo
, bootinfo
, "");
1113 SYSCTL_INT(_machdep
, CPU_WALLCLOCK
, wall_cmos_clock
,
1114 CTLFLAG_RW
, &wall_cmos_clock
, 0, "");
1116 extern u_long bootdev
; /* not a cdev_t - encoding is different */
1117 SYSCTL_ULONG(_machdep
, OID_AUTO
, guessed_bootdev
,
1118 CTLFLAG_RD
, &bootdev
, 0, "Boot device (not in cdev_t format)");
1121 * Initialize 386 and configure to run kernel
1125 * Initialize segments & interrupt table
1129 struct user_segment_descriptor gdt
[NGDT
* MAXCPU
]; /* global descriptor table */
1130 static struct gate_descriptor idt0
[NIDT
];
1131 struct gate_descriptor
*idt
= &idt0
[0]; /* interrupt descriptor table */
1133 union descriptor ldt
[NLDT
]; /* local descriptor table */
1136 /* table descriptors - used to load tables by cpu */
1137 struct region_descriptor r_gdt
, r_idt
;
1139 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
1140 extern int has_f00f_bug
;
1143 static char dblfault_stack
[PAGE_SIZE
] __aligned(16);
1145 /* JG proc0paddr is a virtual address */
1148 char proc0paddr_buff
[LWKT_THREAD_STACK
];
1151 /* software prototypes -- in more palatable form */
1152 struct soft_segment_descriptor gdt_segs
[] = {
1153 /* GNULL_SEL 0 Null Descriptor */
1154 { 0x0, /* segment base address */
1156 0, /* segment type */
1157 0, /* segment descriptor priority level */
1158 0, /* segment descriptor present */
1160 0, /* default 32 vs 16 bit size */
1161 0 /* limit granularity (byte/page units)*/ },
1162 /* GCODE_SEL 1 Code Descriptor for kernel */
1163 { 0x0, /* segment base address */
1164 0xfffff, /* length - all address space */
1165 SDT_MEMERA
, /* segment type */
1166 SEL_KPL
, /* segment descriptor priority level */
1167 1, /* segment descriptor present */
1169 0, /* default 32 vs 16 bit size */
1170 1 /* limit granularity (byte/page units)*/ },
1171 /* GDATA_SEL 2 Data Descriptor for kernel */
1172 { 0x0, /* segment base address */
1173 0xfffff, /* length - all address space */
1174 SDT_MEMRWA
, /* segment type */
1175 SEL_KPL
, /* segment descriptor priority level */
1176 1, /* segment descriptor present */
1178 0, /* default 32 vs 16 bit size */
1179 1 /* limit granularity (byte/page units)*/ },
1180 /* GUCODE32_SEL 3 32 bit Code Descriptor for user */
1181 { 0x0, /* segment base address */
1182 0xfffff, /* length - all address space */
1183 SDT_MEMERA
, /* segment type */
1184 SEL_UPL
, /* segment descriptor priority level */
1185 1, /* segment descriptor present */
1187 1, /* default 32 vs 16 bit size */
1188 1 /* limit granularity (byte/page units)*/ },
1189 /* GUDATA_SEL 4 32/64 bit Data Descriptor for user */
1190 { 0x0, /* segment base address */
1191 0xfffff, /* length - all address space */
1192 SDT_MEMRWA
, /* segment type */
1193 SEL_UPL
, /* segment descriptor priority level */
1194 1, /* segment descriptor present */
1196 1, /* default 32 vs 16 bit size */
1197 1 /* limit granularity (byte/page units)*/ },
1198 /* GUCODE_SEL 5 64 bit Code Descriptor for user */
1199 { 0x0, /* segment base address */
1200 0xfffff, /* length - all address space */
1201 SDT_MEMERA
, /* segment type */
1202 SEL_UPL
, /* segment descriptor priority level */
1203 1, /* segment descriptor present */
1205 0, /* default 32 vs 16 bit size */
1206 1 /* limit granularity (byte/page units)*/ },
1207 /* GPROC0_SEL 6 Proc 0 Tss Descriptor */
1209 0x0, /* segment base address */
1210 sizeof(struct x86_64tss
)-1,/* length - all address space */
1211 SDT_SYSTSS
, /* segment type */
1212 SEL_KPL
, /* segment descriptor priority level */
1213 1, /* segment descriptor present */
1215 0, /* unused - default 32 vs 16 bit size */
1216 0 /* limit granularity (byte/page units)*/ },
1217 /* Actually, the TSS is a system descriptor which is double size */
1218 { 0x0, /* segment base address */
1220 0, /* segment type */
1221 0, /* segment descriptor priority level */
1222 0, /* segment descriptor present */
1224 0, /* default 32 vs 16 bit size */
1225 0 /* limit granularity (byte/page units)*/ },
1226 /* GUGS32_SEL 8 32 bit GS Descriptor for user */
1227 { 0x0, /* segment base address */
1228 0xfffff, /* length - all address space */
1229 SDT_MEMRWA
, /* segment type */
1230 SEL_UPL
, /* segment descriptor priority level */
1231 1, /* segment descriptor present */
1233 1, /* default 32 vs 16 bit size */
1234 1 /* limit granularity (byte/page units)*/ },
/*
 * setidt - fill in a gate descriptor for the handler `func`.
 *
 * What this excerpt shows: the handler address is stored into
 * gd_looffset and, shifted right by 16, into gd_hioffset; the gate's
 * selector is always GSEL(GCODE_SEL, SEL_KPL).
 *
 * NOTE(review): the statements that point `ip` at a descriptor and that
 * consume idx, typ, dpl and ist are not visible in this excerpt --
 * confirm against the full source before relying on this summary.
 */
1238 setidt(int idx
, inthand_t
*func
, int typ
, int dpl
, int ist
)
1240 struct gate_descriptor
*ip
;
1243 ip
->gd_looffset
= (uintptr_t)func
;
1244 ip
->gd_selector
= GSEL(GCODE_SEL
, SEL_KPL
);
1250 ip
->gd_hioffset
= ((uintptr_t)func
)>>16 ;
/*
 * IDTVEC(name) pastes an "X" prefix onto `name` via __CONCAT, so
 * IDTVEC(div) names the symbol Xdiv.  The list that follows names the
 * handler symbols that other code in this file passes to setidt() and
 * writes to MSR_LSTAR / MSR_CSTAR (fast_syscall, fast_syscall32).
 *
 * NOTE(review): the declaration specifiers that introduce the list are
 * not visible in this excerpt (presumably `extern inthand_t`) -- confirm.
 * Xrsvdary is declared only when DEBUG_INTERRUPTS is defined.
 */
1253 #define IDTVEC(name) __CONCAT(X,name)
1256 IDTVEC(div
), IDTVEC(dbg
), IDTVEC(nmi
), IDTVEC(bpt
), IDTVEC(ofl
),
1257 IDTVEC(bnd
), IDTVEC(ill
), IDTVEC(dna
), IDTVEC(fpusegm
),
1258 IDTVEC(tss
), IDTVEC(missing
), IDTVEC(stk
), IDTVEC(prot
),
1259 IDTVEC(page
), IDTVEC(mchk
), IDTVEC(rsvd
), IDTVEC(fpu
), IDTVEC(align
),
1260 IDTVEC(xmm
), IDTVEC(dblfault
),
1261 IDTVEC(fast_syscall
), IDTVEC(fast_syscall32
);
1263 #ifdef DEBUG_INTERRUPTS
1264 extern inthand_t
*Xrsvdary
[256];
1268 sdtossd(struct user_segment_descriptor
*sd
, struct soft_segment_descriptor
*ssd
)
1270 ssd
->ssd_base
= (sd
->sd_hibase
<< 24) | sd
->sd_lobase
;
1271 ssd
->ssd_limit
= (sd
->sd_hilimit
<< 16) | sd
->sd_lolimit
;
1272 ssd
->ssd_type
= sd
->sd_type
;
1273 ssd
->ssd_dpl
= sd
->sd_dpl
;
1274 ssd
->ssd_p
= sd
->sd_p
;
1275 ssd
->ssd_def32
= sd
->sd_def32
;
1276 ssd
->ssd_gran
= sd
->sd_gran
;
1280 ssdtosd(struct soft_segment_descriptor
*ssd
, struct user_segment_descriptor
*sd
)
1283 sd
->sd_lobase
= (ssd
->ssd_base
) & 0xffffff;
1284 sd
->sd_hibase
= (ssd
->ssd_base
>> 24) & 0xff;
1285 sd
->sd_lolimit
= (ssd
->ssd_limit
) & 0xffff;
1286 sd
->sd_hilimit
= (ssd
->ssd_limit
>> 16) & 0xf;
1287 sd
->sd_type
= ssd
->ssd_type
;
1288 sd
->sd_dpl
= ssd
->ssd_dpl
;
1289 sd
->sd_p
= ssd
->ssd_p
;
1290 sd
->sd_long
= ssd
->ssd_long
;
1291 sd
->sd_def32
= ssd
->ssd_def32
;
1292 sd
->sd_gran
= ssd
->ssd_gran
;
1296 ssdtosyssd(struct soft_segment_descriptor
*ssd
,
1297 struct system_segment_descriptor
*sd
)
1300 sd
->sd_lobase
= (ssd
->ssd_base
) & 0xffffff;
1301 sd
->sd_hibase
= (ssd
->ssd_base
>> 24) & 0xfffffffffful
;
1302 sd
->sd_lolimit
= (ssd
->ssd_limit
) & 0xffff;
1303 sd
->sd_hilimit
= (ssd
->ssd_limit
>> 16) & 0xf;
1304 sd
->sd_type
= ssd
->ssd_type
;
1305 sd
->sd_dpl
= ssd
->ssd_dpl
;
1306 sd
->sd_p
= ssd
->ssd_p
;
1307 sd
->sd_gran
= ssd
->ssd_gran
;
1313 * Populate the (physmap) array with base/bound pairs describing the
1314 * available physical memory in the system, then test this memory and
1315 * build the phys_avail array describing the actually-available memory.
1317 * If we cannot accurately determine the physical memory map, then use
1318 * value from the 0xE801 call, and failing that, the RTC.
1320 * Total memory size may be set by the kernel environment variable
1321 * hw.physmem or the compile-time define MAXMEM.
1323 * XXX first should be vm_paddr_t.
1326 getmemsize(caddr_t kmdp
, u_int64_t first
)
1328 int i
, off
, physmap_idx
, pa_indx
, da_indx
;
1329 vm_paddr_t pa
, physmap
[PHYSMAP_SIZE
];
1330 u_long physmem_tunable
;
1332 struct bios_smap
*smapbase
, *smap
, *smapend
;
1334 quad_t dcons_addr
, dcons_size
;
1336 bzero(physmap
, sizeof(physmap
));
1341 * get memory map from INT 15:E820, kindly supplied by the loader.
1343 * subr_module.c says:
1344 * "Consumer may safely assume that size value precedes data."
1345 * ie: an int32_t immediately precedes smap.
1347 smapbase
= (struct bios_smap
*)preload_search_info(kmdp
,
1348 MODINFO_METADATA
| MODINFOMD_SMAP
);
1349 if (smapbase
== NULL
)
1350 panic("No BIOS smap info from loader!");
1352 smapsize
= *((u_int32_t
*)smapbase
- 1);
1353 smapend
= (struct bios_smap
*)((uintptr_t)smapbase
+ smapsize
);
1355 for (smap
= smapbase
; smap
< smapend
; smap
++) {
1356 if (boothowto
& RB_VERBOSE
)
1357 kprintf("SMAP type=%02x base=%016lx len=%016lx\n",
1358 smap
->type
, smap
->base
, smap
->length
);
1360 if (smap
->type
!= SMAP_TYPE_MEMORY
)
1363 if (smap
->length
== 0)
1366 for (i
= 0; i
<= physmap_idx
; i
+= 2) {
1367 if (smap
->base
< physmap
[i
+ 1]) {
1368 if (boothowto
& RB_VERBOSE
)
1370 "Overlapping or non-monotonic memory region, ignoring second region\n");
1375 if (smap
->base
== physmap
[physmap_idx
+ 1]) {
1376 physmap
[physmap_idx
+ 1] += smap
->length
;
1381 if (physmap_idx
== PHYSMAP_SIZE
) {
1383 "Too many segments in the physical address map, giving up\n");
1386 physmap
[physmap_idx
] = smap
->base
;
1387 physmap
[physmap_idx
+ 1] = smap
->base
+ smap
->length
;
1391 * Find the 'base memory' segment for SMP
1394 for (i
= 0; i
<= physmap_idx
; i
+= 2) {
1395 if (physmap
[i
] == 0x00000000) {
1396 basemem
= physmap
[i
+ 1] / 1024;
1401 panic("BIOS smap did not include a basemem segment!");
1404 /* make hole for AP bootstrap code */
1405 physmap
[1] = mp_bootaddress(physmap
[1] / 1024);
1407 /* look for the MP hardware - needed for apic addresses */
1412 * Maxmem isn't the "maximum memory", it's one larger than the
1413 * highest page of the physical address space. It should be
1414 * called something like "Maxphyspage". We may adjust this
1415 * based on ``hw.physmem'' and the results of the memory test.
1417 Maxmem
= atop(physmap
[physmap_idx
+ 1]);
1420 Maxmem
= MAXMEM
/ 4;
1423 if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable
))
1424 Maxmem
= atop(physmem_tunable
);
1427 * Don't allow MAXMEM or hw.physmem to extend the amount of memory
1430 if (Maxmem
> atop(physmap
[physmap_idx
+ 1]))
1431 Maxmem
= atop(physmap
[physmap_idx
+ 1]);
1433 if (atop(physmap
[physmap_idx
+ 1]) != Maxmem
&&
1434 (boothowto
& RB_VERBOSE
))
1435 kprintf("Physical memory use set to %ldK\n", Maxmem
* 4);
1437 /* call pmap initialization to make new kernel address space */
1438 pmap_bootstrap(&first
);
1441 * Size up each available chunk of physical memory.
1443 physmap
[0] = PAGE_SIZE
; /* mask off page 0 */
1446 phys_avail
[pa_indx
++] = physmap
[0];
1447 phys_avail
[pa_indx
] = physmap
[0];
1448 dump_avail
[da_indx
] = physmap
[0];
1452 * Get dcons buffer address
1454 if (kgetenv_quad("dcons.addr", &dcons_addr
) == 0 ||
1455 kgetenv_quad("dcons.size", &dcons_size
) == 0)
1459 * physmap is in bytes, so when converting to page boundaries,
1460 * round up the start address and round down the end address.
1462 for (i
= 0; i
<= physmap_idx
; i
+= 2) {
1465 end
= ptoa((vm_paddr_t
)Maxmem
);
1466 if (physmap
[i
+ 1] < end
)
1467 end
= trunc_page(physmap
[i
+ 1]);
1468 for (pa
= round_page(physmap
[i
]); pa
< end
; pa
+= PAGE_SIZE
) {
1469 int tmp
, page_bad
, full
;
1470 int *ptr
= (int *)CADDR1
;
1474 * block out kernel memory as not available.
1476 if (pa
>= 0x100000 && pa
< first
)
1480 * block out dcons buffer
1483 && pa
>= trunc_page(dcons_addr
)
1484 && pa
< dcons_addr
+ dcons_size
)
1490 * map page into kernel: valid, read/write,non-cacheable
1492 *pte
= pa
| PG_V
| PG_RW
| PG_N
;
1497 * Test for alternating 1's and 0's
1499 *(volatile int *)ptr
= 0xaaaaaaaa;
1500 if (*(volatile int *)ptr
!= 0xaaaaaaaa)
1503 * Test for alternating 0's and 1's
1505 *(volatile int *)ptr
= 0x55555555;
1506 if (*(volatile int *)ptr
!= 0x55555555)
1511 *(volatile int *)ptr
= 0xffffffff;
1512 if (*(volatile int *)ptr
!= 0xffffffff)
1517 *(volatile int *)ptr
= 0x0;
1518 if (*(volatile int *)ptr
!= 0x0)
1521 * Restore original value.
1526 * Adjust array of valid/good pages.
1528 if (page_bad
== TRUE
)
1531 * If this good page is a continuation of the
1532 * previous set of good pages, then just increase
1533 * the end pointer. Otherwise start a new chunk.
1534 * Note that "end" points one higher than end,
1535 * making the range >= start and < end.
1536 * If we're also doing a speculative memory
1537 * test and we are at or past the end, bump up Maxmem
1538 * so that we keep going. The first bad page
1539 * will terminate the loop.
1541 if (phys_avail
[pa_indx
] == pa
) {
1542 phys_avail
[pa_indx
] += PAGE_SIZE
;
1545 if (pa_indx
== PHYS_AVAIL_ARRAY_END
) {
1547 "Too many holes in the physical address space, giving up\n");
1552 phys_avail
[pa_indx
++] = pa
; /* start */
1553 phys_avail
[pa_indx
] = pa
+ PAGE_SIZE
; /* end */
1557 if (dump_avail
[da_indx
] == pa
) {
1558 dump_avail
[da_indx
] += PAGE_SIZE
;
1561 if (da_indx
== DUMP_AVAIL_ARRAY_END
) {
1565 dump_avail
[da_indx
++] = pa
; /* start */
1566 dump_avail
[da_indx
] = pa
+ PAGE_SIZE
; /* end */
1578 * The last chunk must contain at least one page plus the message
1579 * buffer to avoid complicating other code (message buffer address
1580 * calculation, etc.).
1582 while (phys_avail
[pa_indx
- 1] + PAGE_SIZE
+
1583 round_page(MSGBUF_SIZE
) >= phys_avail
[pa_indx
]) {
1584 physmem
-= atop(phys_avail
[pa_indx
] - phys_avail
[pa_indx
- 1]);
1585 phys_avail
[pa_indx
--] = 0;
1586 phys_avail
[pa_indx
--] = 0;
1589 Maxmem
= atop(phys_avail
[pa_indx
]);
1591 /* Trim off space for the message buffer. */
1592 phys_avail
[pa_indx
] -= round_page(MSGBUF_SIZE
);
1594 avail_end
= phys_avail
[pa_indx
];
1596 /* Map the message buffer. */
1597 for (off
= 0; off
< round_page(MSGBUF_SIZE
); off
+= PAGE_SIZE
)
1598 pmap_kenter((vm_offset_t
)msgbufp
+ off
, phys_avail
[pa_indx
] +
1611 * 7 Device Not Available (x87)
1613 * 9 Coprocessor Segment overrun (unsupported, reserved)
1615 * 11 Segment not present
1617 * 13 General Protection
1620 * 16 x87 FP Exception pending
1621 * 17 Alignment Check
1623 * 19 SIMD floating point
1625 * 32-255 INTn/external sources
1628 hammer_time(u_int64_t modulep
, u_int64_t physfree
)
1633 int metadata_missing
, off
;
1635 struct mdglobaldata
*gd
;
1641 * This must be done before the first references
1642 * to CPU_prvspace[0] are made.
1644 init_paging(&physfree
);
1648 * Prevent lowering of the ipl if we call tsleep() early.
1650 gd
= &CPU_prvspace
[0].mdglobaldata
;
1651 bzero(gd
, sizeof(*gd
));
1654 * Note: on both UP and SMP curthread must be set non-NULL
1655 * early in the boot sequence because the system assumes
1656 * that 'curthread' is never NULL.
1659 gd
->mi
.gd_curthread
= &thread0
;
1660 thread0
.td_gd
= &gd
->mi
;
1662 atdevbase
= ISA_HOLE_START
+ PTOV_OFFSET
;
1665 metadata_missing
= 0;
1666 if (bootinfo
.bi_modulep
) {
1667 preload_metadata
= (caddr_t
)bootinfo
.bi_modulep
+ KERNBASE
;
1668 preload_bootstrap_relocate(KERNBASE
);
1670 metadata_missing
= 1;
1672 if (bootinfo
.bi_envp
)
1673 kern_envp
= (caddr_t
)bootinfo
.bi_envp
+ KERNBASE
;
1676 preload_metadata
= (caddr_t
)(uintptr_t)(modulep
+ PTOV_OFFSET
);
1677 preload_bootstrap_relocate(PTOV_OFFSET
);
1678 kmdp
= preload_search_by_type("elf kernel");
1680 kmdp
= preload_search_by_type("elf64 kernel");
1681 boothowto
= MD_FETCH(kmdp
, MODINFOMD_HOWTO
, int);
1682 kern_envp
= MD_FETCH(kmdp
, MODINFOMD_ENVP
, char *) + PTOV_OFFSET
;
1684 ksym_start
= MD_FETCH(kmdp
, MODINFOMD_SSYM
, uintptr_t);
1685 ksym_end
= MD_FETCH(kmdp
, MODINFOMD_ESYM
, uintptr_t);
1689 * start with one cpu. Note: with one cpu, ncpus2_shift, ncpus2_mask,
1690 * and ncpus_fit_mask remain 0.
1695 /* Init basic tunables, hz etc */
1699 * make gdt memory segments
1701 gdt_segs
[GPROC0_SEL
].ssd_base
=
1702 (uintptr_t) &CPU_prvspace
[0].mdglobaldata
.gd_common_tss
;
1704 gd
->mi
.gd_prvspace
= &CPU_prvspace
[0];
1706 for (x
= 0; x
< NGDT
; x
++) {
1707 if (x
!= GPROC0_SEL
&& x
!= (GPROC0_SEL
+ 1))
1708 ssdtosd(&gdt_segs
[x
], &gdt
[x
]);
1710 ssdtosyssd(&gdt_segs
[GPROC0_SEL
],
1711 (struct system_segment_descriptor
*)&gdt
[GPROC0_SEL
]);
1713 r_gdt
.rd_limit
= NGDT
* sizeof(gdt
[0]) - 1;
1714 r_gdt
.rd_base
= (long) gdt
;
1717 wrmsr(MSR_FSBASE
, 0); /* User value */
1718 wrmsr(MSR_GSBASE
, (u_int64_t
)&gd
->mi
);
1719 wrmsr(MSR_KGSBASE
, 0); /* User value while in the kernel */
1721 mi_gdinit(&gd
->mi
, 0);
1723 proc0paddr
= proc0paddr_buff
;
1724 mi_proc0init(&gd
->mi
, proc0paddr
);
1725 safepri
= TDPRI_MAX
;
1727 /* spinlocks and the BGL */
1731 for (x
= 0; x
< NIDT
; x
++)
1732 setidt(x
, &IDTVEC(rsvd
), SDT_SYSIGT
, SEL_KPL
, 0);
1733 setidt(IDT_DE
, &IDTVEC(div
), SDT_SYSIGT
, SEL_KPL
, 0);
1734 setidt(IDT_DB
, &IDTVEC(dbg
), SDT_SYSIGT
, SEL_KPL
, 0);
1735 setidt(IDT_NMI
, &IDTVEC(nmi
), SDT_SYSIGT
, SEL_KPL
, 1);
1736 setidt(IDT_BP
, &IDTVEC(bpt
), SDT_SYSIGT
, SEL_UPL
, 0);
1737 setidt(IDT_OF
, &IDTVEC(ofl
), SDT_SYSIGT
, SEL_KPL
, 0);
1738 setidt(IDT_BR
, &IDTVEC(bnd
), SDT_SYSIGT
, SEL_KPL
, 0);
1739 setidt(IDT_UD
, &IDTVEC(ill
), SDT_SYSIGT
, SEL_KPL
, 0);
1740 setidt(IDT_NM
, &IDTVEC(dna
), SDT_SYSIGT
, SEL_KPL
, 0);
1741 setidt(IDT_DF
, &IDTVEC(dblfault
), SDT_SYSIGT
, SEL_KPL
, 1);
1742 setidt(IDT_FPUGP
, &IDTVEC(fpusegm
), SDT_SYSIGT
, SEL_KPL
, 0);
1743 setidt(IDT_TS
, &IDTVEC(tss
), SDT_SYSIGT
, SEL_KPL
, 0);
1744 setidt(IDT_NP
, &IDTVEC(missing
), SDT_SYSIGT
, SEL_KPL
, 0);
1745 setidt(IDT_SS
, &IDTVEC(stk
), SDT_SYSIGT
, SEL_KPL
, 0);
1746 setidt(IDT_GP
, &IDTVEC(prot
), SDT_SYSIGT
, SEL_KPL
, 0);
1747 setidt(IDT_PF
, &IDTVEC(page
), SDT_SYSIGT
, SEL_KPL
, 0);
1748 setidt(IDT_MF
, &IDTVEC(fpu
), SDT_SYSIGT
, SEL_KPL
, 0);
1749 setidt(IDT_AC
, &IDTVEC(align
), SDT_SYSIGT
, SEL_KPL
, 0);
1750 setidt(IDT_MC
, &IDTVEC(mchk
), SDT_SYSIGT
, SEL_KPL
, 0);
1751 setidt(IDT_XF
, &IDTVEC(xmm
), SDT_SYSIGT
, SEL_KPL
, 0);
1753 r_idt
.rd_limit
= sizeof(idt0
) - 1;
1754 r_idt
.rd_base
= (long) idt
;
1758 * Initialize the console before we print anything out.
1763 if (metadata_missing
)
1764 kprintf("WARNING: loader(8) metadata is missing!\n");
1774 if (boothowto
& RB_KDB
)
1775 Debugger("Boot flags requested debugger");
1779 finishidentcpu(); /* Final stage of CPU initialization */
1780 setidt(6, &IDTVEC(ill
), SDT_SYS386TGT
, SEL_KPL
, GSEL(GCODE_SEL
, SEL_KPL
));
1781 setidt(13, &IDTVEC(prot
), SDT_SYS386TGT
, SEL_KPL
, GSEL(GCODE_SEL
, SEL_KPL
));
1783 identify_cpu(); /* Final stage of CPU initialization */
1784 initializecpu(); /* Initialize CPU registers */
1786 /* make an initial tss so cpu can get interrupt stack on syscall! */
1787 gd
->gd_common_tss
.tss_rsp0
=
1788 (register_t
)(thread0
.td_kstack
+
1789 KSTACK_PAGES
* PAGE_SIZE
- sizeof(struct pcb
));
1790 /* Ensure the stack is aligned to 16 bytes */
1791 gd
->gd_common_tss
.tss_rsp0
&= ~0xFul
;
1792 gd
->gd_rsp0
= gd
->gd_common_tss
.tss_rsp0
;
1794 /* doublefault stack space, runs on ist1 */
1795 gd
->gd_common_tss
.tss_ist1
= (long)&dblfault_stack
[sizeof(dblfault_stack
)];
1797 /* Set the IO permission bitmap (empty due to tss seg limit) */
1798 gd
->gd_common_tss
.tss_iobase
= sizeof(struct x86_64tss
);
1800 gsel_tss
= GSEL(GPROC0_SEL
, SEL_KPL
);
1801 gd
->gd_tss_gdt
= &gdt
[GPROC0_SEL
];
1802 gd
->gd_common_tssd
= *gd
->gd_tss_gdt
;
1805 /* Set up the fast syscall stuff */
1806 msr
= rdmsr(MSR_EFER
) | EFER_SCE
;
1807 wrmsr(MSR_EFER
, msr
);
1808 wrmsr(MSR_LSTAR
, (u_int64_t
)IDTVEC(fast_syscall
));
1809 wrmsr(MSR_CSTAR
, (u_int64_t
)IDTVEC(fast_syscall32
));
1810 msr
= ((u_int64_t
)GSEL(GCODE_SEL
, SEL_KPL
) << 32) |
1811 ((u_int64_t
)GSEL(GUCODE32_SEL
, SEL_UPL
) << 48);
1812 wrmsr(MSR_STAR
, msr
);
1813 wrmsr(MSR_SF_MASK
, PSL_NT
|PSL_T
|PSL_I
|PSL_C
|PSL_D
);
1815 getmemsize(kmdp
, physfree
);
1816 init_param2(physmem
);
1818 /* now running on new page tables, configured,and u/iom is accessible */
1820 /* Map the message buffer. */
1822 for (off
= 0; off
< round_page(MSGBUF_SIZE
); off
+= PAGE_SIZE
)
1823 pmap_kenter((vm_offset_t
)msgbufp
+ off
, avail_end
+ off
);
1826 msgbufinit(msgbufp
, MSGBUF_SIZE
);
1829 /* transfer to user mode */
1831 _ucodesel
= GSEL(GUCODE_SEL
, SEL_UPL
);
1832 _udatasel
= GSEL(GUDATA_SEL
, SEL_UPL
);
1833 _ucode32sel
= GSEL(GUCODE32_SEL
, SEL_UPL
);
1839 /* setup proc 0's pcb */
1840 thread0
.td_pcb
->pcb_flags
= 0;
1841 thread0
.td_pcb
->pcb_cr3
= KPML4phys
;
1842 thread0
.td_pcb
->pcb_ext
= 0;
1843 lwp0
.lwp_md
.md_regs
= &proc0_tf
;
1844 env
= kgetenv("kernelname");
1846 strlcpy(kernelname
, env
, sizeof(kernelname
));
1848 /* Location of kernel stack for locore */
1849 return ((u_int64_t
)thread0
.td_pcb
);
1853 * Initialize machine-dependent portions of the global data structure.
1854 * Note that the global data area and cpu0's idlestack in the private
1855 * data space were allocated in locore.
1857 * Note: the idlethread's cpl is 0
1859 * WARNING! Called from early boot, 'mycpu' may not work yet.
/*
 * cpu_gdinit - set up the idle thread inside a cpu's machine-dependent
 * globaldata.
 *
 * gd   per-cpu mdglobaldata being initialised
 * cpu  cpu number; in the visible code it is used to build the thread
 *      name "idle_%d"
 *
 * The idle thread (gd->mi.gd_idlethread) is initialised on the idlestack
 * found through gd->mi.gd_prvspace, with TDF_MPSAFE passed as its flags
 * argument.  Its td_switch hook is set to cpu_lwkt_switch, and the address of
 * cpu_idle_restore is pushed onto its stack: td_sp is lowered by one
 * pointer and the address is stored at the new td_sp.
 *
 * NOTE(review): gd_curthread is pointed at the idle thread first; any
 * condition guarding that assignment is not visible in this excerpt.
 */
1862 cpu_gdinit(struct mdglobaldata
*gd
, int cpu
)
1865 gd
->mi
.gd_curthread
= &gd
->mi
.gd_idlethread
;
1867 lwkt_init_thread(&gd
->mi
.gd_idlethread
,
1868 gd
->mi
.gd_prvspace
->idlestack
,
1869 sizeof(gd
->mi
.gd_prvspace
->idlestack
),
1870 TDF_MPSAFE
, &gd
->mi
);
1871 lwkt_set_comm(&gd
->mi
.gd_idlethread
, "idle_%d", cpu
);
1872 gd
->mi
.gd_idlethread
.td_switch
= cpu_lwkt_switch
;
1873 gd
->mi
.gd_idlethread
.td_sp
-= sizeof(void *);
1874 *(void **)gd
->mi
.gd_idlethread
.td_sp
= cpu_idle_restore
;
/*
 * is_globaldata_space - range test against the CPU_prvspace array.
 *
 * The visible condition holds when saddr is at or above the address of
 * CPU_prvspace[0] and eaddr is at or below the address of
 * CPU_prvspace[MAXCPU] (one past the last element, assuming the array
 * has MAXCPU entries -- TODO confirm).
 *
 * NOTE(review): the values returned on each branch are not visible in
 * this excerpt.
 */
1878 is_globaldata_space(vm_offset_t saddr
, vm_offset_t eaddr
)
1880 if (saddr
>= (vm_offset_t
)&CPU_prvspace
[0] &&
1881 eaddr
<= (vm_offset_t
)&CPU_prvspace
[MAXCPU
]) {
/*
 * globaldata_find - return the address of the `mi` member of the
 * mdglobaldata embedded in CPU_prvspace[cpu].
 *
 * cpu must satisfy 0 <= cpu < ncpus; this is checked with KKASSERT
 * before the array is indexed.
 */
1888 globaldata_find(int cpu
)
1890 KKASSERT(cpu
>= 0 && cpu
< ncpus
);
1891 return(&CPU_prvspace
[cpu
].mdglobaldata
.mi
);
/*
 * f00f_hack - relocate the IDT as a workaround for the Pentium F00F bug.
 * Compiled only when I586_CPU is defined and NO_F00F_HACK is not, and
 * registered through SYSINIT at SI_BOOT2_BIOS / SI_ORDER_ANY.
 *
 * Visible steps: allocate two pages from kernel_map, place the new IDT so
 * that it starts 7*8 bytes before the page boundary (the original comment
 * says this puts the first seven entries in the lower page), copy the
 * current IDT into it, point r_idt at the copy, and make the lower page
 * read-only with vm_map_protect().  Each failure path panics.
 *
 * NOTE(review): this looks inherited from 32-bit code.  The casts
 * `(unsigned int)tmp` and `(int)new_idt` would truncate a 64-bit
 * address, and `7*8` assumes 8-byte gate descriptors -- confirm
 * sizeof(struct gate_descriptor) and whether this block is ever built
 * for this platform.
 */
1894 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
1895 static void f00f_hack(void *unused
);
1896 SYSINIT(f00f_hack
, SI_BOOT2_BIOS
, SI_ORDER_ANY
, f00f_hack
, NULL
);
1899 f00f_hack(void *unused
)
1901 struct gate_descriptor
*new_idt
;
1907 kprintf("Intel Pentium detected, installing workaround for F00F bug\n");
1909 r_idt
.rd_limit
= sizeof(idt0
) - 1;
1911 tmp
= kmem_alloc(&kernel_map
, PAGE_SIZE
* 2);
1913 panic("kmem_alloc returned 0");
1914 if (((unsigned int)tmp
& (PAGE_SIZE
-1)) != 0)
1915 panic("kmem_alloc returned non-page-aligned memory");
1916 /* Put the first seven entries in the lower page */
1917 new_idt
= (struct gate_descriptor
*)(tmp
+ PAGE_SIZE
- (7*8));
1918 bcopy(idt
, new_idt
, sizeof(idt0
));
1919 r_idt
.rd_base
= (int)new_idt
;
1922 if (vm_map_protect(&kernel_map
, tmp
, tmp
+ PAGE_SIZE
,
1923 VM_PROT_READ
, FALSE
) != KERN_SUCCESS
)
1924 panic("vm_map_protect failed");
1927 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
/*
 * ptrace_set_pc - set the saved instruction pointer of an lwp.
 *
 * lp    lwp whose saved register frame (lwp_md.md_regs) is modified
 * addr  value stored into tf_rip
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1930 ptrace_set_pc(struct lwp
*lp
, unsigned long addr
)
1932 lp
->lwp_md
.md_regs
->tf_rip
= addr
;
/*
 * ptrace_single_step - set PSL_T in the lwp's saved tf_rflags.
 * PSL_T is presumably the rflags trap (single-step) bit -- confirm
 * against the psl definitions.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1937 ptrace_single_step(struct lwp
*lp
)
1939 lp
->lwp_md
.md_regs
->tf_rflags
|= PSL_T
;
/*
 * fill_regs - copy an lwp's saved registers out to a struct reg.
 *
 * lp    lwp whose trapframe (lwp_md.md_regs) is read
 * regs  destination
 *
 * The copy is a single bcopy() of sizeof(*regs) bytes starting at
 * tp->tf_rdi and landing at regs->r_rdi, so it relies on struct reg and
 * struct trapframe sharing a layout from those members onward, and on
 * r_rdi being the first member of struct reg -- TODO confirm both.
 *
 * NOTE(review): the token `®s` in the bcopy() arguments looks like a
 * mis-encoded `&regs` (an HTML `&reg` entity decoded in transit); restore
 * it when this file is repaired.
 * NOTE(review): `pcb` is assigned at the end but no later use of it is
 * visible, and the return statement is not visible either.
 */
1944 fill_regs(struct lwp
*lp
, struct reg
*regs
)
1947 struct trapframe
*tp
;
1949 tp
= lp
->lwp_md
.md_regs
;
1950 bcopy(&tp
->tf_rdi
, ®s
->r_rdi
, sizeof(*regs
));
1952 pcb
= lp
->lwp_thread
->td_pcb
;
/*
 * set_regs - load an lwp's saved registers from a struct reg.
 *
 * lp    lwp whose trapframe (lwp_md.md_regs) is overwritten
 * regs  source values
 *
 * The new rflags are checked with EFL_SECURE() against the current
 * tf_rflags, and the new %cs with CS_SECURE(), before anything is
 * written.  The copy itself is a single bcopy() of sizeof(*regs) bytes
 * from regs->r_rdi to tp->tf_rdi, so it relies on the two structures
 * sharing a layout from those members onward -- TODO confirm.
 *
 * NOTE(review): the statement executed when the security check fails is
 * not visible in this excerpt (presumably an error return), nor is the
 * final return.
 * NOTE(review): the token `®s` in the bcopy() arguments looks like a
 * mis-encoded `&regs`; restore it when this file is repaired.
 * NOTE(review): `pcb` is assigned at the end but no later use is visible.
 */
1957 set_regs(struct lwp
*lp
, struct reg
*regs
)
1960 struct trapframe
*tp
;
1962 tp
= lp
->lwp_md
.md_regs
;
1963 if (!EFL_SECURE(regs
->r_rflags
, tp
->tf_rflags
) ||
1964 !CS_SECURE(regs
->r_cs
))
1966 bcopy(®s
->r_rdi
, &tp
->tf_rdi
, sizeof(*regs
));
1967 pcb
= lp
->lwp_thread
->td_pcb
;
1971 #ifndef CPU_DISABLE_SSE
/*
 * fill_fpregs_xmm - convert a struct savexmm image into struct save87.
 *
 * sv_xmm  source image
 * sv_87   destination image
 *
 * Copies the environment fields en_cw, en_sw, en_tw, en_fip, en_fcs,
 * en_opcode, en_foo and en_fos, then the eight register slots
 * (sv_fp[i].fp_acc into sv_ac[i]), then sv_ex_sw.
 *
 * NOTE(review): the storage class, return type and the declaration of
 * the loop index `i` are not visible in this excerpt.
 */
1973 fill_fpregs_xmm(struct savexmm
*sv_xmm
, struct save87
*sv_87
)
1975 struct env87
*penv_87
= &sv_87
->sv_env
;
1976 struct envxmm
*penv_xmm
= &sv_xmm
->sv_env
;
1979 /* FPU control/status */
1980 penv_87
->en_cw
= penv_xmm
->en_cw
;
1981 penv_87
->en_sw
= penv_xmm
->en_sw
;
1982 penv_87
->en_tw
= penv_xmm
->en_tw
;
1983 penv_87
->en_fip
= penv_xmm
->en_fip
;
1984 penv_87
->en_fcs
= penv_xmm
->en_fcs
;
1985 penv_87
->en_opcode
= penv_xmm
->en_opcode
;
1986 penv_87
->en_foo
= penv_xmm
->en_foo
;
1987 penv_87
->en_fos
= penv_xmm
->en_fos
;
1990 for (i
= 0; i
< 8; ++i
)
1991 sv_87
->sv_ac
[i
] = sv_xmm
->sv_fp
[i
].fp_acc
;
1993 sv_87
->sv_ex_sw
= sv_xmm
->sv_ex_sw
;
/*
 * set_fpregs_xmm - convert a struct save87 image into struct savexmm.
 * This is the inverse of fill_fpregs_xmm().
 *
 * sv_87   source image
 * sv_xmm  destination image
 *
 * Copies the environment fields en_cw, en_sw, en_tw, en_fip, en_fcs,
 * en_opcode, en_foo and en_fos, then the eight register slots
 * (sv_ac[i] into sv_fp[i].fp_acc), then sv_ex_sw.  No other member of
 * *sv_xmm is written by the visible code.
 *
 * NOTE(review): the storage class, return type and the declaration of
 * the loop index `i` are not visible in this excerpt.
 */
1997 set_fpregs_xmm(struct save87
*sv_87
, struct savexmm
*sv_xmm
)
1999 struct env87
*penv_87
= &sv_87
->sv_env
;
2000 struct envxmm
*penv_xmm
= &sv_xmm
->sv_env
;
2003 /* FPU control/status */
2004 penv_xmm
->en_cw
= penv_87
->en_cw
;
2005 penv_xmm
->en_sw
= penv_87
->en_sw
;
2006 penv_xmm
->en_tw
= penv_87
->en_tw
;
2007 penv_xmm
->en_fip
= penv_87
->en_fip
;
2008 penv_xmm
->en_fcs
= penv_87
->en_fcs
;
2009 penv_xmm
->en_opcode
= penv_87
->en_opcode
;
2010 penv_xmm
->en_foo
= penv_87
->en_foo
;
2011 penv_xmm
->en_fos
= penv_87
->en_fos
;
2014 for (i
= 0; i
< 8; ++i
)
2015 sv_xmm
->sv_fp
[i
].fp_acc
= sv_87
->sv_ac
[i
];
2017 sv_xmm
->sv_ex_sw
= sv_87
->sv_ex_sw
;
2019 #endif /* CPU_DISABLE_SSE */
/*
 * fill_fpregs - export an lwp's saved FPU state into *fpregs.
 *
 * lp      lwp whose pcb (lwp_thread->td_pcb) is read
 * fpregs  destination; treated as a struct save87 on the SSE path
 *
 * Two ways of filling fpregs are visible: when CPU_DISABLE_SSE is not
 * defined, pcb_save.sv_xmm is converted with fill_fpregs_xmm(); the
 * other copies pcb_save.sv_87 verbatim with bcopy().
 *
 * NOTE(review): the lines between the two are not visible in this
 * excerpt; presumably a run-time test and early return choose between
 * them -- confirm.  The return statement is not visible either.
 */
2022 fill_fpregs(struct lwp
*lp
, struct fpreg
*fpregs
)
2024 #ifndef CPU_DISABLE_SSE
2026 fill_fpregs_xmm(&lp
->lwp_thread
->td_pcb
->pcb_save
.sv_xmm
,
2027 (struct save87
*)fpregs
);
2030 #endif /* CPU_DISABLE_SSE */
2031 bcopy(&lp
->lwp_thread
->td_pcb
->pcb_save
.sv_87
, fpregs
, sizeof *fpregs
);
/*
 * set_fpregs - import FPU state from *fpregs into an lwp's pcb.
 *
 * lp      lwp whose pcb (lwp_thread->td_pcb) is written
 * fpregs  source; treated as a struct save87 on the SSE path
 *
 * Two ways of storing the state are visible: when CPU_DISABLE_SSE is
 * not defined, fpregs is converted into pcb_save.sv_xmm with
 * set_fpregs_xmm(); the other copies fpregs verbatim into
 * pcb_save.sv_87 with bcopy().
 *
 * NOTE(review): the lines between the two are not visible in this
 * excerpt; presumably a run-time test and early return choose between
 * them -- confirm.  The return statement is not visible either.
 */
2036 set_fpregs(struct lwp
*lp
, struct fpreg
*fpregs
)
2038 #ifndef CPU_DISABLE_SSE
2040 set_fpregs_xmm((struct save87
*)fpregs
,
2041 &lp
->lwp_thread
->td_pcb
->pcb_save
.sv_xmm
);
2044 #endif /* CPU_DISABLE_SSE */
2045 bcopy(fpregs
, &lp
->lwp_thread
->td_pcb
->pcb_save
.sv_87
, sizeof *fpregs
);
/*
 * fill_dbregs - report debug-register contents in dbregs->dr[].
 *
 * lp      lwp whose pcb supplies the saved values
 * dbregs  destination array
 *
 * Two sources are visible: the first group fills dr[0..7] from
 * rdr0()..rdr7(); the second fills dr[0..3], dr[6] and dr[7] from the
 * pcb_dr* fields of lp->lwp_thread->td_pcb.
 *
 * NOTE(review): the condition selecting between the two groups, what is
 * stored in dr[4] and dr[5] on the pcb path, and the return statement
 * are not visible in this excerpt.
 */
2050 fill_dbregs(struct lwp
*lp
, struct dbreg
*dbregs
)
2053 dbregs
->dr
[0] = rdr0();
2054 dbregs
->dr
[1] = rdr1();
2055 dbregs
->dr
[2] = rdr2();
2056 dbregs
->dr
[3] = rdr3();
2057 dbregs
->dr
[4] = rdr4();
2058 dbregs
->dr
[5] = rdr5();
2059 dbregs
->dr
[6] = rdr6();
2060 dbregs
->dr
[7] = rdr7();
2064 pcb
= lp
->lwp_thread
->td_pcb
;
2065 dbregs
->dr
[0] = pcb
->pcb_dr0
;
2066 dbregs
->dr
[1] = pcb
->pcb_dr1
;
2067 dbregs
->dr
[2] = pcb
->pcb_dr2
;
2068 dbregs
->dr
[3] = pcb
->pcb_dr3
;
2071 dbregs
->dr
[6] = pcb
->pcb_dr6
;
2072 dbregs
->dr
[7] = pcb
->pcb_dr7
;
/*
 * set_dbregs - install debug-register values from dbregs->dr[].
 *
 * lp      lwp whose pcb receives the values on the validated path
 * dbregs  requested register values
 *
 * Visible behaviour, in order of appearance:
 *  - one group passes dr[0..7] straight to load_dr0()..load_dr7();
 *  - a loop examines four 2-bit fields of dr[7], at bit offsets 16, 20,
 *    24 and 28, and tests each for the pattern binary 10;
 *  - when priv_check_cred(ucred, PRIV_ROOT, 0) returns non-zero, every
 *    breakpoint enabled in dr[7] (bit pairs 0-1, 2-3, 4-5, 6-7) has its
 *    address dr[0..3] compared against VM_MAX_USER_ADDRESS;
 *  - dr[0..3], dr[6] and dr[7] are stored into the pcb_dr* fields and
 *    PCB_DBREGS is set in pcb_flags.
 *
 * NOTE(review): the condition selecting the direct-load group, the
 * action taken when a dr7 pattern or an address is rejected (presumably
 * an error return), and the final return are not visible in this
 * excerpt -- confirm against the full source.
 */
2078 set_dbregs(struct lwp
*lp
, struct dbreg
*dbregs
)
2081 load_dr0(dbregs
->dr
[0]);
2082 load_dr1(dbregs
->dr
[1]);
2083 load_dr2(dbregs
->dr
[2]);
2084 load_dr3(dbregs
->dr
[3]);
2085 load_dr4(dbregs
->dr
[4]);
2086 load_dr5(dbregs
->dr
[5]);
2087 load_dr6(dbregs
->dr
[6]);
2088 load_dr7(dbregs
->dr
[7]);
2091 struct ucred
*ucred
;
2093 uint64_t mask1
, mask2
;
2096 * Don't let an illegal value for dr7 get set. Specifically,
2097 * check for undefined settings. Setting these bit patterns
2098 * result in undefined behaviour and can lead to an unexpected
2101 /* JG this loop looks unreadable */
2102 /* Check 4 2-bit fields for invalid patterns.
2103 * These fields are R/Wi, for i = 0..3
2105 /* Is 10 in LENi allowed when running in compatibility mode? */
2106 /* Pattern 10 in R/Wi might be used to indicate
2107 * breakpoint on I/O. Further analysis should be
2108 * carried to decide if it is safe and useful to
2109 * provide access to that capability
2111 for (i
= 0, mask1
= 0x3<<16, mask2
= 0x2<<16; i
< 4;
2112 i
++, mask1
<<= 4, mask2
<<= 4)
2113 if ((dbregs
->dr
[7] & mask1
) == mask2
)
2116 pcb
= lp
->lwp_thread
->td_pcb
;
2117 ucred
= lp
->lwp_proc
->p_ucred
;
2120 * Don't let a process set a breakpoint that is not within the
2121 * process's address space. If a process could do this, it
2122 * could halt the system by setting a breakpoint in the kernel
2123 * (if ddb was enabled). Thus, we need to check to make sure
2124 * that no breakpoints are being enabled for addresses outside
2125 * process's address space, unless, perhaps, we were called by
2128 * XXX - what about when the watched area of the user's
2129 * address space is written into from within the kernel
2130 * ... wouldn't that still cause a breakpoint to be generated
2131 * from within kernel mode?
2134 if (priv_check_cred(ucred
, PRIV_ROOT
, 0) != 0) {
2135 if (dbregs
->dr
[7] & 0x3) {
2136 /* dr0 is enabled */
2137 if (dbregs
->dr
[0] >= VM_MAX_USER_ADDRESS
)
2141 if (dbregs
->dr
[7] & (0x3<<2)) {
2142 /* dr1 is enabled */
2143 if (dbregs
->dr
[1] >= VM_MAX_USER_ADDRESS
)
2147 if (dbregs
->dr
[7] & (0x3<<4)) {
2148 /* dr2 is enabled */
2149 if (dbregs
->dr
[2] >= VM_MAX_USER_ADDRESS
)
2153 if (dbregs
->dr
[7] & (0x3<<6)) {
2154 /* dr3 is enabled */
2155 if (dbregs
->dr
[3] >= VM_MAX_USER_ADDRESS
)
2160 pcb
->pcb_dr0
= dbregs
->dr
[0];
2161 pcb
->pcb_dr1
= dbregs
->dr
[1];
2162 pcb
->pcb_dr2
= dbregs
->dr
[2];
2163 pcb
->pcb_dr3
= dbregs
->dr
[3];
2164 pcb
->pcb_dr6
= dbregs
->dr
[6];
2165 pcb
->pcb_dr7
= dbregs
->dr
[7];
2167 pcb
->pcb_flags
|= PCB_DBREGS
;
2174 * Return > 0 if a hardware breakpoint has been hit, and the
2175 * breakpoint was in user space. Return 0, otherwise.
/*
 * user_dbreg_trap - decide whether a debug trap came from a hardware
 * breakpoint on a user-space address.
 *
 * Visible steps: if the low eight bits of dr7 are all zero, no
 * breakpoint is enabled and the trap cannot have come from the debug
 * registers.  Otherwise the addresses of the breakpoints that fired are
 * gathered into addr[] from rdr0()..rdr3() (nbp counts them), and each
 * one is compared against VM_MAX_USER_ADDRESS.
 *
 * NOTE(review): the reads of dr7/dr6, the tests on `bp` that guard each
 * addr[] store, the left-hand side of the address comparison and every
 * return statement are not visible in this excerpt -- confirm against
 * the full source.
 */
2178 user_dbreg_trap(void)
2180 u_int64_t dr7
, dr6
; /* debug registers dr6 and dr7 */
2181 u_int64_t bp
; /* breakpoint bits extracted from dr6 */
2182 int nbp
; /* number of breakpoints that triggered */
2183 caddr_t addr
[4]; /* breakpoint addresses */
2187 if ((dr7
& 0xff) == 0) {
2189 * all GE and LE bits in the dr7 register are zero,
2190 * thus the trap couldn't have been caused by the
2191 * hardware debug registers
2202 * None of the breakpoint bits are set meaning this
2203 * trap was not caused by any of the debug registers
2209 * at least one of the breakpoints were hit, check to see
2210 * which ones and if any of them are user space addresses
2214 addr
[nbp
++] = (caddr_t
)rdr0();
2217 addr
[nbp
++] = (caddr_t
)rdr1();
2220 addr
[nbp
++] = (caddr_t
)rdr2();
2223 addr
[nbp
++] = (caddr_t
)rdr3();
2226 for (i
=0; i
<nbp
; i
++) {
2228 (caddr_t
)VM_MAX_USER_ADDRESS
) {
2230 * addr[i] is in user space
2237 * None of the breakpoints are in user space.
/*
 * Debugger - announce that the debugger entry point was called.
 *
 * msg  text included in the console line printed with kprintf()
 *
 * NOTE(review): only the kprintf() call is visible in this excerpt; any
 * further action is not shown.
 */
2245 Debugger(const char *msg
)
2247 kprintf("Debugger(\"%s\") called.\n", msg
);
2254 * Provide inb() and outb() as functions. They are normally only
2255 * available as macros calling inlined functions, thus cannot be
2256 * called inside DDB.
2258 * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
2264 /* silence compiler warnings */
2266 void outb(u_int
, u_char
);
2273 * We use %%dx and not %1 here because i/o is done at %dx and not at
2274 * %edx, while gcc generates inferior code (movw instead of movl)
2275 * if we tell it to load (u_short) port.
2277 __asm
__volatile("inb %%dx,%0" : "=a" (data
) : "d" (port
));
/*
 * outb - out-of-line byte write to an I/O port.
 *
 * port  I/O port; passed to the asm through the "d" register constraint
 * data  byte to write
 *
 * The asm statement emits `outb` with a local `al` bound to the "a"
 * register constraint.
 *
 * NOTE(review): the declaration of `al` and its assignment from `data`
 * are not visible in this excerpt.
 */
2282 outb(u_int port
, u_char data
)
2286 * Use an unnecessary assignment to help gcc's register allocator.
2287 * This make a large difference for gcc-1.40 and a tiny difference
2288 * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
2289 * best results. gcc-2.6.0 can't handle this.
2292 __asm
__volatile("outb %0,%%dx" : : "a" (al
), "d" (port
));
2299 #include "opt_cpu.h"
2303 * initialize all the SMP locks
2306 /* critical region when masking or unmasking interrupts */
2307 struct spinlock_deprecated imen_spinlock
;
2309 /* Make FAST_INTR() routines sequential */
2310 struct spinlock_deprecated fast_intr_spinlock
;
2312 /* critical region for old style disable_intr/enable_intr */
2313 struct spinlock_deprecated mpintr_spinlock
;
2315 /* critical region around INTR() routines */
2316 struct spinlock_deprecated intr_spinlock
;
2318 /* lock region used by kernel profiling */
2319 struct spinlock_deprecated mcount_spinlock
;
2321 /* locks com (tty) data/hardware accesses: a FASTINTR() */
2322 struct spinlock_deprecated com_spinlock
;
2324 /* locks kernel kprintfs */
2325 struct spinlock_deprecated cons_spinlock
;
2327 /* lock regions around the clock hardware */
2328 struct spinlock_deprecated clock_spinlock
;
2330 /* lock around the MP rendezvous */
2331 struct spinlock_deprecated smp_rv_spinlock
;
2337 * mp_lock = 0; BSP already owns the MP lock
2340 * Get the initial mp_lock with a count of 1 for the BSP.
2341 * This uses a LOGICAL cpu ID, ie BSP == 0.
2344 cpu_get_initial_mplock();
2347 spin_lock_init(&mcount_spinlock
);
2348 spin_lock_init(&fast_intr_spinlock
);
2349 spin_lock_init(&intr_spinlock
);
2350 spin_lock_init(&mpintr_spinlock
);
2351 spin_lock_init(&imen_spinlock
);
2352 spin_lock_init(&smp_rv_spinlock
);
2353 spin_lock_init(&com_spinlock
);
2354 spin_lock_init(&clock_spinlock
);
2355 spin_lock_init(&cons_spinlock
);
2357 /* our token pool needs to work early */
2358 lwkt_token_pool_init();