/*
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 2008 The DragonFly Project.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 * $FreeBSD: src/sys/i386/i386/machdep.c,v 1.385.2.30 2003/05/31 08:48:05 alc Exp $
 * $DragonFly: src/sys/platform/pc64/amd64/machdep.c,v 1.1 2008/08/29 17:07:10 dillon Exp $
 */
#include "use_ether.h"
//#include "use_npx.h"
#include "opt_atalk.h"
#include "opt_compat.h"
#include "opt_directio.h"
#include "opt_msgbuf.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/reboot.h>
#include <sys/msgbuf.h>
#include <sys/sysent.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/upcall.h>
#include <sys/usched.h>

#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>

#include <machine/cpu.h>
#include <machine/clock.h>
#include <machine/specialreg.h>
#include <machine/bootinfo.h>
#include <machine/intr_machdep.h>	/* for inthand_t */
#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/pc/bios.h>
#include <machine/pcb_ext.h>		/* pcb.h included via sys/user.h */
#include <machine/globaldata.h>		/* CPU_prvspace */
#include <machine/smp.h>
#include <machine/perfmon.h>
#include <machine/cputypes.h>

#include <bus/isa/i386/isa_device.h>
#include <machine_base/isa/intr_machdep.h>
#include <bus/isa/rtc.h>
#include <sys/random.h>
#include <sys/ptrace.h>
#include <machine/sigframe.h>
#define PHYSMAP_ENTRIES		10

extern void init386(int first);
extern void dblfault_handler(void);
extern u_int64_t hammer_time(u_int64_t, u_int64_t);

extern void printcpuinfo(void);	/* XXX header file */
extern void identify_cpu(void);
extern void finishidentcpu(void);
extern void panicifcpuunsupported(void);
extern void initializecpu(void);

extern void init_paging(vm_paddr_t *);

static void cpu_startup(void *);
#ifndef CPU_DISABLE_SSE
static void set_fpregs_xmm(struct save87 *, struct savexmm *);
static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
#endif /* CPU_DISABLE_SSE */
#ifdef DIRECTIO
extern void ffs_rawread_setup(void);
#endif /* DIRECTIO */
static void init_locks(void);
SYSINIT(cpu, SI_BOOT2_SMP, SI_ORDER_FIRST, cpu_startup, NULL)

extern vm_offset_t ksym_start, ksym_end;
uint64_t common_lvl4_phys;
uint64_t common_lvl3_phys;
pdp_entry_t *link_pdpe;

int	_udatasel, _ucodesel, _ucode32sel;

#ifdef SMP
int64_t tsc_offsets[MAXCPU];
#else
int64_t tsc_offsets[1];
#endif
#if defined(SWTCH_OPTIM_STATS)
extern int swtch_optim_stats;
SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
	CTLFLAG_RD, &swtch_optim_stats, 0, "");
SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
	CTLFLAG_RD, &tlb_flush_count, 0, "");
#endif
static int
sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
{
        int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);

        return (error);
}

SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_physmem, "IU", "");
static int
sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
{
        int error = sysctl_handle_int(oidp, 0,
            ctob(physmem - vmstats.v_wire_count), req);

        return (error);
}

SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_usermem, "IU", "");
static int
sysctl_hw_availpages(SYSCTL_HANDLER_ARGS)
{
        int error = sysctl_handle_int(oidp, 0,
            i386_btop(avail_end - avail_start), req);

        return (error);
}

SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_availpages, "I", "");
static int
sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS)
{
        int error;

        /*
         * Unwind the buffer, so that it's linear (possibly starting with
         * some initial nulls).
         */
        error = sysctl_handle_opaque(oidp,
            msgbufp->msg_ptr + msgbufp->msg_bufr,
            msgbufp->msg_size - msgbufp->msg_bufr, req);
        if (error)
                return (error);
        if (msgbufp->msg_bufr > 0) {
                error = sysctl_handle_opaque(oidp, msgbufp->msg_ptr,
                    msgbufp->msg_bufr, req);
        }
        return (error);
}
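
/*
 * Illustrative note (not in the original source): msg_bufr marks the
 * oldest byte in the circular message buffer, so the handler above
 * copies out [msg_bufr .. msg_size) first and then wraps around to
 * [0 .. msg_bufr), presenting the ring to userland as one linear blob.
 */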
SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD,
	0, 0, sysctl_machdep_msgbuf, "A", "Contents of kernel message buffer");
static int msgbuf_clear;

static int
sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS)
{
        int error;

        error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
            req);
        if (!error && req->newptr) {
                /* Clear the buffer and reset write pointer */
                bzero(msgbufp->msg_ptr, msgbufp->msg_size);
                msgbufp->msg_bufr = msgbufp->msg_bufx = 0;
                msgbuf_clear = 0;
        }
        return (error);
}

SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW,
	&msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I",
	"Clear kernel message buffer");
vm_paddr_t Maxmem = 0;

/*
 * The number of PHYSMAP entries must be one less than the number of
 * PHYSSEG entries because the PHYSMAP entry that spans the largest
 * physical address that is accessible by ISA DMA is split into two
 * PHYSSEG entries.
 */
#define PHYSMAP_SIZE	(2 * (VM_PHYSSEG_MAX - 1))

vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
#define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2)
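
/*
 * Illustrative note (not in the original source): phys_avail[] and
 * dump_avail[] hold base/end pairs,
 *
 *	phys_avail[0] = start of chunk 0, phys_avail[1] = end of chunk 0,
 *	phys_avail[2] = start of chunk 1, ...
 *
 * and the *_ARRAY_END macros reserve the final pair so the scanning
 * loops can always rely on a terminating 0,0 sentinel.
 */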
static vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
static struct trapframe proc0_tf;
static void
cpu_startup(void *dummy)
{
        caddr_t v;
        vm_size_t size = 0;
        vm_offset_t firstaddr;

        if (boothowto & RB_VERBOSE)
                bootverbose++;

        /*
         * Good {morning,afternoon,evening,night}.
         */
        kprintf("%s", version);
        panicifcpuunsupported();
        kprintf("real memory = %llu (%lluK bytes)\n",
            ptoa(Maxmem), ptoa(Maxmem) / 1024);

        /*
         * Display any holes after the first chunk of extended memory.
         */
        if (bootverbose) {
                int indx;

                kprintf("Physical memory chunk(s):\n");
                for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
                        vm_paddr_t size1 =
                            phys_avail[indx + 1] - phys_avail[indx];

                        kprintf("0x%08llx - 0x%08llx, %llu bytes (%llu pages)\n",
                            phys_avail[indx], phys_avail[indx + 1] - 1, size1,
                            size1 / PAGE_SIZE);
                }
        }

        /*
         * Allocate space for system data structures.
         * The first available kernel virtual address is in "v".
         * As pages of kernel virtual memory are allocated, "v" is incremented.
         * As pages of memory are allocated and cleared,
         * "firstaddr" is incremented.
         * An index into the kernel page table corresponding to the
         * virtual memory address maintained in "v" is kept in "mapaddr".
         */

        /*
         * Make two passes.  The first pass calculates how much memory is
         * needed and allocates it.  The second pass assigns virtual
         * addresses to the various data structures.
         */
        firstaddr = 0;
again:
        v = (caddr_t)firstaddr;

#define	valloc(name, type, num) \
	    (name) = (type *)v; v = (caddr_t)((name)+(num))
#define	valloclim(name, type, num, lim) \
	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
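
        /*
         * Illustrative note (not in the original source): valloc() hands
         * out addresses by bumping "v".  On the sizing pass v starts at 0,
         * so (v - firstaddr) ends up being the total byte count; on the
         * second pass v walks through the freshly allocated KVA assigning
         * the final addresses, e.g. valloc(buf, struct buf, nbuf) reserves
         * nbuf * sizeof(struct buf) bytes and advances v past them.
         */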
        /*
         * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
         * For the first 64MB of ram nominally allocate sufficient buffers to
         * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
         * buffers to cover 1/20 of our ram over 64MB.  When auto-sizing
         * the buffer cache we limit the eventual kva reservation to
         * maxbcache bytes.
         *
         * factor represents the 1/4 x ram conversion.
         */
        if (nbuf == 0) {
                int factor = 4 * BKVASIZE / 1024;
                int kbytes = physmem * (PAGE_SIZE / 1024);

                nbuf = 50;
                if (kbytes > 4096)
                        nbuf += min((kbytes - 4096) / factor, 65536 / factor);
                if (kbytes > 65536)
                        nbuf += (kbytes - 65536) * 2 / (factor * 5);
                if (maxbcache && nbuf > maxbcache / BKVASIZE)
                        nbuf = maxbcache / BKVASIZE;
        }

        /*
         * Do not allow the buffer_map to be more than 1/2 the size of the
         * kernel_map.
         */
        if (nbuf > (virtual_end - virtual_start) / (BKVASIZE * 2)) {
                nbuf = (virtual_end - virtual_start) / (BKVASIZE * 2);
                kprintf("Warning: nbufs capped at %d\n", nbuf);
        }

        nswbuf = max(min(nbuf/4, 256), 16);
#ifdef NSWBUF_MIN
        if (nswbuf < NSWBUF_MIN)
                nswbuf = NSWBUF_MIN;
#endif
#ifdef DIRECTIO
        ffs_rawread_setup();
#endif

        valloc(swbuf, struct buf, nswbuf);
        valloc(buf, struct buf, nbuf);

        /*
         * End of first pass, size has been calculated so allocate memory
         */
        if (firstaddr == 0) {
                size = (vm_size_t)(v - firstaddr);
                firstaddr = kmem_alloc(&kernel_map, round_page(size));
                if (firstaddr == 0)
                        panic("startup: no room for tables");
                goto again;
        }

        /*
         * End of second pass, addresses have been assigned
         */
        if ((vm_size_t)(v - firstaddr) != size)
                panic("startup: table size inconsistency");

        kmem_suballoc(&kernel_map, &clean_map, &clean_sva, &clean_eva,
                      (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
        kmem_suballoc(&clean_map, &buffer_map, &buffer_sva, &buffer_eva,
                      (nbuf*BKVASIZE));
        buffer_map.system_map = 1;
        kmem_suballoc(&clean_map, &pager_map, &pager_sva, &pager_eva,
                      (nswbuf*MAXPHYS) + pager_map_size);
        pager_map.system_map = 1;

#if defined(USERCONFIG)
        userconfig();
        cninit();		/* the preferred console may have changed */
#endif

        kprintf("avail memory = %u (%uK bytes)\n", ptoa(vmstats.v_free_count),
            ptoa(vmstats.v_free_count) / 1024);

        /*
         * Set up buffers, so they can be used to read disk labels.
         */
        bufinit();
        vm_pager_bufferinit();

#ifdef SMP
        /*
         * OK, enough kmem_alloc/malloc state should be up, lets get on with it!
         */
        mp_start();			/* fire up the APs and APICs */
        mp_announce();
#endif  /* SMP */
        cpu_setregs();
}
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * at top to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 */
void
sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
        struct lwp *lp = curthread->td_lwp;
        struct proc *p = lp->lwp_proc;
        struct trapframe *regs;
        struct sigacts *psp = p->p_sigacts;
        struct sigframe sf, *sfp;
        int oonstack;

        kprintf0("sendsig\n");

        regs = lp->lwp_md.md_regs;
        oonstack = (lp->lwp_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0;

        /* save user context */
        bzero(&sf, sizeof(struct sigframe));
        sf.sf_uc.uc_sigmask = *mask;
        sf.sf_uc.uc_stack = lp->lwp_sigstk;
        sf.sf_uc.uc_mcontext.mc_onstack = oonstack;
        bcopy(regs, &sf.sf_uc.uc_mcontext.mc_gs, sizeof(struct trapframe));

        /* make the size of the saved context visible to userland */
        sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext);

        /* save mailbox pending state for syscall interlock semantics */
        if (p->p_flag & P_MAILBOX)
                sf.sf_uc.uc_mcontext.mc_xflags |= PGEX_MAILBOX;

        /* Allocate and validate space for the signal handler context. */
        if ((lp->lwp_flag & LWP_ALTSTACK) != 0 && !oonstack &&
            SIGISMEMBER(psp->ps_sigonstack, sig)) {
                sfp = (struct sigframe *)(lp->lwp_sigstk.ss_sp +
                    lp->lwp_sigstk.ss_size - sizeof(struct sigframe));
                lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
        } else {
                sfp = (struct sigframe *)regs->tf_esp - 1;
        }

        /* Translate the signal if appropriate */
        if (p->p_sysent->sv_sigtbl) {
                if (sig <= p->p_sysent->sv_sigsize)
                        sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
        }

        /* Build the argument list for the signal handler. */
        sf.sf_signum = sig;
        sf.sf_ucontext = (register_t)&sfp->sf_uc;
        if (SIGISMEMBER(psp->ps_siginfo, sig)) {
                /* Signal handler installed with SA_SIGINFO. */
                sf.sf_siginfo = (register_t)&sfp->sf_si;
                sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

                /* fill siginfo structure */
                sf.sf_si.si_signo = sig;
                sf.sf_si.si_code = code;
                sf.sf_si.si_addr = (void *)regs->tf_err;
        } else {
                /* Old FreeBSD-style arguments. */
                sf.sf_siginfo = code;
                sf.sf_addr = regs->tf_err;
                sf.sf_ahu.sf_handler = catcher;
        }

        /*
         * If we're a vm86 process, we want to save the segment registers.
         * We also change eflags to be our emulated eflags, not the actual
         * eflags.
         */
        if (regs->tf_eflags & PSL_VM) {
                struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
                struct vm86_kernel *vm86 =
                    &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;

                sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
                sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
                sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
                sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;

                if (vm86->vm86_has_vme == 0)
                        sf.sf_uc.uc_mcontext.mc_eflags =
                            (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
                            (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));

                /*
                 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
                 * syscalls made by the signal handler.  This just avoids
                 * wasting time for our lazy fixup of such faults.  PSL_NT
                 * does nothing in vm86 mode, but vm86 programs can set it
                 * almost legitimately in probes for old cpu types.
                 */
                tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP);
        }

        /*
         * Save the FPU state and reinit the FP unit
         */
        npxpush(&sf.sf_uc.uc_mcontext);

        /*
         * Copy the sigframe out to the user's stack.
         */
        if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) {
                /*
                 * Something is wrong with the stack pointer.
                 * ...Kill the process.
                 */
                sigexit(lp, SIGILL);
        }

        regs->tf_esp = (int)sfp;
        regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);

        /*
         * i386 abi specifies that the direction flag must be cleared
         * on function entry
         */
        regs->tf_eflags &= ~(PSL_T|PSL_D);

        regs->tf_cs = _ucodesel;
        regs->tf_ds = _udatasel;
        regs->tf_es = _udatasel;

        /*
         * Allow the signal handler to inherit %fs in addition to %gs as
         * the userland program might be using both.
         *
         * However, if a T_PROTFLT occurred the segment registers could be
         * totally broken.  They must be reset in order to be able to
         * return to userland.
         */
        if (regs->tf_trapno == T_PROTFLT) {
                regs->tf_fs = _udatasel;
                regs->tf_gs = _udatasel;
        }
        regs->tf_ss = _udatasel;
}
/*
 * Sanitize the trapframe for a virtual kernel passing control to a custom
 * VM context.  Remove any items that would otherwise create a privilege
 * issue.
 *
 * XXX at the moment we allow userland to set the resume flag.  Is this a
 * bad idea?
 */
int
cpu_sanitize_frame(struct trapframe *frame)
{
        kprintf0("cpu_sanitize_frame\n");
        frame->tf_cs = _ucodesel;
        frame->tf_ds = _udatasel;
        frame->tf_es = _udatasel;	/* XXX allow userland this one too? */
        frame->tf_fs = _udatasel;
        frame->tf_gs = _udatasel;
        frame->tf_ss = _udatasel;
        frame->tf_eflags &= (PSL_RF | PSL_USERCHANGE);
        frame->tf_eflags |= PSL_RESERVED_DEFAULT | PSL_I;
        return(0);
}

/*
 * Sanitize the tls so loading the descriptor does not blow up
 * on us.  For AMD64 we don't have to do anything.
 */
int
cpu_sanitize_tls(struct savetls *tls)
{
        return(0);
}
/*
 * sigreturn(ucontext_t *sigcntxp)
 *
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig.  Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 */
#define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
#define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
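
/*
 * Illustrative note (not in the original source): EFL_SECURE() accepts a
 * new eflags value only if every bit outside PSL_USERCHANGE matches the
 * old value, i.e. userland may toggle only the user-changeable flags.
 * CS_SECURE() requires the requested %cs selector to carry user privilege
 * (RPL == SEL_UPL), so a handler cannot sigreturn to a kernel selector.
 */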
int
sys_sigreturn(struct sigreturn_args *uap)
{
        struct lwp *lp = curthread->td_lwp;
        struct proc *p = lp->lwp_proc;
        struct trapframe *regs;
        ucontext_t uc;
        ucontext_t *ucp;
        int cs;
        int eflags;
        int error;

        /*
         * We have to copy the information into kernel space so userland
         * can't modify it while we are sniffing it.
         */
        regs = lp->lwp_md.md_regs;
        error = copyin(uap->sigcntxp, &uc, sizeof(uc));
        if (error)
                return (error);
        ucp = &uc;
        eflags = ucp->uc_mcontext.mc_eflags;

        if (eflags & PSL_VM) {
                struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
                struct vm86_kernel *vm86;

                /*
                 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
                 * set up the vm86 area, and we can't enter vm86 mode.
                 */
                if (lp->lwp_thread->td_pcb->pcb_ext == 0)
                        return (EINVAL);
                vm86 = &lp->lwp_thread->td_pcb->pcb_ext->ext_vm86;
                if (vm86->vm86_inited == 0)
                        return (EINVAL);

                /* go back to user mode if both flags are set */
                if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
                        trapsignal(lp, SIGBUS, 0);

                if (vm86->vm86_has_vme) {
                        eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
                            (eflags & VME_USERCHANGE) | PSL_VM;
                } else {
                        vm86->vm86_eflags = eflags;	/* save VIF, VIP */
                        eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
                            (eflags & VM_USERCHANGE) | PSL_VM;
                }
                bcopy(&ucp->uc_mcontext.mc_gs, tf, sizeof(struct trapframe));
                tf->tf_eflags = eflags;
                tf->tf_vm86_ds = tf->tf_ds;
                tf->tf_vm86_es = tf->tf_es;
                tf->tf_vm86_fs = tf->tf_fs;
                tf->tf_vm86_gs = tf->tf_gs;
                tf->tf_ds = _udatasel;
                tf->tf_es = _udatasel;
                tf->tf_fs = _udatasel;
                tf->tf_gs = _udatasel;
        } else {
                /*
                 * Don't allow users to change privileged or reserved flags.
                 *
                 * XXX do allow users to change the privileged flag PSL_RF.
                 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
                 * should sometimes set it there too.  tf_eflags is kept in
                 * the signal context during signal handling and there is no
                 * other place to remember it, so the PSL_RF bit may be
                 * corrupted by the signal handler without us knowing.
                 * Corruption of the PSL_RF bit at worst causes one more or
                 * one less debugger trap, so allowing it is fairly harmless.
                 */
                if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
                        kprintf("sigreturn: eflags = 0x%x\n", eflags);
                        return (EINVAL);
                }

                /*
                 * Don't allow users to load a valid privileged %cs.  Let the
                 * hardware check for invalid selectors, excess privilege in
                 * other selectors, invalid %eip's and invalid %esp's.
                 */
                cs = ucp->uc_mcontext.mc_cs;
                if (!CS_SECURE(cs)) {
                        kprintf("sigreturn: cs = 0x%x\n", cs);
                        trapsignal(lp, SIGBUS, T_PROTFLT);
                        return (EINVAL);
                }
                bcopy(&ucp->uc_mcontext.mc_gs, regs,
                    sizeof(struct trapframe));
        }

        /*
         * Restore the FPU state from the frame
         */
        npxpop(&ucp->uc_mcontext);

        /*
         * Merge saved signal mailbox pending flag to maintain interlock
         * semantics against system calls.
         */
        if (ucp->uc_mcontext.mc_xflags & PGEX_MAILBOX)
                p->p_flag |= P_MAILBOX;

        if (ucp->uc_mcontext.mc_onstack & 1)
                lp->lwp_sigstk.ss_flags |= SS_ONSTACK;
        else
                lp->lwp_sigstk.ss_flags &= ~SS_ONSTACK;

        lp->lwp_sigmask = ucp->uc_sigmask;
        SIG_CANTMASK(lp->lwp_sigmask);
        return (EJUSTRETURN);
}
/*
 * Stack frame on entry to function.  %eax will contain the function vector,
 * %ecx will contain the function data.  flags, ecx, and eax will have
 * already been pushed on the stack.
 */
struct upc_frame {
        register_t	eax;
        register_t	ecx;
        register_t	edx;
        register_t	flags;
        register_t	oldip;
};

void
sendupcall(struct vmupcall *vu, int morepending)
{
        struct lwp *lp = curthread->td_lwp;
        struct trapframe *regs;
        struct upcall upcall;
        struct upc_frame upc_frame;
        int	crit_count = 0;

        /*
         * If we are a virtual kernel running an emulated user process
         * context, switch back to the virtual kernel context before
         * trying to post the signal.
         */
        if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
                lp->lwp_md.md_regs->tf_trapno = 0;
                vkernel_trap(lp, lp->lwp_md.md_regs);
        }

        /*
         * Get the upcall data structure
         */
        if (copyin(lp->lwp_upcall, &upcall, sizeof(upcall)) ||
            copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int))
        ) {
                vu->vu_pending = 0;
                kprintf("bad upcall address\n");
                return;
        }

        /*
         * If the data structure is already marked pending or has a critical
         * section count, mark the data structure as pending and return
         * without doing an upcall.  vu_pending is left set.
         */
        if (upcall.upc_pending || crit_count >= vu->vu_pending) {
                if (upcall.upc_pending < vu->vu_pending) {
                        upcall.upc_pending = vu->vu_pending;
                        copyout(&upcall.upc_pending,
                                &lp->lwp_upcall->upc_pending,
                                sizeof(upcall.upc_pending));
                }
                return;
        }

        /*
         * We can run this upcall now, clear vu_pending.
         *
         * Bump our critical section count and set or clear the
         * user pending flag depending on whether more upcalls are
         * pending.  The user will be responsible for calling
         * upc_dispatch(-1) to process remaining upcalls.
         */
        vu->vu_pending = 0;
        upcall.upc_pending = morepending;
        crit_count += TDPRI_CRIT;
        copyout(&upcall.upc_pending, &lp->lwp_upcall->upc_pending,
                sizeof(upcall.upc_pending));
        copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff,
                sizeof(int));

        /*
         * Construct a stack frame and issue the upcall
         */
        regs = lp->lwp_md.md_regs;
        upc_frame.eax = regs->tf_eax;
        upc_frame.ecx = regs->tf_ecx;
        upc_frame.edx = regs->tf_edx;
        upc_frame.flags = regs->tf_eflags;
        upc_frame.oldip = regs->tf_eip;
        if (copyout(&upc_frame, (void *)(regs->tf_esp - sizeof(upc_frame)),
                    sizeof(upc_frame)) != 0) {
                kprintf("bad stack on upcall\n");
        } else {
                regs->tf_eax = (register_t)vu->vu_func;
                regs->tf_ecx = (register_t)vu->vu_data;
                regs->tf_edx = (register_t)lp->lwp_upcall;
                regs->tf_eip = (register_t)vu->vu_ctx;
                regs->tf_esp -= sizeof(upc_frame);
        }
}
/*
 * fetchupcall occurs in the context of a system call, which means that
 * we have to return EJUSTRETURN in order to prevent eax and edx from
 * being overwritten by the syscall return value.
 *
 * if vu is not NULL we return the new context in %edx, the new data in %ecx,
 * and the function pointer in %eax.
 */
int
fetchupcall(struct vmupcall *vu, int morepending, void *rsp)
{
        struct upc_frame upc_frame;
        struct lwp *lp = curthread->td_lwp;
        struct trapframe *regs;
        int error;
        struct upcall upcall;
        int crit_count;

        regs = lp->lwp_md.md_regs;

        error = copyout(&morepending, &lp->lwp_upcall->upc_pending,
            sizeof(int));
        if (error == 0) {
                if (vu) {
                        /*
                         * This jumps us to the next ready context.
                         */
                        vu->vu_pending = 0;
                        error = copyin(lp->lwp_upcall, &upcall,
                            sizeof(upcall));
                        crit_count = 0;
                        if (error == 0)
                                error = copyin((char *)upcall.upc_uthread + upcall.upc_critoff, &crit_count, sizeof(int));
                        crit_count += TDPRI_CRIT;
                        if (error == 0)
                                error = copyout(&crit_count, (char *)upcall.upc_uthread + upcall.upc_critoff, sizeof(int));
                        regs->tf_eax = (register_t)vu->vu_func;
                        regs->tf_ecx = (register_t)vu->vu_data;
                        regs->tf_edx = (register_t)lp->lwp_upcall;
                        regs->tf_eip = (register_t)vu->vu_ctx;
                        regs->tf_esp = (register_t)rsp;
                } else {
                        /*
                         * This returns us to the originally interrupted code.
                         */
                        error = copyin(rsp, &upc_frame, sizeof(upc_frame));
                        regs->tf_eax = upc_frame.eax;
                        regs->tf_ecx = upc_frame.ecx;
                        regs->tf_edx = upc_frame.edx;
                        regs->tf_eflags = (regs->tf_eflags & ~PSL_USERCHANGE) |
                                        (upc_frame.flags & PSL_USERCHANGE);
                        regs->tf_eip = upc_frame.oldip;
                        regs->tf_esp = (register_t)((char *)rsp +
                            sizeof(upc_frame));
                }
        }
        if (error == 0)
                error = EJUSTRETURN;
        return(error);
}
/*
 * Machine dependent boot() routine
 *
 * I haven't seen anything to put here yet
 * Possibly some stuff might be grafted back here from boot()
 */
void
cpu_boot(int howto)
{
}

/*
 * Shutdown the CPU as much as possible
 */
void
cpu_halt(void)
{
        for (;;)
                __asm__ __volatile("hlt");
}

/*
 * cpu_idle() represents the idle LWKT.  You cannot return from this function
 * (unless you want to blow things up!).  Instead we look for runnable threads
 * and loop or halt as appropriate.  Giant is not held on entry to the thread.
 *
 * The main loop is entered with a critical section held, we must release
 * the critical section before doing anything else.  lwkt_switch() will
 * check for pending interrupts due to entering and exiting its own
 * critical section.
 *
 * Note on cpu_idle_hlt:  On an SMP system we rely on a scheduler IPI
 * to wake a HLTed cpu up.  However, there are cases where the idlethread
 * will be entered with the possibility that no IPI will occur and in such
 * cases lwkt_switch() sets TDF_IDLE_NOHLT.
 */
static int	cpu_idle_hlt = 1;
static int	cpu_idle_hltcnt;
static int	cpu_idle_spincnt;
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
    &cpu_idle_hlt, 0, "Idle loop HLT enable");
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hltcnt, CTLFLAG_RW,
    &cpu_idle_hltcnt, 0, "Idle loop entry halts");
SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_spincnt, CTLFLAG_RW,
    &cpu_idle_spincnt, 0, "Idle loop entry spins");

static void
cpu_idle_default_hook(void)
{
        /*
         * We must guarantee that hlt is exactly the instruction
         * following the sti.
         */
        __asm __volatile("sti; hlt");
}

/* Other subsystems (e.g., ACPI) can hook this later. */
void (*cpu_idle_hook)(void) = cpu_idle_default_hook;
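
/*
 * Illustrative note (not in the original source): "sti" enables
 * interrupts only after the following instruction retires, so placing
 * "hlt" immediately after it closes the window in which an interrupt
 * could be taken between the enable and the halt, which would otherwise
 * leave the cpu asleep with its wakeup already delivered.
 */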
void
cpu_idle(void)
{
        struct thread *td = curthread;

        crit_exit();
        KKASSERT(td->td_pri < TDPRI_CRIT);
        for (;;) {
                /*
                 * See if there are any LWKTs ready to go.
                 */
                lwkt_switch();

                /*
                 * If we are going to halt call splz unconditionally after
                 * CLIing to catch any interrupt races.  Note that we are
                 * at SPL0 and interrupts are enabled.
                 */
                if (cpu_idle_hlt && !lwkt_runnable() &&
                    (td->td_flags & TDF_IDLE_NOHLT) == 0) {
                        __asm __volatile("cli");
                        splz();
                        if (!lwkt_runnable())
                                cpu_idle_hook();
#ifdef SMP
                        else
                                __asm __volatile("pause");
#endif
                        ++cpu_idle_hltcnt;
                } else {
                        td->td_flags &= ~TDF_IDLE_NOHLT;
                        splz();
#ifdef SMP
                        __asm __volatile("sti; pause");
#else
                        __asm __volatile("sti");
#endif
                        ++cpu_idle_spincnt;
                }
        }
}
/*
 * This routine is called when the only runnable threads require
 * the MP lock, and the scheduler couldn't get it.  On a real cpu
 * we let the scheduler spin.
 */
void
cpu_mplock_contested(void)
{
        cpu_pause();
}

/*
 * This routine is called if a spinlock has been held through the
 * exponential backoff period and is seriously contested.  On a real cpu
 * we let it spin.
 */
void
cpu_spinlock_contested(void)
{
        cpu_pause();
}
/*
 * Clear registers on exec
 */
void
exec_setregs(u_long entry, u_long stack, u_long ps_strings)
{
        struct thread *td = curthread;
        struct lwp *lp = td->td_lwp;
        struct pcb *pcb = td->td_pcb;
        struct trapframe *regs = lp->lwp_md.md_regs;

        kprintf0("exec_setregs\n");

        /* was i386_user_cleanup() in NetBSD */

        bzero((char *)regs, sizeof(struct trapframe));
        regs->tf_rip = entry;
        regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; /* align the stack */
        regs->tf_rdi = stack;		/* argv */
        regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
        regs->tf_ss = _udatasel;
        regs->tf_cs = _ucodesel;
        regs->tf_rbx = ps_strings;

        /*
         * Reset the hardware debug registers if they were in use.
         * They won't have any meaning for the newly exec'd process.
         */
        if (pcb->pcb_flags & PCB_DBREGS) {
                pcb->pcb_dr0 = 0;
                pcb->pcb_dr1 = 0;
                pcb->pcb_dr2 = 0;
                pcb->pcb_dr3 = 0;
                pcb->pcb_dr6 = 0;
                pcb->pcb_dr7 = 0; /* JG set bit 10? */
                if (pcb == td->td_pcb) {
                        /*
                         * Clear the debug registers on the running
                         * CPU, otherwise they will end up affecting
                         * the next process we switch to.
                         */
                        reset_dbregs();
                }
                pcb->pcb_flags &= ~PCB_DBREGS;
        }

        /*
         * Initialize the math emulator (if any) for the current process.
         * Actually, just clear the bit that says that the emulator has
         * been initialized.  Initialization is delayed until the process
         * traps to the emulator (if it is done at all) mainly because
         * emulators don't provide an entry point for initialization.
         */
        pcb->pcb_flags &= ~FP_SOFTFP;

        /*
         * note: do not set CR0_TS here.  npxinit() must do it after clearing
         * gd_npxthread.  Otherwise a preemptive interrupt thread may panic
         * in npxdna().
         */
        load_cr0(rcr0() | CR0_MP);

        wrmsr(MSR_FSBASE, 0);
        wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
        pcb->pcb_fsbase = 0;
        pcb->pcb_gsbase = 0;

        /* Initialize the npx (if any) for the current process. */
        npxinit(__INITIAL_NPXCW__);

        pcb->pcb_ds = _udatasel;
        pcb->pcb_es = _udatasel;
        pcb->pcb_fs = _udatasel;
        pcb->pcb_gs = _udatasel;
}

void
cpu_setregs(void)
{
        u_long cr0;

        cr0 = rcr0();
        cr0 |= CR0_NE;			/* Done by npxinit() */
        cr0 |= CR0_MP | CR0_TS;		/* Done at every execve() too. */
        cr0 |= CR0_WP | CR0_AM;
        load_cr0(cr0);
}
static int
sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
{
        int error;

        error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
            req);
        if (!error && req->newptr)
                resettodr();
        return (error);
}

SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
	CTLFLAG_RD, &bootinfo, bootinfo, "");

SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
	CTLFLAG_RW, &wall_cmos_clock, 0, "");

extern u_long bootdev;		/* not a cdev_t - encoding is different */
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
	CTLFLAG_RD, &bootdev, 0, "Boot device (not in cdev_t format)");
/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

struct user_segment_descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */

/* table descriptors - used to load tables by cpu */
struct region_descriptor r_gdt, r_idt;

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif

static char dblfault_stack[PAGE_SIZE] __aligned(16);

/* JG proc0paddr is a virtual address */
void *proc0paddr;
char proc0paddr_buff[LWKT_THREAD_STACK];
/* software prototypes -- in more palatable form */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0,			/* long */
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GCODE_SEL	1 Code Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_KPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	1,			/* long */
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GDATA_SEL	2 Data Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_KPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	1,			/* long */
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUCODE32_SEL	3 32 bit Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0,			/* long */
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUDATA_SEL	4 32/64 bit Data Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0,			/* long */
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GUCODE_SEL	5 64 bit Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	1,			/* long */
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
{
	0x0,			/* segment base address */
	sizeof(struct amd64tss)-1,/* length - all address space */
	SDT_SYSTSS,		/* segment type */
	SEL_KPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0,			/* long */
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* Actually, the TSS is a system descriptor which is double size */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0,			/* long */
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GUGS32_SEL	8 32 bit GS Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0,			/* long */
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
};
void
setidt(int idx, inthand_t *func, int typ, int dpl, int ist)
{
        struct gate_descriptor *ip;

        ip = idt + idx;
        ip->gd_looffset = (uintptr_t)func;
        ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
        ip->gd_ist = ist;
        ip->gd_xx = 0;
        ip->gd_type = typ;
        ip->gd_dpl = dpl;
        ip->gd_p = 1;
        ip->gd_hioffset = ((uintptr_t)func)>>16 ;
}
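
/*
 * Illustrative note (not in the original source): a long mode interrupt
 * gate spreads the 64-bit handler address across several descriptor
 * fields; the gd_looffset assignment above keeps only the low 16 bits
 * (the bitfield width truncates the value) while gd_hioffset receives
 * the remaining upper bits of func.
 */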
#define	IDTVEC(name)	__CONCAT(X,name)

extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm), IDTVEC(dblfault),
	IDTVEC(fast_syscall), IDTVEC(fast_syscall32);

#ifdef DEBUG_INTERRUPTS
extern inthand_t *Xrsvdary[256];
#endif
void
sdtossd(struct user_segment_descriptor *sd, struct soft_segment_descriptor *ssd)
{
        ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
        ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
        ssd->ssd_type  = sd->sd_type;
        ssd->ssd_dpl   = sd->sd_dpl;
        ssd->ssd_p     = sd->sd_p;
        ssd->ssd_def32 = sd->sd_def32;
        ssd->ssd_gran  = sd->sd_gran;
}

void
ssdtosd(struct soft_segment_descriptor *ssd, struct user_segment_descriptor *sd)
{
        sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
        sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
        sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
        sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
        sd->sd_type  = ssd->ssd_type;
        sd->sd_dpl   = ssd->ssd_dpl;
        sd->sd_p     = ssd->ssd_p;
        sd->sd_long  = ssd->ssd_long;
        sd->sd_def32 = ssd->ssd_def32;
        sd->sd_gran  = ssd->ssd_gran;
}
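
/*
 * Illustrative note (not in the original source): legacy descriptors
 * scatter their fields, e.g. a base of 0x12345678 stores 0x345678 in
 * sd_lobase and 0x12 in sd_hibase, which is why the converters above
 * split at bit 24 for the base and at bit 16 for the 20-bit limit.
 */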
void
ssdtosyssd(struct soft_segment_descriptor *ssd,
    struct system_segment_descriptor *sd)
{
        sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
        sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
        sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
        sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
        sd->sd_type  = ssd->ssd_type;
        sd->sd_dpl   = ssd->ssd_dpl;
        sd->sd_p     = ssd->ssd_p;
        sd->sd_gran  = ssd->ssd_gran;
}
/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * If we cannot accurately determine the physical memory map, then use
 * value from the 0xE801 call, and failing that, the RTC.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
 */
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
        int i, off, physmap_idx, pa_indx, da_indx;
        vm_paddr_t pa, physmap[PHYSMAP_SIZE];
        u_long physmem_tunable;
        pt_entry_t *pte;
        struct bios_smap *smapbase, *smap, *smapend;
        u_int32_t smapsize;
        quad_t dcons_addr, dcons_size;

        bzero(physmap, sizeof(physmap));
        basemem = 0;
        physmap_idx = 0;

        /*
         * get memory map from INT 15:E820, kindly supplied by the loader.
         *
         * subr_module.c says:
         * "Consumer may safely assume that size value precedes data."
         * ie: an int32_t immediately precedes smap.
         */
        smapbase = (struct bios_smap *)preload_search_info(kmdp,
            MODINFO_METADATA | MODINFOMD_SMAP);
        if (smapbase == NULL)
                panic("No BIOS smap info from loader!");

        smapsize = *((u_int32_t *)smapbase - 1);
        smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

        for (smap = smapbase; smap < smapend; smap++) {
                if (boothowto & RB_VERBOSE)
                        kprintf("SMAP type=%02x base=%016lx len=%016lx\n",
                            smap->type, smap->base, smap->length);

                if (smap->type != SMAP_TYPE_MEMORY)
                        continue;

                if (smap->length == 0)
                        continue;

                for (i = 0; i <= physmap_idx; i += 2) {
                        if (smap->base < physmap[i + 1]) {
                                if (boothowto & RB_VERBOSE)
                                        kprintf(
	"Overlapping or non-monotonic memory region, ignoring second region\n");
                                break;
                        }
                }
                if (i <= physmap_idx)
                        continue;

                if (smap->base == physmap[physmap_idx + 1]) {
                        physmap[physmap_idx + 1] += smap->length;
                        continue;
                }

                physmap_idx += 2;
                if (physmap_idx == PHYSMAP_SIZE) {
                        kprintf(
		"Too many segments in the physical address map, giving up\n");
                        break;
                }
                physmap[physmap_idx] = smap->base;
                physmap[physmap_idx + 1] = smap->base + smap->length;
        }

        /*
         * Find the 'base memory' segment for SMP
         */
        basemem = 0;
        for (i = 0; i <= physmap_idx; i += 2) {
                if (physmap[i] == 0x00000000) {
                        basemem = physmap[i + 1] / 1024;
                        break;
                }
        }
        if (basemem == 0)
                panic("BIOS smap did not include a basemem segment!");

#ifdef SMP
        /* make hole for AP bootstrap code */
        physmap[1] = mp_bootaddress(physmap[1] / 1024);
#endif

        /*
         * Maxmem isn't the "maximum memory", it's one larger than the
         * highest page of the physical address space.  It should be
         * called something like "Maxphyspage".  We may adjust this
         * based on ``hw.physmem'' and the results of the memory test.
         */
        Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
        Maxmem = MAXMEM / 4;
#endif

        if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
                Maxmem = atop(physmem_tunable);

        /*
         * Don't allow MAXMEM or hw.physmem to extend the amount of memory
         * in the system.
         */
        if (Maxmem > atop(physmap[physmap_idx + 1]))
                Maxmem = atop(physmap[physmap_idx + 1]);

        if (atop(physmap[physmap_idx + 1]) != Maxmem &&
            (boothowto & RB_VERBOSE))
                kprintf("Physical memory use set to %ldK\n", Maxmem * 4);

        /* call pmap initialization to make new kernel address space */
        pmap_bootstrap(&first, 0);

        /*
         * Size up each available chunk of physical memory.
         */
        physmap[0] = PAGE_SIZE;		/* mask off page 0 */
        pa_indx = 0;
        da_indx = 1;
        phys_avail[pa_indx++] = physmap[0];
        phys_avail[pa_indx] = physmap[0];
        dump_avail[da_indx] = physmap[0];
        pte = CMAP1;

        /*
         * Get dcons buffer address
         */
        if (kgetenv_quad("dcons.addr", &dcons_addr) == 0 ||
            kgetenv_quad("dcons.size", &dcons_size) == 0)
                dcons_addr = 0;

        /*
         * physmap is in bytes, so when converting to page boundaries,
         * round up the start address and round down the end address.
         */
        for (i = 0; i <= physmap_idx; i += 2) {
                vm_paddr_t end;

                end = ptoa((vm_paddr_t)Maxmem);
                if (physmap[i + 1] < end)
                        end = trunc_page(physmap[i + 1]);
                for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
                        int tmp, page_bad, full;
                        int *ptr = (int *)CADDR1;

                        full = FALSE;
                        /*
                         * block out kernel memory as not available.
                         */
                        if (pa >= 0x100000 && pa < first)
                                goto do_dump_avail;

                        /*
                         * block out dcons buffer
                         */
                        if (dcons_addr > 0
                            && pa >= trunc_page(dcons_addr)
                            && pa < dcons_addr + dcons_size)
                                goto do_dump_avail;

                        page_bad = FALSE;

                        /*
                         * map page into kernel: valid, read/write,non-cacheable
                         */
                        *pte = pa | PG_V | PG_RW | PG_N;
                        cpu_invltlb();

                        tmp = *(int *)ptr;
                        /*
                         * Test for alternating 1's and 0's
                         */
                        *(volatile int *)ptr = 0xaaaaaaaa;
                        if (*(volatile int *)ptr != 0xaaaaaaaa)
                                page_bad = TRUE;
                        /*
                         * Test for alternating 0's and 1's
                         */
                        *(volatile int *)ptr = 0x55555555;
                        if (*(volatile int *)ptr != 0x55555555)
                                page_bad = TRUE;
                        /*
                         * Test for all 1's
                         */
                        *(volatile int *)ptr = 0xffffffff;
                        if (*(volatile int *)ptr != 0xffffffff)
                                page_bad = TRUE;
                        /*
                         * Test for all 0's
                         */
                        *(volatile int *)ptr = 0x0;
                        if (*(volatile int *)ptr != 0x0)
                                page_bad = TRUE;
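                        /*
                         * Illustrative note (not in the original source):
                         * the four patterns (alternating 10/01 bits, all
                         * ones, all zeros) drive every data bit in both
                         * directions, a cheap check that catches stuck or
                         * shorted bits in the first word of the page.
                         */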
                        /*
                         * Restore original value.
                         */
                        *(int *)ptr = tmp;

                        /*
                         * Adjust array of valid/good pages.
                         */
                        if (page_bad == TRUE)
                                continue;
                        /*
                         * If this good page is a continuation of the
                         * previous set of good pages, then just increase
                         * the end pointer. Otherwise start a new chunk.
                         * Note that "end" points one higher than end,
                         * making the range >= start and < end.
                         * If we're also doing a speculative memory
                         * test and we at or past the end, bump up Maxmem
                         * so that we keep going. The first bad page
                         * will terminate the loop.
                         */
                        if (phys_avail[pa_indx] == pa) {
                                phys_avail[pa_indx] += PAGE_SIZE;
                        } else {
                                pa_indx++;
                                if (pa_indx == PHYS_AVAIL_ARRAY_END) {
                                        kprintf(
		"Too many holes in the physical address space, giving up\n");
                                        pa_indx--;
                                        full = TRUE;
                                        goto do_dump_avail;
                                }
                                phys_avail[pa_indx++] = pa;	/* start */
                                phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
                        }
                        physmem++;
do_dump_avail:
                        if (dump_avail[da_indx] == pa) {
                                dump_avail[da_indx] += PAGE_SIZE;
                        } else {
                                da_indx++;
                                if (da_indx == DUMP_AVAIL_ARRAY_END) {
                                        da_indx--;
                                        continue;
                                }
                                dump_avail[da_indx++] = pa;	/* start */
                                dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
                        }
                        if (full)
                                break;
                }
        }
        *pte = 0;
        cpu_invltlb();

        /*
         * The last chunk must contain at least one page plus the message
         * buffer to avoid complicating other code (message buffer address
         * calculation, etc.).
         */
        while (phys_avail[pa_indx - 1] + PAGE_SIZE +
            round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
                physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
                phys_avail[pa_indx--] = 0;
                phys_avail[pa_indx--] = 0;
        }

        Maxmem = atop(phys_avail[pa_indx]);

        /* Trim off space for the message buffer. */
        phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);

        avail_end = phys_avail[pa_indx];

        /* Map the message buffer. */
        for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
                pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
                    off);
}
/*
 * IDT VECTORS:
 *	0	Divide by zero
 *	1	Debug
 *	2	NMI
 *	3	BreakPoint
 *	4	OverFlow
 *	5	Bound Range
 *	6	Invalid OpCode
 *	7	Device Not Available (x87)
 *	8	Double Fault
 *	9	Coprocessor Segment overrun (unsupported, reserved)
 *	10	Invalid TSS
 *	11	Segment not present
 *	12	Stack
 *	13	General Protection
 *	14	Page Fault
 *	15	Reserved
 *	16	x87 FP Exception pending
 *	17	Alignment Check
 *	18	Machine Check
 *	19	SIMD floating point
 *	20-31	reserved
 *	32-255	INTn/external sources
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
        caddr_t kmdp;
        int gsel_tss, metadata_missing, off, x;
        struct mdglobaldata *gd;
        u_int64_t msr;
        char *env;

        /*
         * This must be done before the first references
         * to CPU_prvspace[0] are made.
         */
        init_paging(&physfree);

        /*
         * Prevent lowering of the ipl if we call tsleep() early.
         */
        gd = &CPU_prvspace[0].mdglobaldata;
        bzero(gd, sizeof(*gd));

        /*
         * Note: on both UP and SMP curthread must be set non-NULL
         * early in the boot sequence because the system assumes
         * that 'curthread' is never NULL.
         */
        gd->mi.gd_curthread = &thread0;
        thread0.td_gd = &gd->mi;

        atdevbase = ISA_HOLE_START + PTOV_OFFSET;

        metadata_missing = 0;
#if 0	/* JG: i386 bootinfo remnant */
        if (bootinfo.bi_modulep) {
                preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
                preload_bootstrap_relocate(KERNBASE);
        } else {
                metadata_missing = 1;
        }
        if (bootinfo.bi_envp)
                kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
#endif

        preload_metadata = (caddr_t)(uintptr_t)(modulep + PTOV_OFFSET);
        preload_bootstrap_relocate(PTOV_OFFSET);
        kmdp = preload_search_by_type("elf kernel");
        if (kmdp == NULL)
                kmdp = preload_search_by_type("elf64 kernel");
        boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
        kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + PTOV_OFFSET;
        ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
        ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);

        /*
         * start with one cpu.  Note: with one cpu, ncpus2_shift, ncpus2_mask,
         * and ncpus_fit_mask remain 0.
         */
        ncpus = 1;
        /* Init basic tunables, hz etc */
        init_param1();

        /*
         * make gdt memory segments
         */
        gdt_segs[GPROC0_SEL].ssd_base =
                (uintptr_t) &CPU_prvspace[0].mdglobaldata.gd_common_tss;

        gd->mi.gd_prvspace = &CPU_prvspace[0];

        for (x = 0; x < NGDT; x++) {
                if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
                        ssdtosd(&gdt_segs[x], &gdt[x]);
        }
        ssdtosyssd(&gdt_segs[GPROC0_SEL],
            (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
        r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
        r_gdt.rd_base = (long) gdt;
        lgdt(&r_gdt);

        wrmsr(MSR_FSBASE, 0);		/* User value */
        wrmsr(MSR_GSBASE, (u_int64_t)&gd->mi);
        wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

        mi_gdinit(&gd->mi, 0);
        proc0paddr = proc0paddr_buff;
        mi_proc0init(&gd->mi, proc0paddr);
        safepri = TDPRI_MAX;

        /* spinlocks and the BGL */
        init_locks();

        /* exceptions */
        for (x = 0; x < NIDT; x++)
                setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 1);
        setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0);
        setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
        setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0);
        setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0);

        r_idt.rd_limit = sizeof(idt0) - 1;
        r_idt.rd_base = (long) idt;
        lidt(&r_idt);

        /*
         * Initialize the console before we print anything out.
         */
        cninit();

        if (metadata_missing)
                kprintf("WARNING: loader(8) metadata is missing!\n");

        if (boothowto & RB_KDB)
                Debugger("Boot flags requested debugger");

#if 0	/* JG: i386 remnant */
        finishidentcpu();	/* Final stage of CPU initialization */
        setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
        setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif
        identify_cpu();		/* Final stage of CPU initialization */
        initializecpu();	/* Initialize CPU registers */

        /* make an initial tss so cpu can get interrupt stack on syscall! */
        gd->gd_common_tss.tss_rsp0 = thread0.td_kstack + \
            KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb);
        /* Ensure the stack is aligned to 16 bytes */
        gd->gd_common_tss.tss_rsp0 &= ~0xFul;
        gd->gd_rsp0 = gd->gd_common_tss.tss_rsp0;

        /* doublefault stack space, runs on ist1 */
        gd->gd_common_tss.tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];

        /* Set the IO permission bitmap (empty due to tss seg limit) */
        gd->gd_common_tss.tss_iobase = sizeof(struct amd64tss);

        gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
        gd->gd_tss_gdt = &gdt[GPROC0_SEL];
        gd->gd_common_tssd = *gd->gd_tss_gdt;
        ltr(gsel_tss);

        /* Set up the fast syscall stuff */
        msr = rdmsr(MSR_EFER) | EFER_SCE;
        wrmsr(MSR_EFER, msr);
        wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
        wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
        msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
              ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
        wrmsr(MSR_STAR, msr);
        wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
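
        /*
         * Illustrative note (not in the original source): MSR_STAR packs
         * the selector bases used by SYSCALL/SYSRET; bits 47:32 seed
         * %cs/%ss on kernel entry and bits 63:48 on return to user mode,
         * while MSR_SF_MASK lists the rflags bits the cpu clears on entry,
         * notably PSL_I so the handler starts with interrupts disabled.
         */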
        getmemsize(kmdp, physfree);
        init_param2(physmem);

        /* now running on new page tables, configured, and u/iom is accessible */

        /* Map the message buffer. */
#if 0	/* JG: handled in getmemsize() */
        for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
                pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
#endif

        msgbufinit(msgbufp, MSGBUF_SIZE);

        /* transfer to user mode */
        _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
        _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
        _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);

        /* setup proc 0's pcb */
        thread0.td_pcb->pcb_flags = 0;
        thread0.td_pcb->pcb_cr3 = KPML4phys;
#if 0	/* JG: i386 remnant */
        thread0.td_pcb->pcb_cr3 = IdlePTD;
#endif
        thread0.td_pcb->pcb_ext = 0;
        lwp0.lwp_md.md_regs = &proc0_tf;
        env = kgetenv("kernelname");
        if (env != NULL)
                strlcpy(kernelname, env, sizeof(kernelname));

        /* Location of kernel stack for locore */
        return ((u_int64_t)thread0.td_pcb);
}
/*
 * Initialize machine-dependent portions of the global data structure.
 * Note that the global data area and cpu0's idlestack in the private
 * data space were allocated in locore.
 *
 * Note: the idlethread's cpl is 0
 *
 * WARNING!  Called from early boot, 'mycpu' may not work yet.
 */
void
cpu_gdinit(struct mdglobaldata *gd, int cpu)
{
        if (cpu)
                gd->mi.gd_curthread = &gd->mi.gd_idlethread;

        lwkt_init_thread(&gd->mi.gd_idlethread,
                        gd->mi.gd_prvspace->idlestack,
                        sizeof(gd->mi.gd_prvspace->idlestack),
                        TDF_MPSAFE, &gd->mi);
        lwkt_set_comm(&gd->mi.gd_idlethread, "idle_%d", cpu);
        gd->mi.gd_idlethread.td_switch = cpu_lwkt_switch;
        gd->mi.gd_idlethread.td_sp -= sizeof(void *);
        *(void **)gd->mi.gd_idlethread.td_sp = cpu_idle_restore;
}
int
is_globaldata_space(vm_offset_t saddr, vm_offset_t eaddr)
{
        if (saddr >= (vm_offset_t)&CPU_prvspace[0] &&
            eaddr <= (vm_offset_t)&CPU_prvspace[MAXCPU]) {
                return (TRUE);
        }
        return (FALSE);
}

struct globaldata *
globaldata_find(int cpu)
{
        KKASSERT(cpu >= 0 && cpu < ncpus);
        return(&CPU_prvspace[cpu].mdglobaldata.mi);
}
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
static void f00f_hack(void *unused);
SYSINIT(f00f_hack, SI_BOOT2_BIOS, SI_ORDER_ANY, f00f_hack, NULL);

static void
f00f_hack(void *unused)
{
        struct gate_descriptor *new_idt;
        vm_offset_t tmp;

        if (!has_f00f_bug)
                return;

        kprintf("Intel Pentium detected, installing workaround for F00F bug\n");

        r_idt.rd_limit = sizeof(idt0) - 1;

        tmp = kmem_alloc(&kernel_map, PAGE_SIZE * 2);
        if (tmp == 0)
                panic("kmem_alloc returned 0");
        if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0)
                panic("kmem_alloc returned non-page-aligned memory");
        /* Put the first seven entries in the lower page */
        new_idt = (struct gate_descriptor *)(tmp + PAGE_SIZE - (7*8));
        bcopy(idt, new_idt, sizeof(idt0));
        r_idt.rd_base = (int)new_idt;
        lidt(&r_idt);
        idt = new_idt;
        if (vm_map_protect(&kernel_map, tmp, tmp + PAGE_SIZE,
                           VM_PROT_READ, FALSE) != KERN_SUCCESS)
                panic("vm_map_protect failed");
}
#endif /* defined(I586_CPU) && !NO_F00F_HACK */
int
ptrace_set_pc(struct lwp *lp, unsigned long addr)
{
        lp->lwp_md.md_regs->tf_eip = addr;
        return (0);
}

int
ptrace_single_step(struct lwp *lp)
{
        lp->lwp_md.md_regs->tf_eflags |= PSL_T;
        return (0);
}
int
fill_regs(struct lwp *lp, struct reg *regs)
{
        struct pcb *pcb;
        struct trapframe *tp;

        tp = lp->lwp_md.md_regs;
        regs->r_gs = tp->tf_gs;
        regs->r_fs = tp->tf_fs;
        regs->r_es = tp->tf_es;
        regs->r_ds = tp->tf_ds;
        regs->r_edi = tp->tf_edi;
        regs->r_esi = tp->tf_esi;
        regs->r_ebp = tp->tf_ebp;
        regs->r_ebx = tp->tf_ebx;
        regs->r_edx = tp->tf_edx;
        regs->r_ecx = tp->tf_ecx;
        regs->r_eax = tp->tf_eax;
        regs->r_eip = tp->tf_eip;
        regs->r_cs = tp->tf_cs;
        regs->r_eflags = tp->tf_eflags;
        regs->r_esp = tp->tf_esp;
        regs->r_ss = tp->tf_ss;
        pcb = lp->lwp_thread->td_pcb;
        return (0);
}

int
set_regs(struct lwp *lp, struct reg *regs)
{
        struct pcb *pcb;
        struct trapframe *tp;

        tp = lp->lwp_md.md_regs;
        if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) ||
            !CS_SECURE(regs->r_cs))
                return (EINVAL);
        tp->tf_gs = regs->r_gs;
        tp->tf_fs = regs->r_fs;
        tp->tf_es = regs->r_es;
        tp->tf_ds = regs->r_ds;
        tp->tf_edi = regs->r_edi;
        tp->tf_esi = regs->r_esi;
        tp->tf_ebp = regs->r_ebp;
        tp->tf_ebx = regs->r_ebx;
        tp->tf_edx = regs->r_edx;
        tp->tf_ecx = regs->r_ecx;
        tp->tf_eax = regs->r_eax;
        tp->tf_eip = regs->r_eip;
        tp->tf_cs = regs->r_cs;
        tp->tf_eflags = regs->r_eflags;
        tp->tf_esp = regs->r_esp;
        tp->tf_ss = regs->r_ss;
        pcb = lp->lwp_thread->td_pcb;
        return (0);
}
#ifndef CPU_DISABLE_SSE
static void
fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87)
{
        struct env87 *penv_87 = &sv_87->sv_env;
        struct envxmm *penv_xmm = &sv_xmm->sv_env;
        int i;

        /* FPU control/status */
        penv_87->en_cw = penv_xmm->en_cw;
        penv_87->en_sw = penv_xmm->en_sw;
        penv_87->en_tw = penv_xmm->en_tw;
        penv_87->en_fip = penv_xmm->en_fip;
        penv_87->en_fcs = penv_xmm->en_fcs;
        penv_87->en_opcode = penv_xmm->en_opcode;
        penv_87->en_foo = penv_xmm->en_foo;
        penv_87->en_fos = penv_xmm->en_fos;

        /* FPU registers */
        for (i = 0; i < 8; ++i)
                sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;

        sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
}

static void
set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm)
{
        struct env87 *penv_87 = &sv_87->sv_env;
        struct envxmm *penv_xmm = &sv_xmm->sv_env;
        int i;

        /* FPU control/status */
        penv_xmm->en_cw = penv_87->en_cw;
        penv_xmm->en_sw = penv_87->en_sw;
        penv_xmm->en_tw = penv_87->en_tw;
        penv_xmm->en_fip = penv_87->en_fip;
        penv_xmm->en_fcs = penv_87->en_fcs;
        penv_xmm->en_opcode = penv_87->en_opcode;
        penv_xmm->en_foo = penv_87->en_foo;
        penv_xmm->en_fos = penv_87->en_fos;

        /* FPU registers */
        for (i = 0; i < 8; ++i)
                sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];

        sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
}
#endif /* CPU_DISABLE_SSE */
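
/*
 * Illustrative note (not in the original source): the converters above
 * exist because debuggers and core dumps speak the legacy FSAVE-style
 * struct save87 while FXSAVE hardware produces struct savexmm; only the
 * fields common to both survive the round trip, and SSE register state
 * is simply not representable in the older layout.
 */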
int
fill_fpregs(struct lwp *lp, struct fpreg *fpregs)
{
#ifndef CPU_DISABLE_SSE
        if (cpu_fxsr) {
                fill_fpregs_xmm(&lp->lwp_thread->td_pcb->pcb_save.sv_xmm,
                                (struct save87 *)fpregs);
                return (0);
        }
#endif /* CPU_DISABLE_SSE */
        bcopy(&lp->lwp_thread->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
        return (0);
}

int
set_fpregs(struct lwp *lp, struct fpreg *fpregs)
{
#ifndef CPU_DISABLE_SSE
        if (cpu_fxsr) {
                set_fpregs_xmm((struct save87 *)fpregs,
                               &lp->lwp_thread->td_pcb->pcb_save.sv_xmm);
                return (0);
        }
#endif /* CPU_DISABLE_SSE */
        bcopy(fpregs, &lp->lwp_thread->td_pcb->pcb_save.sv_87, sizeof *fpregs);
        return (0);
}
int
fill_dbregs(struct lwp *lp, struct dbreg *dbregs)
{
        struct pcb *pcb;

        if (lp == NULL) {
                dbregs->dr[0] = rdr0();
                dbregs->dr[1] = rdr1();
                dbregs->dr[2] = rdr2();
                dbregs->dr[3] = rdr3();
                dbregs->dr[4] = rdr4();
                dbregs->dr[5] = rdr5();
                dbregs->dr[6] = rdr6();
                dbregs->dr[7] = rdr7();
        } else {
                pcb = lp->lwp_thread->td_pcb;
                dbregs->dr[0] = pcb->pcb_dr0;
                dbregs->dr[1] = pcb->pcb_dr1;
                dbregs->dr[2] = pcb->pcb_dr2;
                dbregs->dr[3] = pcb->pcb_dr3;
                dbregs->dr[4] = 0;
                dbregs->dr[5] = 0;
                dbregs->dr[6] = pcb->pcb_dr6;
                dbregs->dr[7] = pcb->pcb_dr7;
        }
        return (0);
}
int
set_dbregs(struct lwp *lp, struct dbreg *dbregs)
{
        if (lp == NULL) {
                load_dr0(dbregs->dr[0]);
                load_dr1(dbregs->dr[1]);
                load_dr2(dbregs->dr[2]);
                load_dr3(dbregs->dr[3]);
                load_dr4(dbregs->dr[4]);
                load_dr5(dbregs->dr[5]);
                load_dr6(dbregs->dr[6]);
                load_dr7(dbregs->dr[7]);
        } else {
                struct pcb *pcb;
                struct ucred *ucred;
                int i;
                uint64_t mask1, mask2;

                /*
                 * Don't let an illegal value for dr7 get set.  Specifically,
                 * check for undefined settings.  Setting these bit patterns
                 * result in undefined behaviour and can lead to an unexpected
                 * crash or reboot.
                 */
                /* JG this loop looks unreadable */
                /* Check 4 2-bit fields for invalid patterns.
                 * These fields are R/Wi, for i = 0..3
                 */
                /* Is 10 in LENi allowed when running in compatibility mode? */
                /* Pattern 10 in R/Wi might be used to indicate
                 * breakpoint on I/O. Further analysis should be
                 * carried to decide if it is safe and useful to
                 * provide access to that capability
                 */
                for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 4;
                     i++, mask1 <<= 4, mask2 <<= 4)
                        if ((dbregs->dr[7] & mask1) == mask2)
                                return (EINVAL);

                pcb = lp->lwp_thread->td_pcb;
                ucred = lp->lwp_proc->p_ucred;

                /*
                 * Don't let a process set a breakpoint that is not within the
                 * process's address space.  If a process could do this, it
                 * could halt the system by setting a breakpoint in the kernel
                 * (if ddb was enabled).  Thus, we need to check to make sure
                 * that no breakpoints are being enabled for addresses outside
                 * process's address space, unless, perhaps, we were called by
                 * kernel debugger.
                 *
                 * XXX - what about when the watched area of the user's
                 * address space is written into from within the kernel
                 * ... wouldn't that still cause a breakpoint to be generated
                 * from within kernel mode?
                 */
                if (priv_check_cred(ucred, PRIV_ROOT, 0) != 0) {
                        if (dbregs->dr[7] & 0x3) {
                                /* dr0 is enabled */
                                if (dbregs->dr[0] >= VM_MAX_USER_ADDRESS)
                                        return (EINVAL);
                        }
                        if (dbregs->dr[7] & (0x3<<2)) {
                                /* dr1 is enabled */
                                if (dbregs->dr[1] >= VM_MAX_USER_ADDRESS)
                                        return (EINVAL);
                        }
                        if (dbregs->dr[7] & (0x3<<4)) {
                                /* dr2 is enabled */
                                if (dbregs->dr[2] >= VM_MAX_USER_ADDRESS)
                                        return (EINVAL);
                        }
                        if (dbregs->dr[7] & (0x3<<6)) {
                                /* dr3 is enabled */
                                if (dbregs->dr[3] >= VM_MAX_USER_ADDRESS)
                                        return (EINVAL);
                        }
                }

                pcb->pcb_dr0 = dbregs->dr[0];
                pcb->pcb_dr1 = dbregs->dr[1];
                pcb->pcb_dr2 = dbregs->dr[2];
                pcb->pcb_dr3 = dbregs->dr[3];
                pcb->pcb_dr6 = dbregs->dr[6];
                pcb->pcb_dr7 = dbregs->dr[7];

                pcb->pcb_flags |= PCB_DBREGS;
        }
        return (0);
}
/*
 * Return > 0 if a hardware breakpoint has been hit, and the
 * breakpoint was in user space.  Return 0, otherwise.
 */
int
user_dbreg_trap(void)
{
        u_int64_t dr7, dr6;	/* debug registers dr6 and dr7 */
        u_int64_t bp;		/* breakpoint bits extracted from dr6 */
        int nbp;		/* number of breakpoints that triggered */
        caddr_t addr[4];	/* breakpoint addresses */
        int i;

        dr7 = rdr7();
        if ((dr7 & 0xff) == 0) {
                /*
                 * all GE and LE bits in the dr7 register are zero,
                 * thus the trap couldn't have been caused by the
                 * hardware debug registers
                 */
                return 0;
        }

        nbp = 0;
        dr6 = rdr6();
        bp = dr6 & 0xf;

        if (!bp) {
                /*
                 * None of the breakpoint bits are set meaning this
                 * trap was not caused by any of the debug registers
                 */
                return 0;
        }

        /*
         * at least one of the breakpoints were hit, check to see
         * which ones and if any of them are user space addresses
         */

        if (bp & 0x01)
                addr[nbp++] = (caddr_t)rdr0();
        if (bp & 0x02)
                addr[nbp++] = (caddr_t)rdr1();
        if (bp & 0x04)
                addr[nbp++] = (caddr_t)rdr2();
        if (bp & 0x08)
                addr[nbp++] = (caddr_t)rdr3();

        for (i = 0; i < nbp; i++) {
                if (addr[i] < (caddr_t)VM_MAX_USER_ADDRESS) {
                        /*
                         * addr[i] is in user space
                         */
                        return nbp;
                }
        }

        /*
         * None of the breakpoints are in user space.
         */
        return 0;
}
#ifndef DDB
void
Debugger(const char *msg)
{
        kprintf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
#ifdef DDB

/*
 * Provide inb() and outb() as functions.  They are normally only
 * available as macros calling inlined functions, thus cannot be
 * called inside DDB.
 *
 * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
 */

#undef inb
#undef outb

/* silence compiler warnings */
u_char inb(u_int);
void outb(u_int, u_char);

u_char
inb(u_int port)
{
        u_char	data;
        /*
         * We use %%dx and not %1 here because i/o is done at %dx and not at
         * %edx, while gcc generates inferior code (movw instead of movl)
         * if we tell it to load (u_short) port.
         */
        __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
        return (data);
}

void
outb(u_int port, u_char data)
{
        u_char	al;
        /*
         * Use an unnecessary assignment to help gcc's register allocator.
         * This make a large difference for gcc-1.40 and a tiny difference
         * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
         * best results.  gcc-2.6.0 can't handle this.
         */
        al = data;
        __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
}

#endif /* DDB */
#include "opt_cpu.h"

/*
 * initialize all the SMP locks
 */

/* critical region when masking or unmasking interrupts */
struct spinlock_deprecated imen_spinlock;

/* Make FAST_INTR() routines sequential */
struct spinlock_deprecated fast_intr_spinlock;

/* critical region for old style disable_intr/enable_intr */
struct spinlock_deprecated mpintr_spinlock;

/* critical region around INTR() routines */
struct spinlock_deprecated intr_spinlock;

/* lock region used by kernel profiling */
struct spinlock_deprecated mcount_spinlock;

/* locks com (tty) data/hardware accesses: a FASTINTR() */
struct spinlock_deprecated com_spinlock;

/* locks kernel kprintfs */
struct spinlock_deprecated cons_spinlock;

/* lock regions around the clock hardware */
struct spinlock_deprecated clock_spinlock;

/* lock around the MP rendezvous */
struct spinlock_deprecated smp_rv_spinlock;

static void
init_locks(void)
{
#ifdef SMP
        /*
         * mp_lock = 0;	BSP already owns the MP lock
         */
        /*
         * Get the initial mp_lock with a count of 1 for the BSP.
         * This uses a LOGICAL cpu ID, ie BSP == 0.
         */
        cpu_get_initial_mplock();
#endif
        spin_lock_init(&mcount_spinlock);
        spin_lock_init(&fast_intr_spinlock);
        spin_lock_init(&intr_spinlock);
        spin_lock_init(&mpintr_spinlock);
        spin_lock_init(&imen_spinlock);
        spin_lock_init(&smp_rv_spinlock);
        spin_lock_init(&com_spinlock);
        spin_lock_init(&clock_spinlock);
        spin_lock_init(&cons_spinlock);

        /* our token pool needs to work early */
        lwkt_token_pool_init();
}