vkernel - Fix more pagein/pageout corruption
[dragonfly.git] / sys / vm / vm_vmspace.c
blob 67007978855d649b87835aedcd75e8820f132f55
/*
 * (MPSAFE)
 *
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kern_syscall.h>
#include <sys/mman.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vkernel.h>
#include <sys/vmspace.h>

#include <vm/vm_extern.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>

#include <sys/sysref2.h>
static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp,
                                                  void *id, int havetoken);
static int vmspace_entry_delete(struct vmspace_entry *ve,
                                struct vkernel_proc *vkp, int refs);
static void vmspace_entry_cache_ref(struct vmspace_entry *ve);
static void vmspace_entry_cache_drop(struct vmspace_entry *ve);
static void vmspace_entry_drop(struct vmspace_entry *ve);

static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");
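
/*
 * Reference counting overview (a summary of how this file uses the two
 * counters on struct vmspace_entry, which is declared in <sys/vkernel.h>):
 *
 *      ve->refs        Active references from lookups and from LWPs
 *                      currently running on the vmspace.  The
 *                      VKE_REF_DELETED flag bit marks the entry as
 *                      deleted.
 *
 *      ve->cache_refs  Cache references held by per-LWP caches, plus one
 *                      reference representing the entry being on the RB
 *                      tree and not deleted.  The final cache_drop
 *                      performs the kfree().
 */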
/*
 * vmspace_create (void *id, int type, void *data)
 *
 * Create a VMSPACE under the control of the caller with the specified id.
 * An id of NULL cannot be used.  The type and data fields must currently
 * be 0.
 *
 * The vmspace starts out completely empty.  Memory may be mapped into the
 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled
 * with vmspace_mcontrol().
 *
 * No requirements.
 */
int
sys_vmspace_create(struct vmspace_create_args *uap)
{
        struct vmspace_entry *ve;
        struct vkernel_proc *vkp;
        struct proc *p = curproc;
        int error;

        if (vkernel_enable == 0)
                return (EOPNOTSUPP);

        /*
         * Create a virtual kernel side-structure for the process if one
         * does not exist.
         *
         * Implement a simple resolution for SMP races.
         */
        if ((vkp = p->p_vkernel) == NULL) {
                vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
                lwkt_gettoken(&p->p_token);
                if (p->p_vkernel == NULL) {
                        vkp->refs = 1;
                        lwkt_token_init(&vkp->token, "vkernel");
                        RB_INIT(&vkp->root);
                        p->p_vkernel = vkp;
                } else {
                        kfree(vkp, M_VKERNEL);
                        vkp = p->p_vkernel;
                }
                lwkt_reltoken(&p->p_token);
        }

        if (curthread->td_vmm)
                return 0;

        /*
         * Create a new VMSPACE, disallow conflicting ids
         */
        ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
        ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        ve->id = uap->id;
        ve->refs = 0;           /* active refs (none) */
        ve->cache_refs = 1;     /* on-tree, not deleted (prevent kfree) */
        pmap_pinit2(vmspace_pmap(ve->vmspace));

        lwkt_gettoken(&vkp->token);
        if (RB_INSERT(vmspace_rb_tree, &vkp->root, ve)) {
                vmspace_rel(ve->vmspace);
                ve->vmspace = NULL; /* safety */
                kfree(ve, M_VKERNEL);
                error = EEXIST;
        } else {
                error = 0;
        }
        lwkt_reltoken(&vkp->token);

        return (error);
}
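
/*
 * Illustrative sketch (comment only, not compiled): how a virtual kernel
 * might create and populate a guest VMSPACE from userland, based on the
 * argument lists documented above.  The userland wrapper names and the
 * guest_addr/guest_len/fd values are assumptions for illustration, not a
 * reference for the real libc prototypes.
 *
 *      void *id = guest_vmspace_id;                    // hypothetical id
 *
 *      if (vmspace_create(id, 0, NULL) < 0)            // type/data must be 0
 *              err(1, "vmspace_create");
 *
 *      // Map guest memory; vkernels typically use MAP_VPAGETABLE mappings
 *      // and then adjust them with vmspace_mcontrol().
 *      vmspace_mmap(id, guest_addr, guest_len,
 *                   PROT_READ | PROT_WRITE | PROT_EXEC,
 *                   MAP_FIXED | MAP_VPAGETABLE, fd, 0);
 */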
/*
 * Destroy a VMSPACE given its identifier.
 *
 * No requirements.
 */
int
sys_vmspace_destroy(struct vmspace_destroy_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL)
                return EINVAL;

        /*
         * vkp->token protects the deletion against a new RB tree search.
         */
        lwkt_gettoken(&vkp->token);
        error = ENOENT;
        if ((ve = vkernel_find_vmspace(vkp, uap->id, 1)) != NULL) {
                error = vmspace_entry_delete(ve, vkp, 1);
                if (error == 0)
                        vmspace_entry_cache_drop(ve);
        }
        lwkt_reltoken(&vkp->token);

        return(error);
}
/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *              struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE.  Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs.  The context is updated as appropriate.
 *
 * No requirements.
 */
int
sys_vmspace_ctl(struct vmspace_ctl_args *uap)
{
        struct vkernel_proc *vkp;
        struct vkernel_lwp *vklp;
        struct vmspace_entry *ve = NULL;
        struct lwp *lp;
        struct proc *p;
        int framesz;
        int error;

        lp = curthread->td_lwp;
        p = lp->lwp_proc;

        if ((vkp = p->p_vkernel) == NULL)
                return (EINVAL);

        /*
         * ve only matters when VMM is not used.
         */
        if (curthread->td_vmm == NULL) {
                if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                        error = ENOENT;
                        goto done;
                }
        }

        switch(uap->cmd) {
        case VMSPACE_CTL_RUN:
                /*
                 * Save the caller's register context, swap VM spaces, and
                 * install the passed register context.  Return with
                 * EJUSTRETURN so the syscall code doesn't adjust the context.
                 */
                framesz = sizeof(struct trapframe);
                if ((vklp = lp->lwp_vkernel) == NULL) {
                        vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
                                       M_WAITOK|M_ZERO);
                        lp->lwp_vkernel = vklp;
                }
                if (ve && vklp->ve_cache != ve) {
                        vmspace_entry_cache_ref(ve);
                        if (vklp->ve_cache)
                                vmspace_entry_cache_drop(vklp->ve_cache);
                        vklp->ve_cache = ve;
                }
                vklp->user_trapframe = uap->tframe;
                vklp->user_vextframe = uap->vframe;
                bcopy(uap->sysmsg_frame, &vklp->save_trapframe, framesz);
                bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
                      sizeof(vklp->save_vextframe.vx_tls));
                error = copyin(uap->tframe, uap->sysmsg_frame, framesz);
                if (error == 0) {
                        error = copyin(&uap->vframe->vx_tls,
                                       &curthread->td_tls,
                                       sizeof(struct savetls));
                }
                if (error == 0)
                        error = cpu_sanitize_frame(uap->sysmsg_frame);
                if (error == 0)
                        error = cpu_sanitize_tls(&curthread->td_tls);
                if (error) {
                        bcopy(&vklp->save_trapframe, uap->sysmsg_frame,
                              framesz);
                        bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
                              sizeof(vklp->save_vextframe.vx_tls));
                        set_user_TLS();
                } else {
                        /*
                         * If it's a VMM thread just set the CR3.  We also
                         * set vklp->ve to a key so we can distinguish when
                         * a vkernel user process is running and when it is
                         * not (vklp->ve is NULL).
                         */
                        if (curthread->td_vmm == NULL) {
                                vklp->ve = ve;
                                atomic_add_int(&ve->refs, 1);
                                pmap_setlwpvm(lp, ve->vmspace);
                        } else {
                                vklp->ve = uap->id;
                                vmm_vm_set_guest_cr3((register_t)uap->id);
                        }
                        set_user_TLS();
                        set_vkernel_fp(uap->sysmsg_frame);
                        error = EJUSTRETURN;
                }
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }
done:
        if (ve)
                vmspace_entry_drop(ve);

        return(error);
}
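
/*
 * Illustrative sketch (comment only, not compiled): the VMSPACE_CTL_RUN
 * round trip as seen from the virtual kernel.  The syscall installs the
 * guest register context and, when the guest faults, traps, or makes a
 * system call, vkernel_trap() copies the guest context back out and the
 * call appears to return.  Everything below other than VMSPACE_CTL_RUN,
 * struct trapframe, and struct vextframe is a hypothetical name.
 *
 *      struct trapframe tf;            // guest register context
 *      struct vextframe vxf;           // guest TLS state
 *
 *      for (;;) {
 *              setup_guest_context(&tf, &vxf);         // hypothetical
 *              vmspace_ctl(id, VMSPACE_CTL_RUN, &tf, &vxf);
 *              handle_guest_exit(&tf);                 // hypothetical:
 *                                                      // dispatch the fault,
 *                                                      // trap, or syscall
 *      }
 */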
/*
 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
 *
 * map memory within a VMSPACE.  This function is just like a normal mmap()
 * but operates on the vmspace's memory map.  Most callers use this to create
 * a MAP_VPAGETABLE mapping.
 *
 * No requirements.
 */
int
sys_vmspace_mmap(struct vmspace_mmap_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done2;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done2;
        }

        error = kern_mmap(ve->vmspace, uap->addr, uap->len,
                          uap->prot, uap->flags,
                          uap->fd, uap->offset, &uap->sysmsg_resultp);

        vmspace_entry_drop(ve);
done2:
        return (error);
}
/*
 * vmspace_munmap(id, addr, len)
 *
 * unmap memory within a VMSPACE.
 *
 * No requirements.
 */
int
sys_vmspace_munmap(struct vmspace_munmap_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        vm_offset_t addr;
        vm_offset_t tmpaddr;
        vm_size_t size, pageoff;
        vm_map_t map;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done2;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done2;
        }

        /*
         * NOTE: kern_munmap() can block so we need to temporarily
         *       ref ve->refs.
         */

        /*
         * Copied from sys_munmap()
         */
        addr = (vm_offset_t)uap->addr;
        size = uap->len;

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vm_size_t)round_page(size);
        if (size < uap->len) {          /* wrap */
                error = EINVAL;
                goto done1;
        }
        tmpaddr = addr + size;          /* workaround gcc4 opt */
        if (tmpaddr < addr) {           /* wrap */
                error = EINVAL;
                goto done1;
        }
        if (size == 0) {
                error = 0;
                goto done1;
        }

        if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }
        if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }
        map = &ve->vmspace->vm_map;
        if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE, FALSE)) {
                error = EINVAL;
                goto done1;
        }
        vm_map_remove(map, addr, addr + size);
        error = 0;
done1:
        vmspace_entry_drop(ve);
done2:
        return (error);
}
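
/*
 * Worked example of the rounding above, assuming 4KiB pages: for
 * addr = 0x10234 and len = 0x100, pageoff = 0x234, so addr is truncated
 * to 0x10000 and size becomes round_page(0x100 + 0x234) = 0x1000.  The
 * whole page containing the requested range is unmapped, mirroring the
 * behavior of sys_munmap().
 */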
/*
 * vmspace_pread(id, buf, nbyte, flags, offset)
 *
 * Read data from a vmspace.  The number of bytes read is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes read is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pread(struct vmspace_pread_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done3;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done3;
        }
        vmspace_entry_drop(ve);
        error = EINVAL;
done3:
        return (error);
}
/*
 * vmspace_pwrite(id, buf, nbyte, flags, offset)
 *
 * Write data to a vmspace.  The number of bytes written is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes written is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pwrite(struct vmspace_pwrite_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done3;
        }
        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done3;
        }
        vmspace_entry_drop(ve);
        error = EINVAL;
done3:
        return (error);
}
/*
 * vmspace_mcontrol(id, addr, len, behav, value)
 *
 * madvise/mcontrol support for a vmspace.
 *
 * No requirements.
 */
int
sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        struct lwp *lp;
        vm_offset_t start, end;
        vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
        int error;

        lp = curthread->td_lwp;
        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done3;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done3;
        }

        /*
         * This code is basically copied from sys_mcontrol()
         */
        if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
                error = EINVAL;
                goto done1;
        }

        if (tmpaddr < (vm_offset_t)uap->addr) {
                error = EINVAL;
                goto done1;
        }
        if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }
        if (VM_MIN_USER_ADDRESS > 0 && uap->addr < (void *)VM_MIN_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }

        start = trunc_page((vm_offset_t) uap->addr);
        end = round_page(tmpaddr);

        error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
                               uap->behav, uap->value);
done1:
        vmspace_entry_drop(ve);
done3:
        return (error);
}
/*
 * Red black tree functions
 */
static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);

/*
 * The id is used as the comparison key; it is the only field that has to
 * be initialized in a lookup key.
 *
 * The caller must hold vkp->token.
 */
static int
rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
{
        if ((char *)a->id < (char *)b->id)
                return(-1);
        else if ((char *)a->id > (char *)b->id)
                return(1);
        return(0);
}
/*
 * The caller must hold vkp->token.
 */
static
int
rb_vmspace_delete(struct vmspace_entry *ve, void *data)
{
        struct vkernel_proc *vkp = data;

        if (vmspace_entry_delete(ve, vkp, 0) == 0)
                vmspace_entry_cache_drop(ve);
        else
                panic("rb_vmspace_delete: invalid refs %d", ve->refs);
        return(0);
}
/*
 * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
 * up the pmap, the vm_map, then destroy the vmspace.  We gain control of
 * the associated cache_refs ref, which the caller will drop for us.
 *
 * The ve must not have any active references other than those from the
 * caller.  If it does, EBUSY is returned.  The ve may still maintain
 * any number of cache references which will drop as the related LWPs
 * execute vmspace operations or exit.
 *
 * 0 is returned on success, EBUSY on failure.  On success the caller must
 * drop the last cache_refs.  We have dropped the caller's active refs.
 *
 * The caller must hold vkp->token.
 */
static
int
vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp,
                     int refs)
{
        /*
         * Interlocked by vkp->token.
         *
         * Drop the caller's refs and set VKE_REF_DELETED atomically, if
         * the remaining refs match exactly.  Dropping refs and setting
         * the DELETED flag atomically protects other threads from trying
         * to use the ve.
         *
         * The caller now owns the final cache_ref that was previously
         * associated with the live state of the ve.
         */
        if (atomic_cmpset_int(&ve->refs, refs, VKE_REF_DELETED) == 0) {
                KKASSERT(ve->refs >= refs);
                return EBUSY;
        }
        RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);

        pmap_remove_pages(vmspace_pmap(ve->vmspace),
                          VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        vm_map_remove(&ve->vmspace->vm_map,
                      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        vmspace_rel(ve->vmspace);
        ve->vmspace = NULL; /* safety */

        return 0;
}
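
/*
 * Example of the interlock above: sys_vmspace_destroy() calls
 * vmspace_entry_delete() with refs == 1 (the active ref from its own
 * lookup), so the cmpset succeeds only if no other thread holds an active
 * ref; rb_vmspace_delete() calls it with refs == 0 during final teardown.
 * On failure the function returns EBUSY without touching the vmspace.
 */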
/*
 * Ref a ve for cache purposes
 */
static
void
vmspace_entry_cache_ref(struct vmspace_entry *ve)
{
        atomic_add_int(&ve->cache_refs, 1);
}

/*
 * The ve cache_drop is the final word for a ve.  It gains an extra ref
 * representing it being on the RB tree and not being in a deleted state.
 * Removal from the RB tree and deletion manipulate this ref.  The last
 * drop will thus include full deletion of the ve in addition to the last
 * cached user going away.
 */
static
void
vmspace_entry_cache_drop(struct vmspace_entry *ve)
{
        if (atomic_fetchadd_int(&ve->cache_refs, -1) == 1) {
                KKASSERT(ve->refs & VKE_REF_DELETED);
                kfree(ve, M_VKERNEL);
        }
}

/*
 * Drop primary reference.  The ve cannot be freed on the 1->0 transition.
 * Instead, ve deletion interlocks the final kfree() via cache_refs.
 */
static
void
vmspace_entry_drop(struct vmspace_entry *ve)
{
        atomic_fetchadd_int(&ve->refs, -1);
}
/*
 * Locate the ve for (id), return the ve or NULL.  If found this function
 * will bump ve->refs which prevents the ve from being immediately destroyed
 * (but it can still be removed).
 *
 * The cache can potentially contain a stale ve, check by testing ve->vmspace.
 *
 * The caller must hold vkp->token if excl is non-zero.
 */
static
struct vmspace_entry *
vkernel_find_vmspace(struct vkernel_proc *vkp, void *id, int excl)
{
        struct vmspace_entry *ve;
        struct vmspace_entry key;
        struct vkernel_lwp *vklp;
        struct lwp *lp = curthread->td_lwp;

        /*
         * Cache check.  Since we already hold a ref on the cache entry
         * the ve cannot be ripped out from under us while we cycle
         * ve->refs.
         */
        if ((vklp = lp->lwp_vkernel) != NULL) {
                ve = vklp->ve_cache;
                if (ve && ve->id == id) {
                        uint32_t n;

                        /*
                         * Bump active refs, check to see if the cache
                         * entry is stale.  If not, we are good.
                         */
                        n = atomic_fetchadd_int(&ve->refs, 1);
                        if ((n & VKE_REF_DELETED) == 0) {
                                KKASSERT(ve->vmspace);
                                return ve;
                        }

                        /*
                         * Cache is stale, clean it out and fall through
                         * to a normal search.
                         */
                        vklp->ve_cache = NULL;
                        vmspace_entry_drop(ve);
                        vmspace_entry_cache_drop(ve);
                }
        }

        /*
         * Normal search protected by vkp->token.  No new ve's can be marked
         * DELETED while we hold the token so we are safe.
         */
        if (excl == 0)
                lwkt_gettoken_shared(&vkp->token);
        key.id = id;
        ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
        if (ve) {
                if (atomic_fetchadd_int(&ve->refs, 1) & VKE_REF_DELETED) {
                        vmspace_entry_drop(ve);
                        ve = NULL;
                }
        }
        if (excl == 0)
                lwkt_reltoken(&vkp->token);
        return (ve);
}
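
/*
 * Note on the fast path above: the per-LWP ve_cache entry holds a
 * cache_refs reference (taken in sys_vmspace_ctl()), which guarantees the
 * cached ve cannot be kfree()d out from under us even if it has been
 * deleted.  The VKE_REF_DELETED check then detects staleness without
 * having to acquire vkp->token.
 */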
/*
 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
 * a vkernel process.
 *
 * No requirements.
 */
void
vkernel_inherit(struct proc *p1, struct proc *p2)
{
        struct vkernel_proc *vkp;

        vkp = p1->p_vkernel;
        KKASSERT(vkp->refs > 0);
        atomic_add_int(&vkp->refs, 1);
        p2->p_vkernel = vkp;
}
/*
 * No requirements.
 */
void
vkernel_exit(struct proc *p)
{
        struct vkernel_proc *vkp;
        struct lwp *lp;

        vkp = p->p_vkernel;

        /*
         * Restore the original VM context if we are killed while running
         * a different one.
         *
         * This isn't supposed to happen.  What is supposed to happen is
         * that the process should enter vkernel_trap() before handling
         * the signal.
         */
        RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
                vkernel_lwp_exit(lp);
        }

        /*
         * Dereference the common area
         */
        p->p_vkernel = NULL;
        KKASSERT(vkp->refs > 0);

        if (atomic_fetchadd_int(&vkp->refs, -1) == 1) {
                lwkt_gettoken(&vkp->token);
                RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
                        rb_vmspace_delete, vkp);
                lwkt_reltoken(&vkp->token);
                kfree(vkp, M_VKERNEL);
        }
}
/*
 * No requirements.
 */
void
vkernel_lwp_exit(struct lwp *lp)
{
        struct vkernel_lwp *vklp;
        struct vmspace_entry *ve;

        if ((vklp = lp->lwp_vkernel) != NULL) {
                if (lp->lwp_thread->td_vmm == NULL) {
                        /*
                         * vkernel thread
                         */
                        if ((ve = vklp->ve) != NULL) {
                                kprintf("Warning, pid %d killed with "
                                        "active VC!\n", lp->lwp_proc->p_pid);
                                pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
                                vklp->ve = NULL;
                                KKASSERT(ve->refs > 0);
                                vmspace_entry_drop(ve);
                        }
                } else {
                        /*
                         * guest thread
                         */
                        vklp->ve = NULL;
                }
                if ((ve = vklp->ve_cache) != NULL) {
                        vklp->ve_cache = NULL;
                        vmspace_entry_cache_drop(ve);
                }

                lp->lwp_vkernel = NULL;
                kfree(vklp, M_VKERNEL);
        }
}
/*
 * A VM space under virtual kernel control trapped out or made a system call
 * or otherwise needs to return control to the virtual kernel context.
 *
 * No requirements.
 */
void
vkernel_trap(struct lwp *lp, struct trapframe *frame)
{
        struct proc *p = lp->lwp_proc;
        struct vmspace_entry *ve;
        struct vkernel_lwp *vklp;
        int error;

        /*
         * Which vmspace entry was running?
         */
        vklp = lp->lwp_vkernel;
        KKASSERT(vklp);

        /* If it's a VMM thread just set the vkernel CR3 back */
        if (curthread->td_vmm == NULL) {
                ve = vklp->ve;
                KKASSERT(ve != NULL);

                /*
                 * Switch the LWP vmspace back to the virtual kernel's
                 * VM space.
                 */
                vklp->ve = NULL;
                pmap_setlwpvm(lp, p->p_vmspace);
                KKASSERT(ve->refs > 0);
                vmspace_entry_drop(ve);
                /* ve is invalid once we kill our ref */
        } else {
                vklp->ve = NULL;
                vmm_vm_set_guest_cr3(p->p_vkernel->vkernel_cr3);
        }

        /*
         * Copy the emulated process frame to the virtual kernel process.
         * The emulated process cannot change TLS descriptors so don't
         * bother saving them, we already have a copy.
         *
         * Restore the virtual kernel's saved context so the virtual kernel
         * process can resume.
         */
        error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
        bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
        bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
              sizeof(vklp->save_vextframe.vx_tls));
        set_user_TLS();
        cpu_vkernel_trap(frame, error);
}