kernel - VM rework part 9 - Precursor work for terminal pv_entry removal
[dragonfly.git] / sys / vm / vm_object.c
1 /*
2 * Copyright (c) 1991, 1993, 2013
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
32 * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94
35 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
36 * All rights reserved.
38 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
40 * Permission to use, copy, modify and distribute this software and
41 * its documentation is hereby granted, provided that both the copyright
42 * notice and this permission notice appear in all copies of the
43 * software, derivative works or modified versions, and any portions
44 * thereof, and that both notices appear in supporting documentation.
46 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
47 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
48 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
50 * Carnegie Mellon requests users of this software to return to
52 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
53 * School of Computer Science
54 * Carnegie Mellon University
55 * Pittsburgh PA 15213-3890
57 * any improvements or extensions that they make and grant Carnegie the
58 * rights to redistribute these changes.
60 * $FreeBSD: src/sys/vm/vm_object.c,v 1.171.2.8 2003/05/26 19:17:56 alc Exp $
64 * Virtual memory object module.
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/proc.h> /* for curproc, pageproc */
70 #include <sys/thread.h>
71 #include <sys/vnode.h>
72 #include <sys/vmmeter.h>
73 #include <sys/mman.h>
74 #include <sys/mount.h>
75 #include <sys/kernel.h>
76 #include <sys/sysctl.h>
77 #include <sys/refcount.h>
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <vm/pmap.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_object.h>
84 #include <vm/vm_page.h>
85 #include <vm/vm_pageout.h>
86 #include <vm/vm_pager.h>
87 #include <vm/swap_pager.h>
88 #include <vm/vm_kern.h>
89 #include <vm/vm_extern.h>
90 #include <vm/vm_zone.h>
92 #include <vm/vm_page2.h>
94 #include <machine/specialreg.h>
96 #define EASY_SCAN_FACTOR 8
98 static void vm_object_page_collect_flush(vm_object_t object, vm_page_t p,
99 int pagerflags);
100 static void vm_object_lock_init(vm_object_t);
103 * Virtual memory objects maintain the actual data
104 * associated with allocated virtual memory. A given
105 * page of memory exists within exactly one object.
107 * An object is only deallocated when all "references"
108 * are given up. Only one "reference" to a given
109 * region of an object should be writeable.
111 * Associated with each object is a list of all resident
112 * memory pages belonging to that object; this list is
113 * maintained by the "vm_page" module, and locked by the object's
114 * lock.
116 * Each object also records a "pager" routine which is
117 * used to retrieve (and store) pages to the proper backing
118 * storage. In addition, objects may be backed by other
119 * objects from which they were virtual-copied.
121 * The only items within the object structure which are
122 * modified after time of creation are:
123 * reference count locked by object's lock
124 * pager routine locked by object's lock
128 struct vm_object kernel_object;
130 struct vm_object_hash vm_object_hash[VMOBJ_HSIZE];
132 MALLOC_DEFINE(M_VM_OBJECT, "vm_object", "vm_object structures");
134 #define VMOBJ_HASH_PRIME1 66555444443333333ULL
135 #define VMOBJ_HASH_PRIME2 989042931893ULL
137 int vm_object_debug;
138 SYSCTL_INT(_vm, OID_AUTO, object_debug, CTLFLAG_RW, &vm_object_debug, 0, "");
140 static __inline
141 struct vm_object_hash *
142 vmobj_hash(vm_object_t obj)
144 uintptr_t hash1;
145 uintptr_t hash2;
147 hash1 = (uintptr_t)obj + ((uintptr_t)obj >> 18);
148 hash1 %= VMOBJ_HASH_PRIME1;
149 hash2 = ((uintptr_t)obj >> 8) + ((uintptr_t)obj >> 24);
150 hash2 %= VMOBJ_HASH_PRIME2;
151 return (&vm_object_hash[(hash1 ^ hash2) & VMOBJ_HMASK]);
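/*
 * Illustrative sketch only (not part of the build): the per-bucket token
 * returned by vmobj_hash() is how insert/remove operations on the global
 * object lists are serialized, mirroring the pattern used later in
 * _vm_object_allocate() and vm_object_terminate().
 */
#if 0
	struct vm_object_hash *hash;

	hash = vmobj_hash(object);
	lwkt_gettoken(&hash->token);
	TAILQ_INSERT_TAIL(&hash->list, object, object_entry);
	lwkt_reltoken(&hash->token);
#endif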
154 #if defined(DEBUG_LOCKS)
156 #define vm_object_vndeallocate(obj, vpp) \
157 debugvm_object_vndeallocate(obj, vpp, __FILE__, __LINE__)
160 * Debug helper to track hold/drop/ref/deallocate calls.
162 static void
163 debugvm_object_add(vm_object_t obj, char *file, int line, int addrem)
165 int i;
167 i = atomic_fetchadd_int(&obj->debug_index, 1);
168 i = i & (VMOBJ_DEBUG_ARRAY_SIZE - 1);
169 ksnprintf(obj->debug_hold_thrs[i],
170 sizeof(obj->debug_hold_thrs[i]),
171 "%c%d:(%d):%s",
172 (addrem == -1 ? '-' : (addrem == 1 ? '+' : '=')),
173 (curthread->td_proc ? curthread->td_proc->p_pid : -1),
174 obj->ref_count,
175 curthread->td_comm);
176 obj->debug_hold_file[i] = file;
177 obj->debug_hold_line[i] = line;
178 #if 0
179 /* Uncomment for debugging obj refs/derefs in reproducible cases */
180 if (strcmp(curthread->td_comm, "sshd") == 0) {
181 kprintf("%d %p refs=%d ar=%d file: %s/%d\n",
182 (curthread->td_proc ? curthread->td_proc->p_pid : -1),
183 obj, obj->ref_count, addrem, file, line);
185 #endif
188 #endif
191 * Misc low level routines
193 static void
194 vm_object_lock_init(vm_object_t obj)
196 #if defined(DEBUG_LOCKS)
197 int i;
199 obj->debug_index = 0;
200 for (i = 0; i < VMOBJ_DEBUG_ARRAY_SIZE; i++) {
201 obj->debug_hold_thrs[i][0] = 0;
202 obj->debug_hold_file[i] = NULL;
203 obj->debug_hold_line[i] = 0;
205 #endif
208 void
209 vm_object_lock_swap(void)
211 lwkt_token_swap();
214 void
215 vm_object_lock(vm_object_t obj)
217 lwkt_gettoken(&obj->token);
221 * Returns TRUE on success
223 static int
224 vm_object_lock_try(vm_object_t obj)
226 return(lwkt_trytoken(&obj->token));
229 void
230 vm_object_lock_shared(vm_object_t obj)
232 lwkt_gettoken_shared(&obj->token);
235 void
236 vm_object_unlock(vm_object_t obj)
238 lwkt_reltoken(&obj->token);
241 void
242 vm_object_upgrade(vm_object_t obj)
244 lwkt_reltoken(&obj->token);
245 lwkt_gettoken(&obj->token);
248 void
249 vm_object_downgrade(vm_object_t obj)
251 lwkt_reltoken(&obj->token);
252 lwkt_gettoken_shared(&obj->token);
255 static __inline void
256 vm_object_assert_held(vm_object_t obj)
258 ASSERT_LWKT_TOKEN_HELD(&obj->token);
262 vm_quickcolor(void)
264 globaldata_t gd = mycpu;
265 int pg_color;
267 pg_color = (int)(intptr_t)gd->gd_curthread >> 10;
268 pg_color += gd->gd_quick_color;
269 gd->gd_quick_color += PQ_PRIME2;
271 return pg_color;
274 void
275 VMOBJDEBUG(vm_object_hold)(vm_object_t obj VMOBJDBARGS)
277 KKASSERT(obj != NULL);
280 * Object must be held (object allocation is stable due to the caller's
281 * context, typically already holding the token on a parent object)
282 * prior to potentially blocking on the lock, otherwise the object
283 * can get ripped away from us.
285 refcount_acquire(&obj->hold_count);
286 vm_object_lock(obj);
288 #if defined(DEBUG_LOCKS)
289 debugvm_object_add(obj, file, line, 1);
290 #endif
294 VMOBJDEBUG(vm_object_hold_try)(vm_object_t obj VMOBJDBARGS)
296 KKASSERT(obj != NULL);
299 * Object must be held (object allocation is stable due to the caller's
300 * context, typically already holding the token on a parent object)
301 * prior to potentially blocking on the lock, otherwise the object
302 * can get ripped away from us.
304 refcount_acquire(&obj->hold_count);
305 if (vm_object_lock_try(obj) == 0) {
306 if (refcount_release(&obj->hold_count)) {
307 if (obj->ref_count == 0 && (obj->flags & OBJ_DEAD))
308 kfree(obj, M_VM_OBJECT);
310 return(0);
313 #if defined(DEBUG_LOCKS)
314 debugvm_object_add(obj, file, line, 1);
315 #endif
316 return(1);
319 void
320 VMOBJDEBUG(vm_object_hold_shared)(vm_object_t obj VMOBJDBARGS)
322 KKASSERT(obj != NULL);
325 * Object must be held (object allocation is stable due to the caller's
326 * context, typically already holding the token on a parent object)
327 * prior to potentially blocking on the lock, otherwise the object
328 * can get ripped away from us.
330 refcount_acquire(&obj->hold_count);
331 vm_object_lock_shared(obj);
333 #if defined(DEBUG_LOCKS)
334 debugvm_object_add(obj, file, line, 1);
335 #endif
339 * Drop the token and hold_count on the object.
341 * WARNING! Token might be shared.
343 void
344 VMOBJDEBUG(vm_object_drop)(vm_object_t obj VMOBJDBARGS)
346 if (obj == NULL)
347 return;
350 * No new holders should be possible once we drop hold_count 1->0 as
351 * there is no longer any way to reference the object.
353 KKASSERT(obj->hold_count > 0);
354 if (refcount_release(&obj->hold_count)) {
355 #if defined(DEBUG_LOCKS)
356 debugvm_object_add(obj, file, line, -1);
357 #endif
359 if (obj->ref_count == 0 && (obj->flags & OBJ_DEAD)) {
360 vm_object_unlock(obj);
361 kfree(obj, M_VM_OBJECT);
362 } else {
363 vm_object_unlock(obj);
365 } else {
366 #if defined(DEBUG_LOCKS)
367 debugvm_object_add(obj, file, line, -1);
368 #endif
369 vm_object_unlock(obj);
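/*
 * Usage sketch (illustrative, not compiled): a caller with a stable
 * pointer to the object, typically via a token held on a parent
 * structure, takes a hold before potentially blocking on the object
 * token and drops it when done.  The hold prevents the final kfree()
 * from occurring while the token is still being acquired or held.
 */
#if 0
	vm_object_hold(object);
	/* ... operate on the object with its token held ... */
	vm_object_drop(object);
#endif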
374 * Initialize a freshly allocated object, returning a held object.
376 * Used only by vm_object_allocate(), zinitna() and vm_object_init().
378 * No requirements.
380 void
381 _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
383 struct vm_object_hash *hash;
385 RB_INIT(&object->rb_memq);
386 lwkt_token_init(&object->token, "vmobj");
388 TAILQ_INIT(&object->backing_list);
389 object->type = type;
390 object->size = size;
391 object->ref_count = 1;
392 object->memattr = VM_MEMATTR_DEFAULT;
393 object->hold_count = 0;
394 object->flags = 0;
395 if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
396 vm_object_set_flag(object, OBJ_ONEMAPPING);
397 object->paging_in_progress = 0;
398 object->resident_page_count = 0;
399 /* cpu localization twist */
400 object->pg_color = vm_quickcolor();
401 object->handle = NULL;
403 atomic_add_int(&object->generation, 1);
404 object->swblock_count = 0;
405 RB_INIT(&object->swblock_root);
406 vm_object_lock_init(object);
407 pmap_object_init(object);
409 vm_object_hold(object);
411 hash = vmobj_hash(object);
412 lwkt_gettoken(&hash->token);
413 TAILQ_INSERT_TAIL(&hash->list, object, object_entry);
414 lwkt_reltoken(&hash->token);
418 * Initialize a VM object.
420 void
421 vm_object_init(vm_object_t object, vm_pindex_t size)
423 _vm_object_allocate(OBJT_DEFAULT, size, object);
424 vm_object_drop(object);
428 * Initialize the VM objects module.
430 * Called from the low level boot code only. Note that this occurs before
431 * kmalloc is initialized so we cannot allocate any VM objects.
433 void
434 vm_object_init1(void)
436 int i;
438 for (i = 0; i < VMOBJ_HSIZE; ++i) {
439 TAILQ_INIT(&vm_object_hash[i].list);
440 lwkt_token_init(&vm_object_hash[i].token, "vmobjlst");
443 _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(KvaEnd),
444 &kernel_object);
445 vm_object_drop(&kernel_object);
448 void
449 vm_object_init2(void)
451 kmalloc_set_unlimited(M_VM_OBJECT);
455 * Allocate and return a new object of the specified type and size.
457 * No requirements.
459 vm_object_t
460 vm_object_allocate(objtype_t type, vm_pindex_t size)
462 vm_object_t obj;
464 obj = kmalloc(sizeof(*obj), M_VM_OBJECT, M_INTWAIT|M_ZERO);
465 _vm_object_allocate(type, size, obj);
466 vm_object_drop(obj);
468 return (obj);
472 * This version returns a held object, allowing further atomic initialization
473 * of the object.
475 vm_object_t
476 vm_object_allocate_hold(objtype_t type, vm_pindex_t size)
478 vm_object_t obj;
480 obj = kmalloc(sizeof(*obj), M_VM_OBJECT, M_INTWAIT|M_ZERO);
481 _vm_object_allocate(type, size, obj);
483 return (obj);
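/*
 * Illustrative sketch (hypothetical caller, not taken from this file):
 * the held variant lets a caller finish initializing fields such as the
 * pager handle before dropping the object into general visibility.
 */
#if 0
	vm_object_t obj;

	obj = vm_object_allocate_hold(OBJT_DEFAULT, size);
	obj->handle = handle;		/* hypothetical handle setup */
	vm_object_drop(obj);
#endif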
487 * Add an additional reference to a vm_object. The object must already be
488 * held. The original non-lock version is no longer supported. The object
489 * must NOT be chain locked by anyone at the time the reference is added.
491 * The object must be held, but may be held shared if desired (hence why
492 * we use an atomic op).
494 void
495 VMOBJDEBUG(vm_object_reference_locked)(vm_object_t object VMOBJDBARGS)
497 KKASSERT(object != NULL);
498 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
499 atomic_add_int(&object->ref_count, 1);
500 if (object->type == OBJT_VNODE) {
501 vref(object->handle);
502 /* XXX what if the vnode is being destroyed? */
504 #if defined(DEBUG_LOCKS)
505 debugvm_object_add(object, file, line, 1);
506 #endif
510 * This version is only allowed in situations where the caller
511 * already knows that the object is deterministically referenced
512 * (usually because its taken from a ref'd vnode, or during a map_entry
513 * replication).
515 void
516 VMOBJDEBUG(vm_object_reference_quick)(vm_object_t object VMOBJDBARGS)
518 KKASSERT(object->type == OBJT_VNODE || object->ref_count > 0);
519 atomic_add_int(&object->ref_count, 1);
520 if (object->type == OBJT_VNODE)
521 vref(object->handle);
522 #if defined(DEBUG_LOCKS)
523 debugvm_object_add(object, file, line, 1);
524 #endif
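/*
 * Illustrative pairing (sketch only): every reference added via the
 * locked or quick variants must eventually be matched by a
 * vm_object_deallocate() (or vm_object_deallocate_locked()).
 */
#if 0
	vm_object_hold(object);
	vm_object_reference_locked(object);
	vm_object_drop(object);
	/* ... later, when the reference is no longer needed ... */
	vm_object_deallocate(object);
#endif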
528 * Dereference an object and its underlying vnode. The object may be
529 * held shared. On return the object will remain held.
531 * This function may return a vnode in *vpp which the caller must release
532 * after the caller drops its own lock. If vpp is NULL, we assume that
533 * the caller was holding an exclusive lock on the object and we vrele()
534 * the vp ourselves.
536 static void
537 VMOBJDEBUG(vm_object_vndeallocate)(vm_object_t object, struct vnode **vpp
538 VMOBJDBARGS)
540 struct vnode *vp = (struct vnode *) object->handle;
542 KASSERT(object->type == OBJT_VNODE,
543 ("vm_object_vndeallocate: not a vnode object"));
544 KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
545 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
546 #ifdef INVARIANTS
547 if (object->ref_count == 0) {
548 vprint("vm_object_vndeallocate", vp);
549 panic("vm_object_vndeallocate: bad object reference count");
551 #endif
552 for (;;) {
553 int count = object->ref_count;
554 cpu_ccfence();
555 if (count == 1) {
556 vm_object_upgrade(object);
557 if (atomic_cmpset_int(&object->ref_count, count, 0)) {
558 vclrflags(vp, VTEXT);
559 break;
561 } else {
562 if (atomic_cmpset_int(&object->ref_count,
563 count, count - 1)) {
564 break;
567 /* retry */
569 #if defined(DEBUG_LOCKS)
570 debugvm_object_add(object, file, line, -1);
571 #endif
574 * vrele or return the vp to vrele. We can only safely vrele(vp)
575 * if the object was locked exclusively. But there are two races
576 * here.
578 * We had to upgrade the object above to safely clear VTEXT
579 * but the alternative path where the shared lock is retained
580 * can STILL race to 0 in other paths and cause our own vrele()
581 * to terminate the vnode. We can't allow that if the VM object
582 * is still locked shared.
584 if (vpp)
585 *vpp = vp;
586 else
587 vrele(vp);
591 * Release a reference to the specified object, gained either through a
592 * vm_object_allocate or a vm_object_reference call. When all references
593 * are gone, storage associated with this object may be relinquished.
595 * The caller does not have to hold the object locked but must have control
596 * over the reference in question in order to guarantee that the object
597 * does not get ripped out from under us.
599 * XXX Currently all deallocations require an exclusive lock.
601 void
602 VMOBJDEBUG(vm_object_deallocate)(vm_object_t object VMOBJDBARGS)
604 struct vnode *vp;
605 int count;
607 if (object == NULL)
608 return;
610 for (;;) {
611 count = object->ref_count;
612 cpu_ccfence();
615 * If decrementing the count enters into special handling
616 * territory (0, 1, or 2) we have to do it the hard way.
617 * Fortunately though, objects with only a few refs like this
618 * are not likely to be heavily contended anyway.
620 * For vnode objects we only care about 1->0 transitions.
622 if (count <= 3 || (object->type == OBJT_VNODE && count <= 1)) {
623 #if defined(DEBUG_LOCKS)
624 debugvm_object_add(object, file, line, 0);
625 #endif
626 vm_object_hold(object);
627 vm_object_deallocate_locked(object);
628 vm_object_drop(object);
629 break;
633 * Try to decrement ref_count without acquiring a hold on
634 * the object. This is particularly important for the exec*()
635 * and exit*() code paths because the program binary may
636 * have a great deal of sharing and an exclusive lock will
637 * crowbar performance in those circumstances.
639 if (object->type == OBJT_VNODE) {
640 vp = (struct vnode *)object->handle;
641 if (atomic_cmpset_int(&object->ref_count,
642 count, count - 1)) {
643 #if defined(DEBUG_LOCKS)
644 debugvm_object_add(object, file, line, -1);
645 #endif
647 vrele(vp);
648 break;
650 /* retry */
651 } else {
652 if (atomic_cmpset_int(&object->ref_count,
653 count, count - 1)) {
654 #if defined(DEBUG_LOCKS)
655 debugvm_object_add(object, file, line, -1);
656 #endif
657 break;
659 /* retry */
661 /* retry */
665 void
666 VMOBJDEBUG(vm_object_deallocate_locked)(vm_object_t object VMOBJDBARGS)
669 * Degenerate case
671 if (object == NULL)
672 return;
675 * vnode case, caller either locked the object exclusively
676 * or this is a recursion with must_drop != 0 and the vnode
677 * object will be locked shared.
679 * If locked shared we have to drop the object before we can
680 * call vrele() or risk a shared/exclusive livelock.
682 if (object->type == OBJT_VNODE) {
683 ASSERT_LWKT_TOKEN_HELD(&object->token);
684 vm_object_vndeallocate(object, NULL);
685 return;
687 ASSERT_LWKT_TOKEN_HELD_EXCL(&object->token);
690 * Normal case (object is locked exclusively)
692 if (object->ref_count == 0) {
693 panic("vm_object_deallocate: object deallocated "
694 "too many times: %d", object->type);
696 if (object->ref_count > 2) {
697 atomic_add_int(&object->ref_count, -1);
698 #if defined(DEBUG_LOCKS)
699 debugvm_object_add(object, file, line, -1);
700 #endif
701 return;
705 * Drop the ref and handle termination on the 1->0 transition.
706 * We may have blocked above so we have to recheck.
708 KKASSERT(object->ref_count != 0);
709 if (object->ref_count >= 2) {
710 atomic_add_int(&object->ref_count, -1);
711 #if defined(DEBUG_LOCKS)
712 debugvm_object_add(object, file, line, -1);
713 #endif
714 return;
717 atomic_add_int(&object->ref_count, -1);
718 if ((object->flags & OBJ_DEAD) == 0)
719 vm_object_terminate(object);
723 * Destroy the specified object, freeing up related resources.
725 * The object must have zero references.
727 * The object must held. The caller is responsible for dropping the object
728 * after terminate returns. Terminate does NOT drop the object.
730 static int vm_object_terminate_callback(vm_page_t p, void *data);
732 void
733 vm_object_terminate(vm_object_t object)
735 struct rb_vm_page_scan_info info;
736 struct vm_object_hash *hash;
739 * Make sure no one uses us. Once we set OBJ_DEAD we should be
740 * able to safely block.
742 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
743 KKASSERT((object->flags & OBJ_DEAD) == 0);
744 vm_object_set_flag(object, OBJ_DEAD);
747 * Wait for the pageout daemon to be done with the object
749 vm_object_pip_wait(object, "objtrm1");
751 KASSERT(!object->paging_in_progress,
752 ("vm_object_terminate: pageout in progress"));
755 * Clean and free the pages, as appropriate. All references to the
756 * object are gone, so we don't need to lock it.
758 if (object->type == OBJT_VNODE) {
759 struct vnode *vp;
762 * Clean pages and flush buffers.
764 * NOTE! TMPFS buffer flushes do not typically flush the
765 * actual page to swap as this would be highly
766 * inefficient, and normal filesystems usually wrap
767 * page flushes with buffer cache buffers.
769 * To deal with this we have to call vinvalbuf() both
770 * before and after the vm_object_page_clean().
772 vp = (struct vnode *) object->handle;
773 vinvalbuf(vp, V_SAVE, 0, 0);
774 vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
775 vinvalbuf(vp, V_SAVE, 0, 0);
779 * Wait for any I/O to complete, after which there had better not
780 * be any references left on the object.
782 vm_object_pip_wait(object, "objtrm2");
784 if (object->ref_count != 0) {
785 panic("vm_object_terminate: object with references, "
786 "ref_count=%d", object->ref_count);
790 * Cleanup any shared pmaps associated with this object.
792 pmap_object_free(object);
795 * Now free any remaining pages. For internal objects, this also
796 * removes them from paging queues. Don't free wired pages, just
797 * remove them from the object.
799 info.count = 0;
800 info.object = object;
801 do {
802 info.error = 0;
803 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
804 vm_object_terminate_callback, &info);
805 } while (info.error);
808 * Let the pager know object is dead.
810 vm_pager_deallocate(object);
813 * Wait for the object hold count to hit 1, clean out pages as
814 * we go. vmobj_token interlocks any race conditions that might
815 * pick the object up from the vm_object_list after we have cleared
816 * rb_memq.
818 for (;;) {
819 if (RB_ROOT(&object->rb_memq) == NULL)
820 break;
821 kprintf("vm_object_terminate: Warning, object %p "
822 "still has %ld pages\n",
823 object, object->resident_page_count);
824 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
825 vm_object_terminate_callback, &info);
829 * There had better not be any pages left
831 KKASSERT(object->resident_page_count == 0);
834 * Remove the object from the global object list.
836 hash = vmobj_hash(object);
837 lwkt_gettoken(&hash->token);
838 TAILQ_REMOVE(&hash->list, object, object_entry);
839 lwkt_reltoken(&hash->token);
841 if (object->ref_count != 0) {
842 panic("vm_object_terminate2: object with references, "
843 "ref_count=%d", object->ref_count);
847 * NOTE: The object hold_count is at least 1, so we cannot kfree()
848 * the object here. See vm_object_drop().
853 * The caller must hold the object.
855 static int
856 vm_object_terminate_callback(vm_page_t p, void *data)
858 struct rb_vm_page_scan_info *info = data;
859 vm_object_t object;
861 object = p->object;
862 KKASSERT(object == info->object);
863 if (vm_page_busy_try(p, TRUE)) {
864 vm_page_sleep_busy(p, TRUE, "vmotrm");
865 info->error = 1;
866 return 0;
868 if (object != p->object) {
869 /* XXX remove once we determine it can't happen */
870 kprintf("vm_object_terminate: Warning: Encountered "
871 "busied page %p on queue %d\n", p, p->queue);
872 vm_page_wakeup(p);
873 info->error = 1;
874 } else if (p->wire_count == 0) {
876 * NOTE: p->dirty and PG_NEED_COMMIT are ignored.
878 vm_page_free(p);
879 mycpu->gd_cnt.v_pfree++;
880 } else {
881 if (p->queue != PQ_NONE) {
882 kprintf("vm_object_terminate: Warning: Encountered "
883 "wired page %p on queue %d\n", p, p->queue);
884 if (vm_object_debug > 0) {
885 --vm_object_debug;
886 print_backtrace(10);
889 vm_page_remove(p);
890 vm_page_wakeup(p);
894 * Must be at end to avoid SMP races, caller holds object token
896 if ((++info->count & 63) == 0)
897 lwkt_user_yield();
898 return(0);
902 * Clean all dirty pages in the specified range of object. Leaves page
903 * on whatever queue it is currently on. If NOSYNC is set then do not
904 * write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
905 * leaving the object dirty.
907 * When stuffing pages asynchronously, allow clustering. XXX we need a
908 * synchronous clustering mode implementation.
910 * Odd semantics: if start == end, we clean everything.
912 * The object must be locked? XXX
914 static int vm_object_page_clean_pass1(struct vm_page *p, void *data);
915 static int vm_object_page_clean_pass2(struct vm_page *p, void *data);
917 void
918 vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
919 int flags)
921 struct rb_vm_page_scan_info info;
922 struct vnode *vp;
923 int wholescan;
924 int pagerflags;
925 int generation;
927 vm_object_hold(object);
928 if (object->type != OBJT_VNODE ||
929 (object->flags & OBJ_MIGHTBEDIRTY) == 0) {
930 vm_object_drop(object);
931 return;
934 pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ?
935 VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
936 pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
938 vp = object->handle;
941 * Interlock other major object operations. This allows us to
942 * temporarily clear OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY.
944 vm_object_set_flag(object, OBJ_CLEANING);
947 * Handle 'entire object' case
949 info.start_pindex = start;
950 if (end == 0) {
951 info.end_pindex = object->size - 1;
952 } else {
953 info.end_pindex = end - 1;
955 wholescan = (start == 0 && info.end_pindex == object->size - 1);
956 info.limit = flags;
957 info.pagerflags = pagerflags;
958 info.object = object;
961 * If cleaning the entire object do a pass to mark the pages read-only.
962 * If everything worked out ok, clear OBJ_WRITEABLE and
963 * OBJ_MIGHTBEDIRTY.
965 if (wholescan) {
966 info.error = 0;
967 info.count = 0;
968 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
969 vm_object_page_clean_pass1, &info);
970 if (info.error == 0) {
971 vm_object_clear_flag(object,
972 OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
973 if (object->type == OBJT_VNODE &&
974 (vp = (struct vnode *)object->handle) != NULL) {
976 * Use new-style interface to clear VISDIRTY
977 * because the vnode is not necessarily removed
978 * from the syncer list(s) as often as it was
979 * under the old interface, which can leave
980 * the vnode on the syncer list after reclaim.
982 vclrobjdirty(vp);
988 * Do a pass to clean all the dirty pages we find.
990 do {
991 info.error = 0;
992 info.count = 0;
993 generation = object->generation;
994 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
995 vm_object_page_clean_pass2, &info);
996 } while (info.error || generation != object->generation);
998 vm_object_clear_flag(object, OBJ_CLEANING);
999 vm_object_drop(object);
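/*
 * Usage sketch (mirrors the call made from vm_object_terminate()):
 * passing start == end == 0 cleans the entire object, and OBJPC_SYNC
 * forces synchronous pageouts.
 */
#if 0
	vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
#endif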
1003 * The caller must hold the object.
1005 static
1007 vm_object_page_clean_pass1(struct vm_page *p, void *data)
1009 struct rb_vm_page_scan_info *info = data;
1011 KKASSERT(p->object == info->object);
1013 vm_page_flag_set(p, PG_CLEANCHK);
1014 if ((info->limit & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
1015 info->error = 1;
1016 } else if (vm_page_busy_try(p, FALSE)) {
1017 info->error = 1;
1018 } else {
1019 KKASSERT(p->object == info->object);
1020 vm_page_protect(p, VM_PROT_READ);
1021 vm_page_wakeup(p);
1025 * Must be at end to avoid SMP races, caller holds object token
1027 if ((++info->count & 63) == 0)
1028 lwkt_user_yield();
1029 return(0);
1033 * The caller must hold the object
1035 static
1037 vm_object_page_clean_pass2(struct vm_page *p, void *data)
1039 struct rb_vm_page_scan_info *info = data;
1040 int generation;
1042 KKASSERT(p->object == info->object);
1045 * Do not mess with pages that were inserted after we started
1046 * the cleaning pass.
1048 if ((p->flags & PG_CLEANCHK) == 0)
1049 goto done;
1051 generation = info->object->generation;
1053 if (vm_page_busy_try(p, TRUE)) {
1054 vm_page_sleep_busy(p, TRUE, "vpcwai");
1055 info->error = 1;
1056 goto done;
1059 KKASSERT(p->object == info->object &&
1060 info->object->generation == generation);
1063 * Before wasting time traversing the pmaps, check for trivial
1064 * cases where the page cannot be dirty.
1066 if (p->valid == 0 || (p->queue - p->pc) == PQ_CACHE) {
1067 KKASSERT((p->dirty & p->valid) == 0 &&
1068 (p->flags & PG_NEED_COMMIT) == 0);
1069 vm_page_wakeup(p);
1070 goto done;
1074 * Check whether the page is dirty or not. The page has been set
1075 * to be read-only so the check will not race a user dirtying the
1076 * page.
1078 vm_page_test_dirty(p);
1079 if ((p->dirty & p->valid) == 0 && (p->flags & PG_NEED_COMMIT) == 0) {
1080 vm_page_flag_clear(p, PG_CLEANCHK);
1081 vm_page_wakeup(p);
1082 goto done;
1086 * If we have been asked to skip nosync pages and this is a
1087 * nosync page, skip it. Note that the object flags were
1088 * not cleared in this case (because pass1 will have returned an
1089 * error), so we do not have to set them.
1091 if ((info->limit & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
1092 vm_page_flag_clear(p, PG_CLEANCHK);
1093 vm_page_wakeup(p);
1094 goto done;
1098 * Flush as many pages as we can. PG_CLEANCHK will be cleared on
1099 * the pages that get successfully flushed. Set info->error if
1100 * we raced an object modification.
1102 vm_object_page_collect_flush(info->object, p, info->pagerflags);
1103 /* vm_wait_nominal(); this can deadlock the system in syncer/pageout */
1106 * Must be at end to avoid SMP races, caller holds object token
1108 done:
1109 if ((++info->count & 63) == 0)
1110 lwkt_user_yield();
1111 return(0);
1115 * Collect the specified page and nearby pages and flush them out
1116 * (the routine itself returns nothing). The passed page is busied
1117 * by the caller and we are responsible for its disposition.
1119 * The caller must hold the object.
1121 static void
1122 vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags)
1124 int error;
1125 int is;
1126 int ib;
1127 int i;
1128 int page_base;
1129 vm_pindex_t pi;
1130 vm_page_t ma[BLIST_MAX_ALLOC];
1132 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
1134 pi = p->pindex;
1135 page_base = pi % BLIST_MAX_ALLOC;
1136 ma[page_base] = p;
1137 ib = page_base - 1;
1138 is = page_base + 1;
1140 while (ib >= 0) {
1141 vm_page_t tp;
1143 tp = vm_page_lookup_busy_try(object, pi - page_base + ib,
1144 TRUE, &error);
1145 if (error)
1146 break;
1147 if (tp == NULL)
1148 break;
1149 if ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
1150 (tp->flags & PG_CLEANCHK) == 0) {
1151 vm_page_wakeup(tp);
1152 break;
1154 if ((tp->queue - tp->pc) == PQ_CACHE) {
1155 vm_page_flag_clear(tp, PG_CLEANCHK);
1156 vm_page_wakeup(tp);
1157 break;
1159 vm_page_test_dirty(tp);
1160 if ((tp->dirty & tp->valid) == 0 &&
1161 (tp->flags & PG_NEED_COMMIT) == 0) {
1162 vm_page_flag_clear(tp, PG_CLEANCHK);
1163 vm_page_wakeup(tp);
1164 break;
1166 ma[ib] = tp;
1167 --ib;
1169 ++ib; /* fixup */
1171 while (is < BLIST_MAX_ALLOC &&
1172 pi - page_base + is < object->size) {
1173 vm_page_t tp;
1175 tp = vm_page_lookup_busy_try(object, pi - page_base + is,
1176 TRUE, &error);
1177 if (error)
1178 break;
1179 if (tp == NULL)
1180 break;
1181 if ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
1182 (tp->flags & PG_CLEANCHK) == 0) {
1183 vm_page_wakeup(tp);
1184 break;
1186 if ((tp->queue - tp->pc) == PQ_CACHE) {
1187 vm_page_flag_clear(tp, PG_CLEANCHK);
1188 vm_page_wakeup(tp);
1189 break;
1191 vm_page_test_dirty(tp);
1192 if ((tp->dirty & tp->valid) == 0 &&
1193 (tp->flags & PG_NEED_COMMIT) == 0) {
1194 vm_page_flag_clear(tp, PG_CLEANCHK);
1195 vm_page_wakeup(tp);
1196 break;
1198 ma[is] = tp;
1199 ++is;
1203 * All pages in the ma[] array are busied now
1205 for (i = ib; i < is; ++i) {
1206 vm_page_flag_clear(ma[i], PG_CLEANCHK);
1207 vm_page_hold(ma[i]); /* XXX need this any more? */
1209 vm_pageout_flush(&ma[ib], is - ib, pagerflags);
1210 for (i = ib; i < is; ++i) /* XXX need this any more? */
1211 vm_page_unhold(ma[i]);
1215 * Implements the madvise function at the object/page level.
1217 * MADV_WILLNEED (any object)
1219 * Activate the specified pages if they are resident.
1221 * MADV_DONTNEED (any object)
1223 * Deactivate the specified pages if they are resident.
1225 * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects, OBJ_ONEMAPPING only)
1227 * Deactivate and clean the specified pages if they are
1228 * resident. This permits the process to reuse the pages
1229 * without faulting or the kernel to reclaim the pages
1230 * without I/O.
1232 * No requirements.
1234 void
1235 vm_object_madvise(vm_object_t object, vm_pindex_t pindex,
1236 vm_pindex_t count, int advise)
1238 vm_pindex_t end;
1239 vm_page_t m;
1240 int error;
1242 if (object == NULL)
1243 return;
1245 end = pindex + count;
1247 vm_object_hold(object);
1250 * Locate and adjust resident pages. This only applies to the
1251 * primary object in the mapping.
1253 for (; pindex < end; pindex += 1) {
1254 relookup:
1256 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
1257 * and those pages must be OBJ_ONEMAPPING.
1259 if (advise == MADV_FREE) {
1260 if ((object->type != OBJT_DEFAULT &&
1261 object->type != OBJT_SWAP) ||
1262 (object->flags & OBJ_ONEMAPPING) == 0) {
1263 continue;
1267 m = vm_page_lookup_busy_try(object, pindex, TRUE, &error);
1269 if (error) {
1270 vm_page_sleep_busy(m, TRUE, "madvpo");
1271 goto relookup;
1273 if (m == NULL) {
1275 * There may be swap even if there is no backing page
1277 if (advise == MADV_FREE && object->type == OBJT_SWAP)
1278 swap_pager_freespace(object, pindex, 1);
1279 continue;
1283 * If the page is not in a normal active state, we skip it.
1284 * If the page is not managed there are no page queues to
1285 * mess with. Things can break if we mess with pages in
1286 * any of the below states.
1288 if (m->wire_count ||
1289 (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) ||
1290 m->valid != VM_PAGE_BITS_ALL
1292 vm_page_wakeup(m);
1293 continue;
1297 * Theoretically once a page is known not to be busy, an
1298 * interrupt cannot come along and rip it out from under us.
1300 if (advise == MADV_WILLNEED) {
1301 vm_page_activate(m);
1302 } else if (advise == MADV_DONTNEED) {
1303 vm_page_dontneed(m);
1304 } else if (advise == MADV_FREE) {
1306 * Mark the page clean. This will allow the page
1307 * to be freed up by the system. However, such pages
1308 * are often reused quickly by malloc()/free()
1309 * so we do not do anything that would cause
1310 * a page fault if we can help it.
1312 * Specifically, we do not try to actually free
1313 * the page now nor do we try to put it in the
1314 * cache (which would cause a page fault on reuse).
1316 * But we do make the page as freeable as we
1317 * can without actually taking the step of unmapping
1318 * it.
1320 pmap_clear_modify(m);
1321 m->dirty = 0;
1322 m->act_count = 0;
1323 vm_page_dontneed(m);
1324 if (object->type == OBJT_SWAP)
1325 swap_pager_freespace(object, pindex, 1);
1327 vm_page_wakeup(m);
1329 vm_object_drop(object);
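/*
 * Userland-side sketch (illustrative only) of the advice values this
 * routine ultimately services; the exact call chain from madvise(2)
 * down to here is assumed rather than shown.
 */
#if 0
	madvise(addr, len, MADV_WILLNEED);	/* activate resident pages */
	madvise(addr, len, MADV_DONTNEED);	/* deactivate resident pages */
	madvise(addr, len, MADV_FREE);		/* mark clean / drop swap */
#endif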
1333 * Removes all physical pages in the specified object range from the
1334 * object's list of pages.
1336 * No requirements.
1338 static int vm_object_page_remove_callback(vm_page_t p, void *data);
1340 void
1341 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
1342 boolean_t clean_only)
1344 struct rb_vm_page_scan_info info;
1345 int all;
1348 * Degenerate cases and assertions
1350 vm_object_hold(object);
1351 if (object == NULL ||
1352 (object->resident_page_count == 0 && object->swblock_count == 0)) {
1353 vm_object_drop(object);
1354 return;
1356 KASSERT(object->type != OBJT_PHYS,
1357 ("attempt to remove pages from a physical object"));
1360 * Indicate that paging is occurring on the object
1362 vm_object_pip_add(object, 1);
1365 * Figure out the actual removal range and whether we are removing
1366 * the entire contents of the object or not. If removing the entire
1367 * contents, be sure to get all pages, even those that might be
1368 * beyond the end of the object.
1370 info.object = object;
1371 info.start_pindex = start;
1372 if (end == 0)
1373 info.end_pindex = (vm_pindex_t)-1;
1374 else
1375 info.end_pindex = end - 1;
1376 info.limit = clean_only;
1377 info.count = 0;
1378 all = (start == 0 && info.end_pindex >= object->size - 1);
1381 * Loop until we are sure we have gotten them all.
1383 do {
1384 info.error = 0;
1385 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
1386 vm_object_page_remove_callback, &info);
1387 } while (info.error);
1390 * Remove any related swap if throwing away pages, or for
1391 * non-swap objects (the swap is a clean copy in that case).
1393 if (object->type != OBJT_SWAP || clean_only == FALSE) {
1394 if (all)
1395 swap_pager_freespace_all(object);
1396 else
1397 swap_pager_freespace(object, info.start_pindex,
1398 info.end_pindex - info.start_pindex + 1);
1402 * Cleanup
1404 vm_object_pip_wakeup(object);
1405 vm_object_drop(object);
1409 * The caller must hold the object.
1411 * NOTE: User yields are allowed when removing more than one page, but not
1412 * allowed if only removing one page (the path for single page removals
1413 * might hold a spinlock).
1415 static int
1416 vm_object_page_remove_callback(vm_page_t p, void *data)
1418 struct rb_vm_page_scan_info *info = data;
1420 if (info->object != p->object ||
1421 p->pindex < info->start_pindex ||
1422 p->pindex > info->end_pindex) {
1423 kprintf("vm_object_page_remove_callbackA: obj/pg race %p/%p\n",
1424 info->object, p);
1425 return(0);
1427 if (vm_page_busy_try(p, TRUE)) {
1428 vm_page_sleep_busy(p, TRUE, "vmopar");
1429 info->error = 1;
1430 return(0);
1432 if (info->object != p->object) {
1433 /* this should never happen */
1434 kprintf("vm_object_page_remove_callbackB: obj/pg race %p/%p\n",
1435 info->object, p);
1436 vm_page_wakeup(p);
1437 return(0);
1441 * Wired pages cannot be destroyed, but they can be invalidated
1442 * and we do so if clean_only (limit) is not set.
1444 * WARNING! The page may be wired due to being part of a buffer
1445 * cache buffer, and the buffer might be marked B_CACHE.
1446 * This is fine as part of a truncation but VFSs must be
1447 * sure to fix the buffer up when re-extending the file.
1449 * NOTE! PG_NEED_COMMIT is ignored.
1451 if (p->wire_count != 0) {
1452 vm_page_protect(p, VM_PROT_NONE);
1453 if (info->limit == 0)
1454 p->valid = 0;
1455 vm_page_wakeup(p);
1456 goto done;
1460 * limit is our clean_only flag. If set and the page is dirty or
1461 * requires a commit, do not free it. If set and the page is being
1462 * held by someone, do not free it.
1464 if (info->limit && p->valid) {
1465 vm_page_test_dirty(p);
1466 if ((p->valid & p->dirty) || (p->flags & PG_NEED_COMMIT)) {
1467 vm_page_wakeup(p);
1468 goto done;
1473 * Destroy the page
1475 vm_page_protect(p, VM_PROT_NONE);
1476 vm_page_free(p);
1479 * Must be at end to avoid SMP races, caller holds object token
1481 done:
1482 if ((++info->count & 63) == 0)
1483 lwkt_user_yield();
1485 return(0);
1489 * Try to extend prev_object into an adjoining region of virtual
1490 * memory, return TRUE on success.
1492 * The caller does not need to hold (prev_object) but must have a stable
1493 * pointer to it (typically by holding the vm_map locked).
1495 * This function only works for anonymous memory objects which either
1496 * have (a) one reference or (b) we are extending the object's size.
1497 * Otherwise the related VM pages we want to use for the object might
1498 * be in use by another mapping.
1500 boolean_t
1501 vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
1502 vm_size_t prev_size, vm_size_t next_size)
1504 vm_pindex_t next_pindex;
1506 if (prev_object == NULL)
1507 return (TRUE);
1509 vm_object_hold(prev_object);
1511 if (prev_object->type != OBJT_DEFAULT &&
1512 prev_object->type != OBJT_SWAP) {
1513 vm_object_drop(prev_object);
1514 return (FALSE);
1517 #if 0
1518 /* caller now checks this */
1520 * Try to collapse the object first
1522 vm_object_collapse(prev_object, NULL);
1523 #endif
1525 #if 0
1526 /* caller now checks this */
1528 * We can't coalesce if we shadow another object (figuring out the
1529 * relationships become too complex).
1531 if (prev_object->backing_object != NULL) {
1532 vm_object_chain_release(prev_object);
1533 vm_object_drop(prev_object);
1534 return (FALSE);
1536 #endif
1538 prev_size >>= PAGE_SHIFT;
1539 next_size >>= PAGE_SHIFT;
1540 next_pindex = prev_pindex + prev_size;
1543 * We can't if the object has more than one ref count unless we
1544 * are extending it into newly minted space.
1546 if (prev_object->ref_count > 1 &&
1547 prev_object->size != next_pindex) {
1548 vm_object_drop(prev_object);
1549 return (FALSE);
1553 * Remove any pages that may still be in the object from a previous
1554 * deallocation.
1556 if (next_pindex < prev_object->size) {
1557 vm_object_page_remove(prev_object,
1558 next_pindex,
1559 next_pindex + next_size, FALSE);
1560 if (prev_object->type == OBJT_SWAP)
1561 swap_pager_freespace(prev_object,
1562 next_pindex, next_size);
1566 * Extend the object if necessary.
1568 if (next_pindex + next_size > prev_object->size)
1569 prev_object->size = next_pindex + next_size;
1570 vm_object_drop(prev_object);
1572 return (TRUE);
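/*
 * Illustrative caller pattern (assumed, loosely modeled on the map
 * insertion path): when a new anonymous range lands directly after an
 * existing one, the previous object can simply be grown instead of
 * allocating a new one.
 */
#if 0
	if (vm_object_coalesce(prev_object, prev_pindex,
			       prev_size, next_size)) {
		/* extend the existing mapping's object */
	} else {
		/* fall back to allocating a fresh object */
	}
#endif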
1576 * Make the object writable and flag it as being possibly dirty.
1578 * The object might not be held (or might be held but held shared),
1579 * the related vnode is probably not held either. Object and vnode are
1580 * stable by virtue of the vm_page busied by the caller preventing
1581 * destruction.
1583 * If the related mount is flagged MNTK_THR_SYNC we need to call
1584 * vsetobjdirty(). Filesystems using this option usually shortcut
1585 * synchronization by only scanning the syncer list.
1587 void
1588 vm_object_set_writeable_dirty(vm_object_t object)
1590 struct vnode *vp;
1592 /*vm_object_assert_held(object);*/
1594 * Avoid contention in vm fault path by checking the state before
1595 * issuing an atomic op on it.
1597 if ((object->flags & (OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY)) !=
1598 (OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY)) {
1599 vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
1601 if (object->type == OBJT_VNODE &&
1602 (vp = (struct vnode *)object->handle) != NULL) {
1603 if ((vp->v_flag & VOBJDIRTY) == 0) {
1604 if (vp->v_mount &&
1605 (vp->v_mount->mnt_kern_flag & MNTK_THR_SYNC)) {
1607 * New style THR_SYNC places vnodes on the
1608 * syncer list more deterministically.
1610 vsetobjdirty(vp);
1611 } else {
1613 * Old style scan would not necessarily place
1614 * a vnode on the syncer list when possibly
1615 * modified via mmap.
1617 vsetflags(vp, VOBJDIRTY);
1623 #include "opt_ddb.h"
1624 #ifdef DDB
1625 #include <sys/cons.h>
1627 #include <ddb/ddb.h>
1629 static int _vm_object_in_map (vm_map_t map, vm_object_t object,
1630 vm_map_entry_t entry);
1631 static int vm_object_in_map (vm_object_t object);
1634 * The caller must hold the object.
1636 static int
1637 _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
1639 vm_map_backing_t ba;
1640 vm_map_t tmpm;
1641 vm_map_entry_t tmpe;
1642 int entcount;
1644 if (map == NULL)
1645 return 0;
1646 if (entry == NULL) {
1647 tmpe = RB_MIN(vm_map_rb_tree, &map->rb_root);
1648 entcount = map->nentries;
1649 while (entcount-- && tmpe) {
1650 if( _vm_object_in_map(map, object, tmpe)) {
1651 return 1;
1653 tmpe = vm_map_rb_tree_RB_NEXT(tmpe);
1655 return (0);
1657 switch(entry->maptype) {
1658 case VM_MAPTYPE_SUBMAP:
1659 tmpm = entry->ba.sub_map;
1660 tmpe = RB_MIN(vm_map_rb_tree, &tmpm->rb_root);
1661 entcount = tmpm->nentries;
1662 while (entcount-- && tmpe) {
1663 if( _vm_object_in_map(tmpm, object, tmpe)) {
1664 return 1;
1666 tmpe = vm_map_rb_tree_RB_NEXT(tmpe);
1668 break;
1669 case VM_MAPTYPE_NORMAL:
1670 case VM_MAPTYPE_VPAGETABLE:
1671 ba = &entry->ba;
1672 while (ba) {
1673 if (ba->object == object)
1674 return TRUE;
1675 ba = ba->backing_ba;
1677 break;
1678 default:
1679 break;
1681 return 0;
1684 static int vm_object_in_map_callback(struct proc *p, void *data);
1686 struct vm_object_in_map_info {
1687 vm_object_t object;
1688 int rv;
1692 * Debugging only
1694 static int
1695 vm_object_in_map(vm_object_t object)
1697 struct vm_object_in_map_info info;
1699 info.rv = 0;
1700 info.object = object;
1702 allproc_scan(vm_object_in_map_callback, &info, 0);
1703 if (info.rv)
1704 return 1;
1705 if( _vm_object_in_map(&kernel_map, object, 0))
1706 return 1;
1707 if( _vm_object_in_map(&pager_map, object, 0))
1708 return 1;
1709 if( _vm_object_in_map(&buffer_map, object, 0))
1710 return 1;
1711 return 0;
1715 * Debugging only
1717 static int
1718 vm_object_in_map_callback(struct proc *p, void *data)
1720 struct vm_object_in_map_info *info = data;
1722 if (p->p_vmspace) {
1723 if (_vm_object_in_map(&p->p_vmspace->vm_map, info->object, 0)) {
1724 info->rv = 1;
1725 return -1;
1728 return (0);
1731 DB_SHOW_COMMAND(vmochk, vm_object_check)
1733 struct vm_object_hash *hash;
1734 vm_object_t object;
1735 int n;
1738 * make sure that internal objs are in a map somewhere
1739 * and none have zero ref counts.
1741 for (n = 0; n < VMOBJ_HSIZE; ++n) {
1742 hash = &vm_object_hash[n];
1743 for (object = TAILQ_FIRST(&hash->list);
1744 object != NULL;
1745 object = TAILQ_NEXT(object, object_entry)) {
1746 if (object->type == OBJT_MARKER)
1747 continue;
1748 if (object->handle != NULL ||
1749 (object->type != OBJT_DEFAULT &&
1750 object->type != OBJT_SWAP)) {
1751 continue;
1753 if (object->ref_count == 0) {
1754 db_printf("vmochk: internal obj has "
1755 "zero ref count: %ld\n",
1756 (long)object->size);
1758 if (vm_object_in_map(object))
1759 continue;
1760 db_printf("vmochk: internal obj is not in a map: "
1761 "ref: %d, size: %lu: 0x%lx\n",
1762 object->ref_count, (u_long)object->size,
1763 (u_long)object->size);
1769 * Debugging only
1771 DB_SHOW_COMMAND(object, vm_object_print_static)
1773 /* XXX convert args. */
1774 vm_object_t object = (vm_object_t)addr;
1775 boolean_t full = have_addr;
1777 vm_page_t p;
1779 /* XXX count is an (unused) arg. Avoid shadowing it. */
1780 #define count was_count
1782 int count;
1784 if (object == NULL)
1785 return;
1787 db_iprintf(
1788 "Object %p: type=%d, size=0x%lx, res=%ld, ref=%d, flags=0x%x\n",
1789 object, (int)object->type, (u_long)object->size,
1790 object->resident_page_count, object->ref_count, object->flags);
1792 * XXX no %qd in kernel. Truncate object->backing_object_offset.
1794 db_iprintf("\n");
1796 if (!full)
1797 return;
1799 db_indent += 2;
1800 count = 0;
1801 RB_FOREACH(p, vm_page_rb_tree, &object->rb_memq) {
1802 if (count == 0)
1803 db_iprintf("memory:=");
1804 else if (count == 6) {
1805 db_printf("\n");
1806 db_iprintf(" ...");
1807 count = 0;
1808 } else
1809 db_printf(",");
1810 count++;
1812 db_printf("(off=0x%lx,page=0x%lx)",
1813 (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p));
1815 if (count != 0)
1816 db_printf("\n");
1817 db_indent -= 2;
1820 /* XXX. */
1821 #undef count
1824 * XXX need this non-static entry for calling from vm_map_print.
1826 * Debugging only
1828 void
1829 vm_object_print(/* db_expr_t */ long addr,
1830 boolean_t have_addr,
1831 /* db_expr_t */ long count,
1832 char *modif)
1834 vm_object_print_static(addr, have_addr, count, modif);
1838 * Debugging only
1840 DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
1842 struct vm_object_hash *hash;
1843 vm_object_t object;
1844 int nl = 0;
1845 int c;
1846 int n;
1848 for (n = 0; n < VMOBJ_HSIZE; ++n) {
1849 hash = &vm_object_hash[n];
1850 for (object = TAILQ_FIRST(&hash->list);
1851 object != NULL;
1852 object = TAILQ_NEXT(object, object_entry)) {
1853 vm_pindex_t idx, fidx;
1854 vm_pindex_t osize;
1855 vm_paddr_t pa = -1, padiff;
1856 int rcount;
1857 vm_page_t m;
1859 if (object->type == OBJT_MARKER)
1860 continue;
1861 db_printf("new object: %p\n", (void *)object);
1862 if ( nl > 18) {
1863 c = cngetc();
1864 if (c != ' ')
1865 return;
1866 nl = 0;
1868 nl++;
1869 rcount = 0;
1870 fidx = 0;
1871 osize = object->size;
1872 if (osize > 128)
1873 osize = 128;
1874 for (idx = 0; idx < osize; idx++) {
1875 m = vm_page_lookup(object, idx);
1876 if (m == NULL) {
1877 if (rcount) {
1878 db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
1879 (long)fidx, rcount, (long)pa);
1880 if ( nl > 18) {
1881 c = cngetc();
1882 if (c != ' ')
1883 return;
1884 nl = 0;
1886 nl++;
1887 rcount = 0;
1889 continue;
1892 if (rcount &&
1893 (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
1894 ++rcount;
1895 continue;
1897 if (rcount) {
1898 padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
1899 padiff >>= PAGE_SHIFT;
1900 padiff &= PQ_L2_MASK;
1901 if (padiff == 0) {
1902 pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
1903 ++rcount;
1904 continue;
1906 db_printf(" index(%ld)run(%d)pa(0x%lx)",
1907 (long)fidx, rcount, (long)pa);
1908 db_printf("pd(%ld)\n", (long)padiff);
1909 if ( nl > 18) {
1910 c = cngetc();
1911 if (c != ' ')
1912 return;
1913 nl = 0;
1915 nl++;
1917 fidx = idx;
1918 pa = VM_PAGE_TO_PHYS(m);
1919 rcount = 1;
1921 if (rcount) {
1922 db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
1923 (long)fidx, rcount, (long)pa);
1924 if ( nl > 18) {
1925 c = cngetc();
1926 if (c != ' ')
1927 return;
1928 nl = 0;
1930 nl++;
1935 #endif /* DDB */