From 530e94fc9e8b4693c7e841a45371bdb6e76ee4cd Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 17 May 2019 10:03:35 -0700 Subject: [PATCH] kernel - VM rework part 9 - Precursor work for terminal pv_entry removal * Cleanup the API a bit * Get rid of pmap_enter_quick() * Remove unused procedures. * Document that vm_page_protect() (and thus the related pmap_page_protect()) must be called with a hard-busied page. This ensures that the operation does not race a new pmap_enter() of the page. --- sys/dev/drm/drm_vm.c | 2 +- sys/net/netmap/netmap_freebsd.c | 3 +- sys/platform/pc64/x86_64/pmap.c | 55 +++----------------- sys/platform/vkernel64/platform/pmap.c | 58 +++------------------- sys/vfs/tmpfs/tmpfs_vnops.c | 4 +- sys/vm/pmap.h | 7 +-- sys/vm/vm_map.c | 12 ++--- sys/vm/vm_object.c | 91 ---------------------------------- sys/vm/vm_object.h | 2 - sys/vm/vm_page.c | 4 +- sys/vm/vm_page.h | 4 +- sys/vm/vm_page2.h | 6 ++- 12 files changed, 34 insertions(+), 214 deletions(-) diff --git a/sys/dev/drm/drm_vm.c b/sys/dev/drm/drm_vm.c index 3b3f45ee49..5f4d8c441f 100644 --- a/sys/dev/drm/drm_vm.c +++ b/sys/dev/drm/drm_vm.c @@ -164,7 +164,7 @@ vm_phys_fictitious_to_vm_page(vm_paddr_t pa) int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, - vm_memattr_t memattr) + vm_memattr_t memattr) { struct vm_phys_fictitious_seg *seg; vm_page_t fp; diff --git a/sys/net/netmap/netmap_freebsd.c b/sys/net/netmap/netmap_freebsd.c index 38868ba2db..ad6f006030 100644 --- a/sys/net/netmap/netmap_freebsd.c +++ b/sys/net/netmap/netmap_freebsd.c @@ -233,8 +233,7 @@ vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr) { vm_page_t m; - m = kmalloc(sizeof(struct vm_page), M_FICT_PAGES, - M_WAITOK | M_ZERO); + m = kmalloc(sizeof(struct vm_page), M_FICT_PAGES, M_WAITOK | M_ZERO); vm_page_initfake(m, paddr, memattr); return (m); } diff --git a/sys/platform/pc64/x86_64/pmap.c b/sys/platform/pc64/x86_64/pmap.c index 0c149dfa02..9d850d4b11 100644 --- a/sys/platform/pc64/x86_64/pmap.c +++ b/sys/platform/pc64/x86_64/pmap.c @@ -5590,19 +5590,6 @@ done: } /* - * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. - * This code also assumes that the pmap has no pre-existing entry for this - * VA. - * - * This code currently may only be used on user pmaps, not kernel_pmap. - */ -void -pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) -{ - pmap_enter(pmap, va, m, VM_PROT_READ, FALSE, NULL); -} - -/* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. 
* @@ -5625,10 +5612,12 @@ pmap_kenter_temporary(vm_paddr_t pa, long i) static int pmap_object_init_pt_callback(vm_page_t p, void *data); void -pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, - vm_object_t object, vm_pindex_t pindex, - vm_size_t size, int limit) +pmap_object_init_pt(pmap_t pmap, vm_map_entry_t entry, + vm_offset_t addr, vm_size_t size, int limit) { + vm_prot_t prot = entry->protection; + vm_object_t object = entry->ba.object; + vm_pindex_t pindex = atop(entry->ba.offset + (addr - entry->ba.start)); struct rb_vm_page_scan_info info; struct lwp *lp; vm_size_t psize; @@ -5693,6 +5682,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, info.addr = addr; info.pmap = pmap; info.object = object; + info.entry = entry; /* * By using the NOLK scan, the callback function must be sure @@ -5747,8 +5737,8 @@ again: vm_page_deactivate(p); } rel_index = p->pindex - info->start_pindex; - pmap_enter_quick(info->pmap, - info->addr + x86_64_ptob(rel_index), p); + pmap_enter(info->pmap, info->addr + x86_64_ptob(rel_index), p, + VM_PROT_READ, FALSE, info->entry); } if (hard_busy) vm_page_wakeup(p); @@ -5941,35 +5931,6 @@ pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) } /* - * Returns true if the pmap's pv is one of the first 16 pvs linked to from - * this page. This count may be changed upwards or downwards in the future; - * it is only necessary that true be returned for a small subset of pmaps - * for proper page aging. - */ -boolean_t -pmap_page_exists_quick(pmap_t pmap, vm_page_t m) -{ - pv_entry_t pv; - int loops = 0; - - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) - return FALSE; - - vm_page_spin_lock(m); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - if (pv->pv_pmap == pmap) { - vm_page_spin_unlock(m); - return TRUE; - } - loops++; - if (loops >= 16) - break; - } - vm_page_spin_unlock(m); - return (FALSE); -} - -/* * Remove all pages from specified address space this aids process exit * speeds. Also, this code may be special cased for the current process * only. diff --git a/sys/platform/vkernel64/platform/pmap.c b/sys/platform/vkernel64/platform/pmap.c index bbb370dc44..ea13f00fec 100644 --- a/sys/platform/vkernel64/platform/pmap.c +++ b/sys/platform/vkernel64/platform/pmap.c @@ -2521,19 +2521,6 @@ validate: } /* - * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. - * - * Currently this routine may only be used on user pmaps, not kernel_pmap. - * - * No requirements. - */ -void -pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) -{ - pmap_enter(pmap, va, m, VM_PROT_READ, 0, NULL); -} - -/* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. 
* @@ -2558,10 +2545,12 @@ pmap_kenter_temporary(vm_paddr_t pa, long i) static int pmap_object_init_pt_callback(vm_page_t p, void *data); void -pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, - vm_object_t object, vm_pindex_t pindex, - vm_size_t size, int limit) +pmap_object_init_pt(pmap_t pmap, vm_map_entry_t entry, + vm_offset_t addr, vm_size_t size, int limit) { + vm_prot_t prot = entry->protection; + vm_object_t object = entry->ba.object; + vm_pindex_t pindex = atop(entry->ba.offset + (addr - entry->ba.start)); struct rb_vm_page_scan_info info; struct lwp *lp; vm_size_t psize; @@ -2613,6 +2602,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, info.mpte = NULL; info.addr = addr; info.pmap = pmap; + info.entry = entry; vm_object_hold_shared(object); vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, @@ -2648,8 +2638,8 @@ pmap_object_init_pt_callback(vm_page_t p, void *data) if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); rel_index = p->pindex - info->start_pindex; - pmap_enter_quick(info->pmap, - info->addr + x86_64_ptob(rel_index), p); + pmap_enter(info->pmap, info->addr + x86_64_ptob(rel_index), p, + VM_PROT_READ, FALSE, info->entry); } vm_page_wakeup(p); return(0); @@ -2818,38 +2808,6 @@ pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) } /* - * Returns true if the pmap's pv is one of the first 16 pvs linked to - * from this page. This count may be changed upwards or downwards - * in the future; it is only necessary that true be returned for a small - * subset of pmaps for proper page aging. - * - * No other requirements. - */ -boolean_t -pmap_page_exists_quick(pmap_t pmap, vm_page_t m) -{ - pv_entry_t pv; - int loops = 0; - - if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) - return FALSE; - - vm_page_spin_lock(m); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - if (pv->pv_pmap == pmap) { - vm_page_spin_unlock(m); - return TRUE; - } - loops++; - if (loops >= 16) - break; - } - vm_page_spin_unlock(m); - - return (FALSE); -} - -/* * Remove all pages from specified address space this aids process * exit speeds. 
Also, this code is special cased for current * process only, but can have the more generic (and slightly slower) diff --git a/sys/vfs/tmpfs/tmpfs_vnops.c b/sys/vfs/tmpfs/tmpfs_vnops.c index a2dd4d2078..ee636db754 100644 --- a/sys/vfs/tmpfs/tmpfs_vnops.c +++ b/sys/vfs/tmpfs/tmpfs_vnops.c @@ -1969,7 +1969,7 @@ tmpfs_move_pages_callback(vm_page_t p, void *data) info->error = -1; return -1; } - vm_page_rename(p, info->backing_object, pindex); + vm_page_rename(p, info->dest_object, pindex); vm_page_clear_commit(p); vm_page_wakeup(p); /* page automaticaly made dirty */ @@ -1986,7 +1986,7 @@ tmpfs_move_pages(vm_object_t src, vm_object_t dst) vm_object_hold(src); vm_object_hold(dst); info.object = src; - info.backing_object = dst; + info.dest_object = dst; do { info.error = 1; vm_page_rb_tree_RB_SCAN(&src->rb_memq, NULL, diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 6310781ab7..6277b2c271 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -176,7 +176,6 @@ void pmap_copy_page (vm_paddr_t, vm_paddr_t); void pmap_copy_page_frag (vm_paddr_t, vm_paddr_t, size_t bytes); void pmap_enter (pmap_t, vm_offset_t, struct vm_page *, vm_prot_t, boolean_t, struct vm_map_entry *); -void pmap_enter_quick (pmap_t, vm_offset_t, struct vm_page *); vm_page_t pmap_fault_page_quick(pmap_t, vm_offset_t, vm_prot_t, int *); vm_paddr_t pmap_extract (pmap_t pmap, vm_offset_t va, void **handlep); void pmap_extract_done (void *handle); @@ -185,10 +184,8 @@ void pmap_init (void); boolean_t pmap_is_modified (struct vm_page *m); int pmap_ts_referenced (struct vm_page *m); vm_offset_t pmap_map (vm_offset_t *, vm_paddr_t, vm_paddr_t, int); -void pmap_object_init_pt (pmap_t pmap, vm_offset_t addr, - vm_prot_t prot, vm_object_t object, vm_pindex_t pindex, - vm_offset_t size, int pagelimit); -boolean_t pmap_page_exists_quick (pmap_t pmap, struct vm_page *m); +void pmap_object_init_pt (pmap_t pmap, struct vm_map_entry *entry, + vm_offset_t addr, vm_offset_t size, int pagelimit); void pmap_page_protect (struct vm_page *m, vm_prot_t prot); void pmap_page_init (struct vm_page *m); vm_paddr_t uservtophys(vm_offset_t va); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 26a19cee62..5808818526 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1423,8 +1423,9 @@ vm_map_insert(vm_map_t map, int *countp, void *map_object, void *map_aux, vm_object_lock_swap(); vm_object_drop(object); } - pmap_object_init_pt(map->pmap, start, prot, - object, OFF_TO_IDX(offset), end - start, + pmap_object_init_pt(map->pmap, new_entry, + new_entry->ba.start, + new_entry->ba.end - new_entry->ba.start, cow & MAP_PREFAULT_PARTIAL); if (dorelock) { vm_object_hold(object); @@ -2432,12 +2433,9 @@ vm_map_madvise(vm_map_t map, vm_offset_t start, vm_offset_t end, if (behav == MADV_WILLNEED && current->maptype != VM_MAPTYPE_VPAGETABLE) { pmap_object_init_pt( - map->pmap, + map->pmap, current, useStart, - current->protection, - current->ba.object, - pindex, - (count << PAGE_SHIFT), + (delta << PAGE_SHIFT), MAP_PREFAULT_MADVISE ); } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index e98ccd29a6..876e4c5951 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1212,97 +1212,6 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags) } /* - * Same as vm_object_pmap_copy, except range checking really - * works, and is meant for small sections of an object. - * - * This code protects resident pages by making them read-only - * and is typically called on a fork or split when a page - * is converted to copy-on-write. 
- * - * NOTE: If the page is already at VM_PROT_NONE, calling - * vm_page_protect will have no effect. - */ -void -vm_object_pmap_copy_1(vm_object_t object, vm_pindex_t start, vm_pindex_t end) -{ - vm_pindex_t idx; - vm_page_t p; - - if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0) - return; - - vm_object_hold(object); - for (idx = start; idx < end; idx++) { - p = vm_page_lookup(object, idx); - if (p == NULL) - continue; - vm_page_protect(p, VM_PROT_READ); - } - vm_object_drop(object); -} - -/* - * Removes all physical pages in the specified object range from all - * physical maps. - * - * The object must *not* be locked. - */ - -static int vm_object_pmap_remove_callback(vm_page_t p, void *data); - -void -vm_object_pmap_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end) -{ - struct rb_vm_page_scan_info info; - - if (object == NULL) - return; - if (start == end) - return; - info.start_pindex = start; - info.end_pindex = end - 1; - info.count = 0; - info.object = object; - - vm_object_hold(object); - do { - info.error = 0; - vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, - vm_object_pmap_remove_callback, &info); - } while (info.error); - if (start == 0 && end == object->size) - vm_object_clear_flag(object, OBJ_WRITEABLE); - vm_object_drop(object); -} - -/* - * The caller must hold the object - */ -static int -vm_object_pmap_remove_callback(vm_page_t p, void *data) -{ - struct rb_vm_page_scan_info *info = data; - - if (info->object != p->object || - p->pindex < info->start_pindex || - p->pindex > info->end_pindex) { - kprintf("vm_object_pmap_remove_callback: obj/pg race %p/%p\n", - info->object, p); - info->error = 1; - return(0); - } - - vm_page_protect(p, VM_PROT_NONE); - - /* - * Must be at end to avoid SMP races, caller holds object token - */ - if ((++info->count & 63) == 0) - lwkt_user_yield(); - return(0); -} - -/* * Implements the madvise function at the object/page level. * * MADV_WILLNEED (any object) diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index db6a09b7b9..98edacddf0 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -317,8 +317,6 @@ void vm_object_init1 (void); void vm_object_page_clean (vm_object_t, vm_pindex_t, vm_pindex_t, int); void vm_object_page_remove (vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t); void vm_object_pmap_copy (vm_object_t, vm_pindex_t, vm_pindex_t); -void vm_object_pmap_copy_1 (vm_object_t, vm_pindex_t, vm_pindex_t); -void vm_object_pmap_remove (vm_object_t, vm_pindex_t, vm_pindex_t); void vm_object_madvise (vm_object_t, vm_pindex_t, vm_pindex_t, int); void vm_object_init2 (void); vm_page_t vm_fault_object_page(vm_object_t, vm_ooffset_t, diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 90a936267f..f56bfaa0fa 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -1343,7 +1343,6 @@ vm_page_unhold(vm_page_t m) void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { - if ((m->flags & PG_FICTITIOUS) != 0) { /* * The page's memattr might have changed since the @@ -1390,8 +1389,7 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) atomic_add_int(&object->generation, 1); /* - * Record the object/offset pair in this page and add the - * pv_list_count of the page to the object. + * Associate the VM page with an (object, offset). * * The vm_page spin lock is required for interactions with the pmap. 
*/ diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index a82cc963e2..ca7cbd538b 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -216,9 +216,9 @@ struct rb_vm_page_scan_info { int count; int unused01; vm_offset_t addr; - vm_pindex_t backing_offset_index; + struct vm_map_entry *entry; struct vm_object *object; - struct vm_object *backing_object; + struct vm_object *dest_object; struct vm_page *mpte; struct pmap *pmap; struct vm_map *map; diff --git a/sys/vm/vm_page2.h b/sys/vm/vm_page2.h index c1d057f854..e59572f086 100644 --- a/sys/vm/vm_page2.h +++ b/sys/vm/vm_page2.h @@ -323,9 +323,11 @@ vm_page_sbusy_drop(vm_page_t m) * Since 'prot' is usually a constant, this inline usually winds up optimizing * out the primary conditional. * + * Must be called with (m) hard-busied. + * * WARNING: VM_PROT_NONE can block, but will loop until all mappings have - * been cleared. Callers should be aware that other page related elements - * might have changed, however. + * been cleared. Callers should be aware that other page related + * elements might have changed, however. */ static __inline void vm_page_protect(vm_page_t m, int prot) -- 2.11.4.GIT
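As a usage illustration only (an editor's sketch, not part of the patch): the vm_page2.h hunk above documents that vm_page_protect() must now be called on a hard-busied page, so the operation cannot race a new pmap_enter() of the same page. The helper name example_write_protect() below is hypothetical, and vm_page_busy_wait() is assumed to be the usual DragonFly hard-busy primitive with a (page, also_m_busy, wmesg) signature; vm_object_hold()/vm_object_drop(), vm_page_lookup(), vm_page_protect(), and vm_page_wakeup() all appear in the patch above.

	/*
	 * Hypothetical caller: write-protect one resident page while
	 * honoring the hard-busy requirement on vm_page_protect().
	 */
	static void
	example_write_protect(vm_object_t object, vm_pindex_t pindex)
	{
		vm_page_t m;

		vm_object_hold(object);
		m = vm_page_lookup(object, pindex);
		if (m) {
			/* hard-busy the page; may sleep ("xwprot" is a wchan tag) */
			vm_page_busy_wait(m, FALSE, "xwprot");
			vm_page_protect(m, VM_PROT_READ);	/* cannot race pmap_enter() */
			vm_page_wakeup(m);			/* drop the hard busy */
		}
		vm_object_drop(object);
	}

For comparison, the removed vm_object_pmap_copy_1() called vm_page_protect() on pages it had merely looked up, without hard-busying them first, which is the kind of caller this new requirement rules out.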