From 93afe6bea5b03eaae6a6ad66513ddacdcb4315d2 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 28 Apr 2008 21:16:27 +0000 Subject: [PATCH] Paging and swapping system fixes. * Do not try to free a VM page after a failed IO read from swap. It is illegal to free a VM page from an interrupt. Just deactivate it instead. * Do not attempt to move a VM page into the cache queue after a successful pageout from the vnode or swap pagers, and do not try to adjust page protections to read-only (they should already be read-only). Both operations require making serious pmap calls which we really do not want to do from an interrupt. Instead, leave the page on its current queue or, if the system is low on pages, deactivate the page. The pmap protection code is supposed to be runnable from an interrupt but testing with vkernels shows program corruption occurring under severe paging loads. Pmap protection changes were only being made from pageout interrupts. brelse() itself, which can also be called from an interrupt via biodone(), does not make such changes for asynchronous I/O. With these changes in place the program corruption stopped or has been greatly reduced. Further testing in a 64MB vkernel environment is ongoing. In addition, trying to move the page after a completed pageout/swapout to the cache queue was improperly depressing the priority of read-heavy pages. Under severe paging loads we now only deactivate the page. Plus moving a page to the cache queue causes pmap operations to be run which we again do not want to run from an interrupt. 
--- sys/vm/swap_pager.c | 36 ++++++++++++++++++++++++++---------- sys/vm/vm_pageout.c | 11 ++++++++++- sys/vm/vnode_pager.c | 8 +++++++- 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 8fc9236ab2..f4b405d444 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -96,7 +96,7 @@ * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 * * $FreeBSD: src/sys/vm/swap_pager.c,v 1.130.2.12 2002/08/31 21:15:55 dillon Exp $ - * $DragonFly: src/sys/vm/swap_pager.c,v 1.29 2008/04/28 07:07:02 dillon Exp $ + * $DragonFly: src/sys/vm/swap_pager.c,v 1.30 2008/04/28 21:16:27 dillon Exp $ */ #include @@ -1666,10 +1666,11 @@ swp_pager_async_iodone(struct bio *bio) * be overridden by the original caller of * getpages so don't play cute tricks here. * - * XXX IT IS NOT LEGAL TO FREE THE PAGE HERE - * AS THIS MESSES WITH object->memq, and it is - * not legal to mess with object->memq from an - * interrupt. + * NOTE: We can't actually free the page from + * here, because this is an interrupt. It + * is not legal to mess with object->memq + * from an interrupt. Deactivate the page + * instead. */ m->valid = 0; @@ -1679,10 +1680,12 @@ swp_pager_async_iodone(struct bio *bio) * bio_driver_info holds the requested page * index. */ - if (i != (int)bio->bio_driver_info) - vm_page_free(m); - else + if (i != (int)bio->bio_driver_info) { + vm_page_deactivate(m); + vm_page_wakeup(m); + } else { vm_page_flash(m); + } /* * If i == bp->b_pager.pg_reqpage, do not wake * the page up. The caller needs to. @@ -1746,13 +1749,26 @@ swp_pager_async_iodone(struct bio *bio) } } else { /* - * Mark the page clean, but note that the dirty - * bit may have been set in any of the page's pmaps. + * Mark the page clean but do not mess with the + * pmap-layer's modified state. That state should + * also be clear since the caller protected the + * page VM_PROT_READ, but allow the case. + * + * We are in an interrupt, avoid pmap operations. 
+ * + * If we have a severe page deficit, deactivate the + * page. Do not try to cache it (which would also + * involve a pmap op), because the page might still + * be read-heavy. */ vm_page_undirty(m); vm_page_io_finish(m); + if (vm_page_count_severe()) + vm_page_deactivate(m); +#if 0 if (!vm_page_count_severe() || !vm_page_try_to_cache(m)) vm_page_protect(m, VM_PROT_READ); +#endif } } diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 7d4e47da1e..a4a64c1431 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -66,7 +66,7 @@ * rights to redistribute these changes. * * $FreeBSD: src/sys/vm/vm_pageout.c,v 1.151.2.15 2002/12/29 18:21:04 dillon Exp $ - * $DragonFly: src/sys/vm/vm_pageout.c,v 1.33 2008/03/20 06:02:50 dillon Exp $ + * $DragonFly: src/sys/vm/vm_pageout.c,v 1.34 2008/04/28 21:16:27 dillon Exp $ */ /* @@ -452,12 +452,21 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags) * block all other accesses. Also, leave the paging in * progress indicator set so that we don't attempt an object * collapse. + * + * For any pages which have completed synchronously, + * deactivate the page if we are under a severe deficit. + * Do not try to enter them into the cache, though, they + * might still be read-heavy. 
*/ if (pageout_status[i] != VM_PAGER_PEND) { vm_object_pip_wakeup(object); vm_page_io_finish(mt); + if (vm_page_count_severe()) + vm_page_deactivate(mt); +#if 0 if (!vm_page_count_severe() || !vm_page_try_to_cache(mt)) vm_page_protect(mt, VM_PROT_READ); +#endif } } return numpagedout; diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index d3e8ea78de..9a3e380209 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -39,7 +39,7 @@ * * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 * $FreeBSD: src/sys/vm/vnode_pager.c,v 1.116.2.7 2002/12/31 09:34:51 dillon Exp $ - * $DragonFly: src/sys/vm/vnode_pager.c,v 1.40 2007/08/28 01:09:07 dillon Exp $ + * $DragonFly: src/sys/vm/vnode_pager.c,v 1.41 2008/04/28 21:16:27 dillon Exp $ */ /* @@ -655,6 +655,12 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *m, int bytecount, * * We do not under any circumstances truncate the valid bits, as * this will screw up bogus page replacement. + * + * The caller has already read-protected the pages. The VFS must + * use the buffer cache to wrap the pages. The pages might not + * be immediately flushed by the buffer cache but once under its + * control the pages themselves can wind up being marked clean + * and their covering buffer cache buffer can be marked dirty. */ if (maxsize + poffset > vp->v_filesize) { if (vp->v_filesize > poffset) { -- 2.11.4.GIT