mm/page_io.c

   1 /*
   2  *  linux/mm/page_io.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  *
   6  *  Swap reorganised 29.12.95,
   7  *  Asynchronous swapping added 30.12.95. Stephen Tweedie
   8  *  Removed race in async swapping. 14.4.1996. Bruno Haible
   9  *  Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
  10  *  Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
  11  */
  12
  13 #include <linux/mm.h>
  14 #include <linux/kernel_stat.h>
  15 #include <linux/swap.h>
  16 #include <linux/locks.h>
  17 #include <linux/swapctl.h>
  18
  19 #include <asm/pgtable.h>
  20
  21 static DECLARE_WAIT_QUEUE_HEAD(lock_queue);
  22
  23 /*
  24  * Reads or writes a swap page.
  25  * wait=1: start I/O and wait for completion. wait=0: start asynchronous I/O.
  26  *
  27  * Important prevention of race condition: the caller *must* atomically
  28  * create a unique swap cache entry for this swap page before calling
  29  * rw_swap_page, and must lock that page.  By ensuring that there is a
  30  * single page of memory reserved for the swap entry, the normal VM page
  31  * lock on that page also doubles as a lock on swap entries.  Having only
  32  * one lock to deal with per swap entry (rather than locking swap and memory
  33  * independently) also makes it easier to make certain swapping operations
  34  * atomic, which is particularly important when we are trying to ensure
  35  * that shared pages stay shared while being swapped.
  36  */
  37
  38 static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait, int dolock)
  39 {
  40         unsigned long type, offset;
  41         struct swap_info_struct * p;
  42         int zones[PAGE_SIZE/512];
  43         int zones_used;
  44         kdev_t dev = 0;
  45         int block_size;
  46
  47 #ifdef DEBUG_SWAP
  48         printk ("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n",
  49                 (rw == READ) ? "read" : "write",
  50                 entry, (char *) page_address(page), page_count(page),
  51                 wait ? "wait" : "nowait");
  52 #endif
  53
  54         type = SWP_TYPE(entry);
  55         if (type >= nr_swapfiles) {
  56                 printk("Internal error: bad swap-device\n");
  57                 return;
  58         }
  59
  60         /* Don't allow too many pending pages in flight.. */
  61         if (atomic_read(&nr_async_pages) > pager_daemon.swap_cluster)
  62                 wait = 1;
  63
  64         p = &swap_info[type];
  65         offset = SWP_OFFSET(entry);
  66         if (offset >= p->max) {
  67                 printk("rw_swap_page: weirdness\n");
  68                 return;
  69         }
  70         if (p->swap_map && !p->swap_map[offset]) {
  71                 printk(KERN_ERR "rw_swap_page: "
  72                         "Trying to %s unallocated swap (%08lx)\n",
  73                         (rw == READ) ? "read" : "write", entry);
  74                 return;
  75         }
  76         if (!(p->flags & SWP_USED)) {
  77                 printk(KERN_ERR "rw_swap_page: "
  78                         "Trying to swap to unused swap-device\n");
  79                 return;
  80         }
  81
  82         if (!PageLocked(page)) {
  83                 printk(KERN_ERR "VM: swap page is unlocked\n");
  84                 return;
  85         }
  86
  87         if (rw == READ) {
  88                 ClearPageUptodate(page);
  89                 kstat.pswpin++;
  90         } else
  91                 kstat.pswpout++;
  92
  93         get_page(page);
  94         if (p->swap_device) {
  95                 zones[0] = offset;
  96                 zones_used = 1;
  97                 dev = p->swap_device;
  98                 block_size = PAGE_SIZE;
  99         } else if (p->swap_file) {
 100                 struct inode *swapf = p->swap_file->d_inode;
 101                 int i;
 102                 if (swapf->i_op->get_block == NULL
 103                         && swapf->i_op->smap != NULL){
 104                         /*
 105                                 With MS-DOS, we use msdos_smap which returns
 106                                 a sector number (not a cluster or block number).
 107                                 It is a patch to enable the UMSDOS project.
 108                                 Other people are working on better solution.
 109
 110                                 It sounds like ll_rw_swap_file defined
 111                                 its operation size (sector size) based on
 112                                 PAGE_SIZE and the number of blocks to read.
 113                                 So using get_block or smap should work even if
 114                                 smap will require more blocks.
 115                         */
 116                         int j;
 117                         unsigned int block = offset << 3;
 118
 119                         for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
 120                                 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
 121                                         printk("rw_swap_page: bad swap file\n");
 122                                         return;
 123                                 }
 124                         }
 125                         block_size = 512;
 126                 }else{
 127                         int j;
 128                         unsigned int block = offset
 129                                 << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
 130
 131                         block_size = swapf->i_sb->s_blocksize;
 132                         for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
 133                                 if (!(zones[i] = bmap(swapf,block++))) {
 134                                         printk("rw_swap_page: bad swap file\n");
 135                                         return;
 136                                 }
 137                         zones_used = i;
 138                         dev = swapf->i_dev;
 139                 }
 140         } else {
 141                 printk(KERN_ERR "rw_swap_page: no swap file or device\n");
 142                 put_page(page);
 143                 return;
 144         }
 145         if (!wait) {
 146                 set_bit(PG_decr_after, &page->flags);
 147                 atomic_inc(&nr_async_pages);
 148         }
 149         if (dolock) {
 150                 set_bit(PG_free_swap_after, &page->flags);
 151                 p->swap_map[offset]++;
 152         }
 153         set_bit(PG_free_after, &page->flags);
 154
 155         /* block_size == PAGE_SIZE/zones_used */
 156         brw_page(rw, page, dev, zones, block_size, 0);
 157
 158         /* Note! For consistency we do all of the logic,
 159          * decrementing the page count, and unlocking the page in the
 160          * swap lock map - in the IO completion handler.
 161          */
 162         if (!wait) {
 163                 return;
 164         }
 165         wait_on_page(page);
 166         /* This shouldn't happen, but check to be sure. */
 167         if (page_count(page) == 0)
 168                 printk(KERN_ERR "rw_swap_page: page unused while waiting!\n");
 169
 170 #ifdef DEBUG_SWAP
 171         printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n",
 172                 (rw == READ) ? "read" : "write",
 173                 (char *) page_address(page),
 174                 page_count(page));
 175 #endif
 176 }
 177
 178 /*
 179  * A simple wrapper so the base function doesn't need to enforce
 180  * that all swap pages go through the swap cache! We verify that:
 181  *  - the page is locked
 182  *  - it's marked as being swap-cache
 183  *  - it's associated with the swap inode
 184  */
 185 void rw_swap_page(int rw, struct page *page, int wait)
 186 {
 187         unsigned long entry = page->offset;
 188
 189         if (!PageLocked(page))
 190                 PAGE_BUG(page);
 191         if (!PageSwapCache(page))
 192                 PAGE_BUG(page);
 193         if (page->inode != &swapper_inode)
 194                 PAGE_BUG(page);
 195         rw_swap_page_base(rw, entry, page, wait, 1);
 196 }
 197
 198 /*
 199  * Setting up a new swap file needs a simple wrapper just to read the
 200  * swap signature.  SysV shared memory also needs a simple wrapper.
 201  */
 202 void rw_swap_page_nocache(int rw, unsigned long entry, char *buf)
 203 {
 204         struct page *page = mem_map + MAP_NR(buf);
 205
 206         if (TryLockPage(page))
 207                 PAGE_BUG(page);
 208         if (PageSwapCache(page))
 209                 PAGE_BUG(page);
 210         if (page->inode)
 211                 PAGE_BUG(page);
 212         page->offset = entry;
 213         rw_swap_page_base(rw, entry, page, 1, 1);
 214 }
 215
 216 /*
 217  * shmfs needs a version that doesn't put the page in the page cache!
 218  * The swap lock map insists that pages be in the page cache!
 219  * Therefore we can't use it.  Later when we can remove the need for the
 220  * lock map and we can reduce the number of functions exported.
 221  */
 222 void rw_swap_page_nolock(int rw, unsigned long entry, char *buf, int wait)
 223 {
 224         struct page *page = mem_map + MAP_NR(buf);
 225
 226         if (!PageLocked(page))
 227                 PAGE_BUG(page);
 228         if (PageSwapCache(page))
 229                 PAGE_BUG(page);
 230         rw_swap_page_base(rw, entry, page, wait, 0);
 231 }