/*
 *  linux/mm/page_io.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95,
 *  Asynchronous swapping added 30.12.95. Stephen Tweedie
 *  Removed race in async swapping. 14.4.1996. Bruno Haible
 *  Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
 *  Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/locks.h>
#include <linux/swapctl.h>

#include <asm/pgtable.h>

static DECLARE_WAIT_QUEUE_HEAD(lock_queue);

/*
 * Reads or writes a swap page.
 * wait=1: start I/O and wait for completion. wait=0: start asynchronous I/O.
 *
 * Important prevention of race condition: the caller *must* atomically
 * create a unique swap cache entry for this swap page before calling
 * rw_swap_page, and must lock that page.  By ensuring that there is a
 * single page of memory reserved for the swap entry, the normal VM page
 * lock on that page also doubles as a lock on swap entries.  Having only
 * one lock to deal with per swap entry (rather than locking swap and memory
 * independently) also makes it easier to make certain swapping operations
 * atomic, which is particularly important when we are trying to ensure
 * that shared pages stay shared while being swapped.
 */
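
/*
 * Illustrative calling sequence (a sketch only; the real callers live in
 * the swap cache code, e.g. mm/swap_state.c):
 *
 *	add the page to the swap cache for "entry" and lock it, then:
 *
 *		rw_swap_page(READ, page, 1);
 *
 * The page is unlocked by the I/O completion handler.
 */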

static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait, int dolock)
{
	unsigned long type, offset;
	struct swap_info_struct * p;
	int zones[PAGE_SIZE/512];
	int zones_used;
	kdev_t dev = 0;
	int block_size;

#ifdef DEBUG_SWAP
	printk ("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n",
		(rw == READ) ? "read" : "write",
		entry, (char *) page_address(page), page_count(page),
		wait ? "wait" : "nowait");
#endif

	type = SWP_TYPE(entry);
	if (type >= nr_swapfiles) {
		printk("Internal error: bad swap-device\n");
		return;
	}

	/* Don't allow too many pending pages in flight.. */
	if (atomic_read(&nr_async_pages) > pager_daemon.swap_cluster)
		wait = 1;
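
	/*
	 * Look up the swap area for this entry and sanity-check it: the
	 * offset must lie inside the area, the slot must be allocated,
	 * and the area must be in use.
	 */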
	p = &swap_info[type];
	offset = SWP_OFFSET(entry);
	if (offset >= p->max) {
		printk("rw_swap_page: weirdness\n");
		return;
	}
	if (p->swap_map && !p->swap_map[offset]) {
		printk(KERN_ERR "rw_swap_page: "
			"Trying to %s unallocated swap (%08lx)\n",
			(rw == READ) ? "read" : "write", entry);
		return;
	}
	if (!(p->flags & SWP_USED)) {
		printk(KERN_ERR "rw_swap_page: "
			"Trying to swap to unused swap-device\n");
		return;
	}

	if (!PageLocked(page)) {
		printk(KERN_ERR "VM: swap page is unlocked\n");
		return;
	}
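
	/*
	 * Account the transfer: pswpin/pswpout feed the "swap" line of
	 * /proc/stat (the si/so columns reported by vmstat).
	 */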
	if (rw == READ) {
		ClearPageUptodate(page);
		kstat.pswpin++;
	} else
		kstat.pswpout++;

	get_page(page);
	if (p->swap_device) {
		zones[0] = offset;
		zones_used = 1;
		dev = p->swap_device;
		block_size = PAGE_SIZE;
	} else if (p->swap_file) {
		struct inode *swapf = p->swap_file->d_inode;
		int i;
		if (swapf->i_op->get_block == NULL
			&& swapf->i_op->smap != NULL){
			/*
				With MS-DOS, we use msdos_smap which returns
				a sector number (not a cluster or block number).
				It is a patch to enable the UMSDOS project.
				Other people are working on a better solution.

				It sounds like ll_rw_swap_file defined
				its operation size (sector size) based on
				PAGE_SIZE and the number of blocks to read.
				So using get_block or smap should work even if
				smap will require more blocks.
			*/
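			/* Note: the "offset << 3" below hardcodes
			 * PAGE_SIZE/512 == 8 sectors per page, i.e. it
			 * assumes 4K pages. */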
			int j;
			unsigned int block = offset << 3;

			for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
				if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
					printk("rw_swap_page: bad swap file\n");
					put_page(page);	/* balance the get_page() above */
					return;
				}
			}
			block_size = 512;
		}else{
			int j;
			unsigned int block = offset
				<< (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);

			block_size = swapf->i_sb->s_blocksize;
			for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
				if (!(zones[i] = bmap(swapf,block++))) {
					printk("rw_swap_page: bad swap file\n");
					put_page(page);	/* balance the get_page() above */
					return;
				}
		}
		zones_used = i;
		dev = swapf->i_dev;
	} else {
		printk(KERN_ERR "rw_swap_page: no swap file or device\n");
		put_page(page);
		return;
	}
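
	/*
	 * At this point zones[] holds the on-disk blocks backing the page.
	 * Worked example (assuming 4K pages): a swap file on a 1K-block
	 * filesystem gives block_size = 1024, so the loop above fills
	 * zones[0..3] with four consecutive blocks and zones_used = 4;
	 * a swap partition instead uses a single PAGE_SIZE-sized zone.
	 */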
	if (!wait) {
		set_bit(PG_decr_after, &page->flags);
		atomic_inc(&nr_async_pages);
	}
	if (dolock) {
		set_bit(PG_free_swap_after, &page->flags);
		p->swap_map[offset]++;
	}
	set_bit(PG_free_after, &page->flags);

	/* block_size == PAGE_SIZE/zones_used */
	brw_page(rw, page, dev, zones, block_size, 0);

	/* Note! For consistency we do all of the logic - decrementing
	 * the page count and unlocking the page in the swap lock map -
	 * in the IO completion handler.
	 */
	if (!wait) {
		return;
	}
	wait_on_page(page);
	/* This shouldn't happen, but check to be sure. */
	if (page_count(page) == 0)
		printk(KERN_ERR "rw_swap_page: page unused while waiting!\n");

#ifdef DEBUG_SWAP
	printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n",
		(rw == READ) ? "read" : "write",
		(char *) page_address(page),
		page_count(page));
#endif
}

/*
 * A simple wrapper so the base function doesn't need to enforce
 * that all swap pages go through the swap cache! We verify that:
 *  - the page is locked
 *  - it's marked as being swap-cache
 *  - it's associated with the swap inode
 */
void rw_swap_page(int rw, struct page *page, int wait)
{
	unsigned long entry = page->offset;

	if (!PageLocked(page))
		PAGE_BUG(page);
	if (!PageSwapCache(page))
		PAGE_BUG(page);
	if (page->inode != &swapper_inode)
		PAGE_BUG(page);
	rw_swap_page_base(rw, entry, page, wait, 1);
}

/*
 * Setting up a new swap file needs a simple wrapper just to read the
 * swap signature.  SysV shared memory also needs a simple wrapper.
 */
void rw_swap_page_nocache(int rw, unsigned long entry, char *buf)
{
	struct page *page = mem_map + MAP_NR(buf);

	if (TryLockPage(page))
		PAGE_BUG(page);
	if (PageSwapCache(page))
		PAGE_BUG(page);
	if (page->inode)
		PAGE_BUG(page);
	page->offset = entry;
	rw_swap_page_base(rw, entry, page, 1, 1);
}
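
/*
 * For illustration: reading the swap signature during swapon would look
 * something like this (a sketch, not copied from the real caller in
 * mm/swapfile.c):
 *
 *	rw_swap_page_nocache(READ, SWP_ENTRY(type, 0), (char *) swap_header);
 *
 * where swap_header is a page-sized kernel buffer.
 */
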
/*
 * shmfs needs a version that doesn't put the page in the page cache!
 * The swap lock map insists that pages be in the page cache!
 * Therefore we can't use it.  Later, when we can remove the need for the
 * lock map, we can reduce the number of functions exported.
 */
void rw_swap_page_nolock(int rw, unsigned long entry, char *buf, int wait)
{
	struct page *page = mem_map + MAP_NR(buf);

	if (!PageLocked(page))
		PAGE_BUG(page);
	if (PageSwapCache(page))
		PAGE_BUG(page);
	rw_swap_page_base(rw, entry, page, wait, 0);
}