4 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
6 * Swap reorganised 29.12.95,
7 * Asynchronous swapping added 30.12.95. Stephen Tweedie
8 * Removed race in async swapping. 14.4.1996. Bruno Haible
9 * Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
10 * Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
14 #include <linux/kernel_stat.h>
15 #include <linux/swap.h>
16 #include <linux/locks.h>
17 #include <linux/swapctl.h>
19 #include <asm/pgtable.h>
/* Wait queue on which processes sleep while a swap page's I/O is in
 * flight; file-scope (static) so only this translation unit touches it.
 * NOTE(review): the declaration is split across two physical lines in
 * this extract — the closing ");" is on its own line. */
21 static DECLARE_WAIT_QUEUE_HEAD(lock_queue
);
/*
 * NOTE(review): this extract is fragmentary. The embedded original line
 * numbers jump (e.g. 56 -> 60, 88 -> 98, 121 -> 128), so the opening
 * brace, several error-path returns, "out:" style cleanup, the swap-device
 * branch that sets `dev`, the declarations of `i`, `j`, `dev` and
 * `block_size`, and the function's closing brace are all elided here.
 * Comments below describe only what the visible lines establish; do not
 * treat this text as a compilable unit.
 */
24 * Reads or writes a swap page.
25 * wait=1: start I/O and wait for completion. wait=0: start asynchronous I/O.
27 * Important prevention of race condition: the caller *must* atomically
28 * create a unique swap cache entry for this swap page before calling
29 * rw_swap_page, and must lock that page. By ensuring that there is a
30 * single page of memory reserved for the swap entry, the normal VM page
31 * lock on that page also doubles as a lock on swap entries. Having only
32 * one lock to deal with per swap entry (rather than locking swap and memory
33 * independently) also makes it easier to make certain swapping operations
34 * atomic, which is particularly important when we are trying to ensure
35 * that shared pages stay shared while being swapped.
/* rw: READ or WRITE; entry: packed swap entry (type + offset); page: the
 * locked swap-cache page; wait: 1 = synchronous, 0 = async; dolock: whether
 * the completion path must manage the swap lock map for this page. */
38 static void rw_swap_page_base(int rw
, unsigned long entry
, struct page
*page
, int wait
, int dolock
)
40 unsigned long type
, offset
;
41 struct swap_info_struct
* p
;
/* One block/sector number per 512-byte unit of the page, worst case. */
42 int zones
[PAGE_SIZE
/512];
/* Debug trace of the request (read/write, entry, page, count, mode). */
48 printk ("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n",
49 (rw
== READ
) ? "read" : "write",
50 entry
, (char *) page_address(page
), page_count(page
),
51 wait
? "wait" : "nowait");
/* Unpack the swap entry and validate the swap-device index.
 * NOTE(review): the error-path return after this printk is elided. */
54 type
= SWP_TYPE(entry
)
55 if (type
>= nr_swapfiles
) {
56 printk("Internal error: bad swap-device\n");
60 /* Don't allow too many pending pages in flight.. */
/* Throttle: compare outstanding async pages against the configured
 * swap cluster size. NOTE(review): the body of this `if` (presumably
 * forcing wait or bailing) is elided from this extract — confirm
 * against the full source. */
61 if (atomic_read(&nr_async_pages
) > pager_daemon
.swap_cluster
)
/* Validate the page offset within the swap area (p->max is the limit).
 * NOTE(review): `p` is used here but its assignment from `type` is
 * elided from this extract. */
65 offset
= SWP_OFFSET(entry
);
66 if (offset
>= p
->max
) {
67 printk("rw_swap_page: weirdness\n");
/* Refuse I/O on a swap slot with a zero use count (unallocated). */
70 if (p
->swap_map
&& !p
->swap_map
[offset
]) {
71 printk(KERN_ERR
"rw_swap_page: "
72 "Trying to %s unallocated swap (%08lx)\n",
73 (rw
== READ
) ? "read" : "write", entry
);
/* Refuse I/O on a swap device that is not marked in use. */
76 if (!(p
->flags
& SWP_USED
)) {
77 printk(KERN_ERR
"rw_swap_page: "
78 "Trying to swap to unused swap-device\n");
/* Enforce the caller contract from the header comment: page is locked. */
82 if (!PageLocked(page
)) {
83 printk(KERN_ERR
"VM: swap page is unlocked\n");
/* About to (re)read or write the page: drop the uptodate flag first. */
88 ClearPageUptodate(page
);
/* NOTE(review): the block-device branch (original lines ~89-97, which
 * would set `dev` and fill `zones[]` for a swap partition) is elided;
 * only its tail assignment survives below. */
98 block_size
= PAGE_SIZE
;
/* Swap-file case: translate the page's offset into filesystem block
 * numbers via smap (sector map) or bmap. */
99 } else if (p
->swap_file
) {
100 struct inode
*swapf
= p
->swap_file
->d_inode
;
/* Prefer smap when the filesystem has no get_block (e.g. UMSDOS). */
102 if (swapf
->i_op
->get_block
== NULL
103 && swapf
->i_op
->smap
!= NULL
){
/* NOTE(review): the following prose (original lines 104-115) was a
 * C comment in the source; its opening and closing markers were lost
 * in extraction, so it is left exactly as found. */
105 With MS-DOS, we use msdos_smap which returns
106 a sector number (not a cluster or block number).
107 It is a patch to enable the UMSDOS project.
108 Other people are working on better solution.
110 It sounds like ll_rw_swap_file defined
111 its operation size (sector size) based on
112 PAGE_SIZE and the number of blocks to read.
113 So using get_block or smap should work even if
114 smap will require more blocks.
/* smap path: units are 512-byte sectors, hence offset << 3
 * (PAGE_SIZE/512 == 8 sectors per page) and the j += 512 stride. */
117 unsigned int block
= offset
<< 3;
119 for (i
=0, j
=0; j
< PAGE_SIZE
; i
++, j
+= 512){
120 if (!(zones
[i
] = swapf
->i_op
->smap(swapf
,block
++))) {
121 printk("rw_swap_page: bad swap file\n");
/* bmap path: units are filesystem blocks; convert the page offset
 * using the superblock's block-size shift, stride by block_size. */
128 unsigned int block
= offset
129 << (PAGE_SHIFT
- swapf
->i_sb
->s_blocksize_bits
);
131 block_size
= swapf
->i_sb
->s_blocksize
;
132 for (i
=0, j
=0; j
< PAGE_SIZE
; i
++, j
+= block_size
)
133 if (!(zones
[i
] = bmap(swapf
,block
++))) {
134 printk("rw_swap_page: bad swap file\n");
/* Neither a swap partition nor a swap file: internal inconsistency. */
141 printk(KERN_ERR
"rw_swap_page: no swap file or device\n");
/* Async bookkeeping: mark the page so the I/O completion handler
 * decrements nr_async_pages, and bump the in-flight counter now.
 * NOTE(review): the surrounding `if (!wait)`-style condition is
 * elided from this extract — confirm against the full source. */
146 set_bit(PG_decr_after
, &page
->flags
);
147 atomic_inc(&nr_async_pages
);
/* dolock path (presumably): take an extra reference on the swap slot
 * and have completion free it via PG_free_swap_after. */
150 set_bit(PG_free_swap_after
, &page
->flags
);
151 p
->swap_map
[offset
]++;
153 set_bit(PG_free_after
, &page
->flags
);
155 /* block_size == PAGE_SIZE/zones_used */
/* Submit the page's blocks to the block layer in one shot.
 * NOTE(review): `dev` is set in an elided branch above. */
156 brw_page(rw
, page
, dev
, zones
, block_size
, 0);
158 /* Note! For consistency we do all of the logic,
159 * decrementing the page count, and unlocking the page in the
160 * swap lock map - in the IO completion handler.
/* Synchronous path (wait != 0, per the header comment): after the
 * elided wait, sanity-check that the page was not freed under us. */
166 /* This shouldn't happen, but check to be sure. */
167 if (page_count(page
) == 0)
168 printk(KERN_ERR
"rw_swap_page: page unused while waiting!\n");
/* Debug trace on completion. NOTE(review): this printk's final
 * argument and the function's closing brace are elided. */
171 printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n",
172 (rw
== READ
) ? "read" : "write",
173 (char *) page_address(page
),
/*
 * NOTE(review): fragmentary extract — the function's opening brace, the
 * statements executed when each validation below fails (original lines
 * 190/192/194, presumably PAGE_BUG-style asserts), and the closing brace
 * are elided. Comments state only what the visible lines show.
 */
179 * A simple wrapper so the base function doesn't need to enforce
180 * that all swap pages go through the swap cache! We verify that:
181 * - the page is locked
182 * - it's marked as being swap-cache
183 * - it's associated with the swap inode
185 void rw_swap_page(int rw
, struct page
*page
, int wait
)
/* The swap entry is stored in the page's offset field for swap-cache
 * pages. */
187 unsigned long entry
= page
->offset
;
/* Validate the three swap-cache invariants listed above; the failure
 * actions are elided from this extract. */
189 if (!PageLocked(page
))
191 if (!PageSwapCache(page
))
193 if (page
->inode
!= &swapper_inode
)
/* Delegate with dolock=1: completion handling manages the lock map. */
195 rw_swap_page_base(rw
, entry
, page
, wait
, 1);
/*
 * NOTE(review): fragmentary extract — the opening brace, the statements
 * run when TryLockPage/PageSwapCache fail (original lines 207/209),
 * apparently a page->inode assignment (~line 210/211), and the closing
 * brace are elided.
 */
199 * Setting up a new swap file needs a simple wrapper just to read the
200 * swap signature. SysV shared memory also needs a simple wrapper.
/* Synchronous (wait=1 below) swap I/O on a caller-supplied buffer that
 * is NOT a swap-cache page. */
202 void rw_swap_page_nocache(int rw
, unsigned long entry
, char *buf
)
/* Map the kernel buffer address back to its struct page. */
204 struct page
*page
= mem_map
+ MAP_NR(buf
);
/* Must be able to take the page lock, and the page must not already be
 * in the swap cache; failure actions are elided from this extract. */
206 if (TryLockPage(page
))
208 if (PageSwapCache(page
))
/* Stash the swap entry where rw_swap_page_base/swap code expects it. */
212 page
->offset
= entry
;
/* wait=1 (synchronous), dolock=1. */
213 rw_swap_page_base(rw
, entry
, page
, 1, 1);
/*
 * NOTE(review): fragmentary extract — the opening brace, the statements
 * run when the PageLocked/PageSwapCache checks fail (original lines
 * 227/229), and the closing brace are elided.
 */
217 * shmfs needs a version that doesn't put the page in the page cache!
218 * The swap lock map insists that pages be in the page cache!
219 * Therefore we can't use it. Later when we can remove the need for the
220 * lock map and we can reduce the number of functions exported.
/* Like rw_swap_page_nocache, but the caller already holds the page lock
 * and no lock-map handling is wanted (dolock=0 below). */
222 void rw_swap_page_nolock(int rw
, unsigned long entry
, char *buf
, int wait
)
/* Map the kernel buffer address back to its struct page. */
224 struct page
*page
= mem_map
+ MAP_NR(buf
);
/* Caller contract: page already locked, and not a swap-cache page;
 * failure actions are elided from this extract. */
226 if (!PageLocked(page
))
228 if (PageSwapCache(page
))
/* dolock=0: skip swap lock-map management in the completion path. */
230 rw_swap_page_base(rw
, entry
, page
, wait
, 0);