/*
 *  linux/mm/swap_state.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *
 *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
 */
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>

#include <asm/pgtable.h>
static struct backing_dev_info swap_backing_dev_info = {
	.ra_pages	= 0,	/* No readahead */
	.memory_backed	= 1,	/* Does not contribute to dirty memory */
};

extern struct address_space_operations swap_aops;
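
/*
 * swapper_space is the single address_space backing all swap cache
 * pages.  Pages are indexed in its radix tree by swp_entry_t.val, so
 * page->index of a swap cache page is the swap entry it belongs to.
 */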
struct address_space swapper_space = {
	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC),
	.page_lock	= SPIN_LOCK_UNLOCKED,
	.clean_pages	= LIST_HEAD_INIT(swapper_space.clean_pages),
	.dirty_pages	= LIST_HEAD_INIT(swapper_space.dirty_pages),
	.io_pages	= LIST_HEAD_INIT(swapper_space.io_pages),
	.locked_pages	= LIST_HEAD_INIT(swapper_space.locked_pages),
	.a_ops		= &swap_aops,
	.backing_dev_info = &swap_backing_dev_info,
	.i_mmap		= LIST_HEAD_INIT(swapper_space.i_mmap),
	.i_mmap_shared	= LIST_HEAD_INIT(swapper_space.i_mmap_shared),
	.i_shared_sem	= __MUTEX_INITIALIZER(swapper_space.i_shared_sem),
	.truncate_count	= ATOMIC_INIT(0),
	.private_lock	= SPIN_LOCK_UNLOCKED,
	.private_list	= LIST_HEAD_INIT(swapper_space.private_list),
};
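
/*
 * Swap cache statistics.  INC_CACHE_INFO(field) bumps the named counter
 * in swap_cache_info; the totals are reported by show_swap_cache_info().
 */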
#define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)

static struct {
	unsigned long add_total;
	unsigned long del_total;
	unsigned long find_success;
	unsigned long find_total;
	unsigned long noent_race;
	unsigned long exist_race;
} swap_cache_info;
void show_swap_cache_info(void)
{
	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
		swap_cache_info.add_total, swap_cache_info.del_total,
		swap_cache_info.find_success, swap_cache_info.find_total,
		swap_cache_info.noent_race, swap_cache_info.exist_race);
}
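
/*
 * add_to_swap_cache() takes an extra reference on the swap entry via
 * swap_duplicate() and inserts the page into swapper_space at index
 * entry.val.  Returns -ENOENT if the swap entry has already been freed,
 * -EEXIST if another page already occupies that slot.
 */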
static int add_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int error;

	if (page->mapping)
		BUG();
	if (!swap_duplicate(entry)) {
		INC_CACHE_INFO(noent_race);
		return -ENOENT;
	}
	error = add_to_page_cache(page, &swapper_space, entry.val, GFP_KERNEL);
	/*
	 * Anon pages are already on the LRU, we don't run lru_cache_add here.
	 */
	if (error) {
		swap_free(entry);
		if (error == -EEXIST)
			INC_CACHE_INFO(exist_race);
		return error;
	}
	if (!PageLocked(page))
		BUG();
	if (!PageSwapCache(page))
		BUG();
	INC_CACHE_INFO(add_total);
	return 0;
}
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(!PageSwapCache(page));
	BUG_ON(PageWriteback(page));
	__remove_from_page_cache(page);
	INC_CACHE_INFO(del_total);
}
/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
 *
 * Allocate swap space for the page and add the page to the
 * swap cache.  Caller needs to hold the page lock.
 */
int add_to_swap(struct page * page)
{
	swp_entry_t entry;
	int pf_flags;
	int err;

	if (!PageLocked(page))
		BUG();

	for (;;) {
		entry = get_swap_page();
		if (!entry.val)
			return 0;

		/* Radix-tree node allocations are performing
		 * GFP_ATOMIC allocations under PF_MEMALLOC.
		 * They can completely exhaust the page allocator.
		 *
		 * So PF_MEMALLOC is dropped here.  This causes the slab
		 * allocations to fail earlier, so radix-tree nodes will
		 * then be allocated from the mempool reserves.
		 *
		 * We're still using __GFP_HIGH for radix-tree node
		 * allocations, so some of the emergency pools are available,
		 * just not all of them.
		 */
		pf_flags = current->flags;
		current->flags &= ~PF_MEMALLOC;

		/*
		 * Add it to the swap cache and mark it dirty
		 */
		err = add_to_page_cache(page, &swapper_space,
					entry.val, GFP_ATOMIC);

		if (pf_flags & PF_MEMALLOC)
			current->flags |= PF_MEMALLOC;
		switch (err) {
		case 0:				/* Success */
			SetPageUptodate(page);
			ClearPageDirty(page);
			set_page_dirty(page);
			INC_CACHE_INFO(add_total);
			return 1;
		case -EEXIST:
			/* Raced with "speculative" read_swap_cache_async */
			INC_CACHE_INFO(exist_race);
			swap_free(entry);
			continue;
		default:
			/* -ENOMEM radix-tree allocation failure */
			swap_free(entry);
			return 0;
		}
	}
}
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;

	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));
	BUG_ON(PagePrivate(page));

	entry.val = page->index;

	spin_lock(&swapper_space.page_lock);
	__delete_from_swap_cache(page);
	spin_unlock(&swapper_space.page_lock);

	swap_free(entry);
	page_cache_release(page);
}
int move_to_swap_cache(struct page *page, swp_entry_t entry)
{
	struct address_space *mapping = page->mapping;
	int err;

	spin_lock(&swapper_space.page_lock);
	spin_lock(&mapping->page_lock);

	err = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
	if (!err) {
		__remove_from_page_cache(page);
		___add_to_page_cache(page, &swapper_space, entry.val);
	}

	spin_unlock(&mapping->page_lock);
	spin_unlock(&swapper_space.page_lock);

	if (!err) {
		if (!swap_duplicate(entry))
			BUG();
		/* shift page from clean_pages to dirty_pages list */
		BUG_ON(PageDirty(page));
		set_page_dirty(page);
		INC_CACHE_INFO(add_total);
	} else if (err == -EEXIST)
		INC_CACHE_INFO(exist_race);
	return err;
}
int move_from_swap_cache(struct page *page, unsigned long index,
		struct address_space *mapping)
{
	swp_entry_t entry;
	int err;

	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));
	BUG_ON(PagePrivate(page));

	entry.val = page->index;

	spin_lock(&swapper_space.page_lock);
	spin_lock(&mapping->page_lock);

	err = radix_tree_insert(&mapping->page_tree, index, page);
	if (!err) {
		__delete_from_swap_cache(page);
		___add_to_page_cache(page, mapping, index);
	}

	spin_unlock(&mapping->page_lock);
	spin_unlock(&swapper_space.page_lock);

	if (!err) {
		swap_free(entry);
		/* shift page from clean_pages to dirty_pages list */
		ClearPageDirty(page);
		set_page_dirty(page);
	}
	return err;
}
/*
 * If we are the only user, then try to free up the swap cache.
 *
 * Its ok to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * exclusive_swap_page() _with_ the lock.
 */
static inline void free_swap_cache(struct page *page)
{
	if (PageSwapCache(page) && !TestSetPageLocked(page)) {
		remove_exclusive_swap_page(page);
		unlock_page(page);
	}
}
/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page. Can not do a lock_page,
 * as we are holding the page_table_lock spinlock.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);
	page_cache_release(page);
}
/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them.  They are removed from the LRU and freed if this is their last use.
 */
void free_pages_and_swap_cache(struct page **pages, int nr)
{
	int chunk = 16;
	struct page **pagep = pages;

	lru_add_drain();
	while (nr) {
		int todo = min(chunk, nr);
		int i;

		for (i = 0; i < todo; i++)
			free_swap_cache(pagep[i]);
		release_pages(pagep, todo, 0);
		pagep += todo;
		nr -= todo;
	}
}
/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page * lookup_swap_cache(swp_entry_t entry)
{
	struct page *found;

	found = find_get_page(&swapper_space, entry.val);
	/*
	 * Unsafe to assert PageSwapCache and mapping on page found:
	 * if SMP nothing prevents swapoff from deleting this page from
	 * the swap cache at this moment. find_lock_page would prevent
	 * that, but no need to change: we _have_ got the right page.
	 */
	INC_CACHE_INFO(find_total);
	if (found)
		INC_CACHE_INFO(find_success);
	return found;
}
/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page * read_swap_cache_async(swp_entry_t entry)
{
	struct page *found_page, *new_page = NULL;
	int err;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics: use find_get_page()
		 * directly.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page(GFP_HIGHUSER);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * Associate the page with swap entry in the swap cache.
		 * May fail (-ENOENT) if swap entry has been freed since
		 * our caller observed it.  May fail (-EEXIST) if there
		 * is already a page associated with this entry in the
		 * swap cache: added by a racing read_swap_cache_async,
		 * or by try_to_swap_out (or shmem_writepage) re-using
		 * the just freed swap entry for an existing page.
		 * May fail (-ENOMEM) if radix-tree node allocation failed.
		 */
		err = add_to_swap_cache(new_page, entry);
		if (!err) {
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_active(new_page);
			swap_readpage(NULL, new_page);
			return new_page;
		}
	} while (err != -ENOENT && err != -ENOMEM);
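
	/*
	 * We get here either with a page already in the cache (found_page)
	 * or after add_to_swap_cache() failed with -ENOENT or -ENOMEM;
	 * only -EEXIST is worth retrying.  Drop the spare page, if any,
	 * before returning.
	 */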
	if (new_page)
		page_cache_release(new_page);
	return found_page;
}