/*
 *  linux/mm/swap_state.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *
 *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>

#include <asm/pgtable.h>

static struct backing_dev_info swap_backing_dev_info = {
        .ra_pages       = 0,    /* No readahead */
        .memory_backed  = 1,    /* Does not contribute to dirty memory */
};

extern struct address_space_operations swap_aops;

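/*
 * swapper_space is the address_space that backs all swap cache pages:
 * each page in the swap cache is indexed in its radix tree by the value
 * of its swap entry (swp_entry_t.val), and page->index holds that value.
 */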
struct address_space swapper_space = {
        .page_tree      = RADIX_TREE_INIT(GFP_ATOMIC),
        .page_lock      = SPIN_LOCK_UNLOCKED,
        .clean_pages    = LIST_HEAD_INIT(swapper_space.clean_pages),
        .dirty_pages    = LIST_HEAD_INIT(swapper_space.dirty_pages),
        .io_pages       = LIST_HEAD_INIT(swapper_space.io_pages),
        .locked_pages   = LIST_HEAD_INIT(swapper_space.locked_pages),
        .a_ops          = &swap_aops,
        .backing_dev_info = &swap_backing_dev_info,
        .i_mmap         = LIST_HEAD_INIT(swapper_space.i_mmap),
        .i_mmap_shared  = LIST_HEAD_INIT(swapper_space.i_mmap_shared),
        .i_shared_sem   = __MUTEX_INITIALIZER(swapper_space.i_shared_sem),
        .private_lock   = SPIN_LOCK_UNLOCKED,
        .private_list   = LIST_HEAD_INIT(swapper_space.private_list),
};

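/*
 * Swap cache statistics, updated through INC_CACHE_INFO() and
 * reported by show_swap_cache_info().
 */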
#define INC_CACHE_INFO(x)       do { swap_cache_info.x++; } while (0)

static struct {
        unsigned long add_total;
        unsigned long del_total;
        unsigned long find_success;
        unsigned long find_total;
        unsigned long noent_race;
        unsigned long exist_race;
} swap_cache_info;

void show_swap_cache_info(void)
{
        printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
                swap_cache_info.add_total, swap_cache_info.del_total,
                swap_cache_info.find_success, swap_cache_info.find_total,
                swap_cache_info.noent_race, swap_cache_info.exist_race);
}

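/*
 * add_to_swap_cache() takes a reference on the swap entry with
 * swap_duplicate() and inserts the page into swapper_space at index
 * entry.val.  Returns 0 on success, -ENOENT if the swap entry has
 * already been freed, or the add_to_page_cache() error otherwise.
 */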
static int add_to_swap_cache(struct page *page, swp_entry_t entry)
{
        int error;

        if (page->mapping)
                BUG();
        if (!swap_duplicate(entry)) {
                INC_CACHE_INFO(noent_race);
                return -ENOENT;
        }
        error = add_to_page_cache(page, &swapper_space, entry.val, GFP_KERNEL);
        /*
         * Anon pages are already on the LRU, we don't run lru_cache_add here.
         */
        if (error != 0) {
                swap_free(entry);
                if (error == -EEXIST)
                        INC_CACHE_INFO(exist_race);
                return error;
        }
        if (!PageLocked(page))
                BUG();
        if (!PageSwapCache(page))
                BUG();
        INC_CACHE_INFO(add_total);
        return 0;
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page)
{
        BUG_ON(!PageLocked(page));
        BUG_ON(!PageSwapCache(page));
        BUG_ON(PageWriteback(page));
        __remove_from_page_cache(page);
        INC_CACHE_INFO(del_total);
}

/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
 *
 * Allocate swap space for the page and add the page to the
 * swap cache.  Caller needs to hold the page lock.
 */
int add_to_swap(struct page * page)
{
        swp_entry_t entry;
        int pf_flags;
        int err;

        if (!PageLocked(page))
                BUG();

        for (;;) {
                entry = get_swap_page();
                if (!entry.val)
                        return 0;

                /* Radix-tree node allocations are performing
                 * GFP_ATOMIC allocations under PF_MEMALLOC.
                 * They can completely exhaust the page allocator.
                 *
                 * So PF_MEMALLOC is dropped here.  This causes the slab
                 * allocations to fail earlier, so radix-tree nodes will
                 * then be allocated from the mempool reserves.
                 *
                 * We're still using __GFP_HIGH for radix-tree node
                 * allocations, so some of the emergency pools are available,
                 * just not all of them.
                 */
                pf_flags = current->flags;
                current->flags &= ~PF_MEMALLOC;

                /*
                 * Add it to the swap cache and mark it dirty
                 */
                err = add_to_page_cache(page, &swapper_space,
                                        entry.val, GFP_ATOMIC);

                if (pf_flags & PF_MEMALLOC)
                        current->flags |= PF_MEMALLOC;

                switch (err) {
                case 0:                         /* Success */
                        SetPageUptodate(page);
                        ClearPageDirty(page);
                        set_page_dirty(page);
                        INC_CACHE_INFO(add_total);
                        return 1;
                case -EEXIST:
                        /* Raced with "speculative" read_swap_cache_async */
                        INC_CACHE_INFO(exist_race);
                        swap_free(entry);
                        continue;
                default:
                        /* -ENOMEM radix-tree allocation failure */
                        swap_free(entry);
                        return 0;
                }
        }
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
        swp_entry_t entry;

        BUG_ON(!PageLocked(page));
        BUG_ON(PageWriteback(page));
        BUG_ON(PagePrivate(page));

        entry.val = page->index;

        spin_lock(&swapper_space.page_lock);
        __delete_from_swap_cache(page);
        spin_unlock(&swapper_space.page_lock);

        swap_free(entry);
        page_cache_release(page);
}

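/*
 * move_to_swap_cache() shifts a page from its current mapping into the
 * swap cache: with both page_lock spinlocks held it is inserted into
 * the swapper_space radix tree at entry.val and removed from the old
 * mapping, then redirtied so that it will be written out to swap.
 */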
int move_to_swap_cache(struct page *page, swp_entry_t entry)
{
        struct address_space *mapping = page->mapping;
        int err;

        spin_lock(&swapper_space.page_lock);
        spin_lock(&mapping->page_lock);

        err = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
        if (!err) {
                __remove_from_page_cache(page);
                ___add_to_page_cache(page, &swapper_space, entry.val);
        }

        spin_unlock(&mapping->page_lock);
        spin_unlock(&swapper_space.page_lock);

        if (!err) {
                if (!swap_duplicate(entry))
                        BUG();
                /* shift page from clean_pages to dirty_pages list */
                BUG_ON(PageDirty(page));
                set_page_dirty(page);
                INC_CACHE_INFO(add_total);
        } else if (err == -EEXIST)
                INC_CACHE_INFO(exist_race);
        return err;
}

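/*
 * move_from_swap_cache() is the reverse: it moves a locked page out of
 * the swap cache and into @mapping at @index, dropping the swap entry
 * reference with swap_free() and redirtying the page.
 */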
int move_from_swap_cache(struct page *page, unsigned long index,
                struct address_space *mapping)
{
        swp_entry_t entry;
        int err;

        BUG_ON(!PageLocked(page));
        BUG_ON(PageWriteback(page));
        BUG_ON(PagePrivate(page));

        entry.val = page->index;

        spin_lock(&swapper_space.page_lock);
        spin_lock(&mapping->page_lock);

        err = radix_tree_insert(&mapping->page_tree, index, page);
        if (!err) {
                __delete_from_swap_cache(page);
                ___add_to_page_cache(page, mapping, index);
        }

        spin_unlock(&mapping->page_lock);
        spin_unlock(&swapper_space.page_lock);

        if (!err) {
                swap_free(entry);
                /* shift page from clean_pages to dirty_pages list */
                ClearPageDirty(page);
                set_page_dirty(page);
        }
        return err;
}

/*
 * If we are the only user, then try to free up the swap cache.
 *
 * Its ok to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * exclusive_swap_page() _with_ the lock.
 */
static inline void free_swap_cache(struct page *page)
{
        if (PageSwapCache(page) && !TestSetPageLocked(page)) {
                remove_exclusive_swap_page(page);
                unlock_page(page);
        }
}

/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page. Can not do a lock_page,
 * as we are holding the page_table_lock spinlock.
 */
void free_page_and_swap_cache(struct page *page)
{
        free_swap_cache(page);
        page_cache_release(page);
}

/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them.  They are removed from the LRU and freed if this is their last use.
 */
void free_pages_and_swap_cache(struct page **pages, int nr)
{
        int chunk = 16;
        struct page **pagep = pages;

        lru_add_drain();
        while (nr) {
                int todo = min(chunk, nr);
                int i;

                for (i = 0; i < todo; i++)
                        free_swap_cache(pagep[i]);
                release_pages(pagep, todo, 0);
                pagep += todo;
                nr -= todo;
        }
}

/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page * lookup_swap_cache(swp_entry_t entry)
{
        struct page *found;

        found = find_get_page(&swapper_space, entry.val);
        /*
         * Unsafe to assert PageSwapCache and mapping on page found:
         * if SMP nothing prevents swapoff from deleting this page from
         * the swap cache at this moment. find_lock_page would prevent
         * that, but no need to change: we _have_ got the right page.
         */
        INC_CACHE_INFO(find_total);
        if (found)
                INC_CACHE_INFO(find_success);
        return found;
}

/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page * read_swap_cache_async(swp_entry_t entry)
{
        struct page *found_page, *new_page = NULL;
        int err;

        do {
                /*
                 * First check the swap cache.  Since this is normally
                 * called after lookup_swap_cache() failed, re-calling
                 * that would confuse statistics: use find_get_page()
                 * directly.
                 */
                found_page = find_get_page(&swapper_space, entry.val);
                if (found_page)
                        break;

                /*
                 * Get a new page to read into from swap.
                 */
                if (!new_page) {
                        new_page = alloc_page(GFP_HIGHUSER);
                        if (!new_page)
                                break;          /* Out of memory */
                }

                /*
                 * Associate the page with swap entry in the swap cache.
                 * May fail (-ENOENT) if swap entry has been freed since
                 * our caller observed it.  May fail (-EEXIST) if there
                 * is already a page associated with this entry in the
                 * swap cache: added by a racing read_swap_cache_async,
                 * or by try_to_swap_out (or shmem_writepage) re-using
                 * the just freed swap entry for an existing page.
                 * May fail (-ENOMEM) if radix-tree node allocation failed.
                 */
                err = add_to_swap_cache(new_page, entry);
                if (!err) {
                        /*
                         * Initiate read into locked page and return.
                         */
                        lru_cache_add_active(new_page);
                        swap_readpage(NULL, new_page);
                        return new_page;
                }
        } while (err != -ENOENT && err != -ENOMEM);

        if (new_page)
                page_cache_release(new_page);
        return found_page;
}