/*
 * mm/rmap.c - physical to virtual reverse mappings
 *
 * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
 * Released under the General Public License (GPL).
 *
 * Simple, low overhead pte-based reverse mapping scheme.
 * This is kept modular because we may want to experiment
 * with object-based reverse mapping schemes. Please try
 * to keep this thing as modular as possible.
 *
 * Locking:
 * - the page->pte.chain is protected by the PG_chainlock bit,
 *   which nests within the zone->lru_lock, then the
 *   mm->page_table_lock, and then the page lock.
 * - because swapout locking is opposite to the locking order
 *   in the page fault path, the swapout path uses trylocks
 *   on the mm->page_table_lock
 */

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swapops.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rmap-locking.h>
#include <linux/cache.h>
#include <linux/percpu.h>

#include <asm/pgalloc.h>
#include <asm/rmap.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

/* #define DEBUG_RMAP */

/*
 * Shared pages have a chain of pte_chain structures, used to locate
 * all the mappings to this page. We only need a pointer to the pte
 * here, the page struct for the page table page contains the process
 * it belongs to and the offset within that process.
 *
 * We use an array of pte pointers in this structure to minimise cache misses
 * while traversing reverse maps.
 */
#define NRPTE ((L1_CACHE_BYTES - sizeof(void *))/sizeof(pte_addr_t))
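
/*
 * Worked example (illustrative values, not mandated by the code): with
 * L1_CACHE_BYTES == 32, a 4-byte next pointer and a 4-byte pte_addr_t,
 * NRPTE == (32 - 4) / 4 == 7, so one pte_chain packs seven pte addresses
 * plus the chain link into a single cache line.
 */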

struct pte_chain {
	struct pte_chain *next;
	pte_addr_t ptes[NRPTE];
} ____cacheline_aligned;

kmem_cache_t *pte_chain_cache;

/*
 * pte_chain list management policy:
 *
 * - If a page has a pte_chain list then it is shared by at least two processes,
 *   because a single sharing uses PageDirect. (Well, this isn't true yet,
 *   coz this code doesn't collapse singletons back to PageDirect on the remove
 *   path).
 * - A pte_chain list has free space only in the head member - all succeeding
 *   members are 100% full.
 * - If the head element has free space, it occurs in its leading slots.
 * - All free space in the pte_chain is at the start of the head member.
 * - Insertion into the pte_chain puts a pte pointer in the last free slot of
 *   the head member.
 * - Removal from a pte chain moves the head pte of the head member onto the
 *   victim pte and frees the head member if it became empty.
 */
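
/*
 * Example of these invariants (assuming NRPTE == 7): a head member holding
 * three ptes looks like
 *
 *	ptes[] = { 0, 0, 0, 0, p3, p2, p1 }
 *
 * All free slots sit at the start, so the next insertion fills ptes[3],
 * the last free slot, and a removal refills the vacated slot from ptes[4],
 * the head pte of the head member.
 */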

/**
 ** VM stuff below this comment
 **/

/**
 * page_referenced - test if the page was referenced
 * @page: the page to test
 *
 * Quick test_and_clear_referenced for all mappings to a page,
 * returns the number of processes which referenced the page.
 * Caller needs to hold the pte_chain_lock.
 *
 * If the page has a single-entry pte_chain, collapse that back to a PageDirect
 * representation. This way, it's only done under memory pressure.
 */
int page_referenced(struct page * page)
{
	struct pte_chain * pc;
	int referenced = 0;

	if (TestClearPageReferenced(page))
		referenced++;

	if (PageDirect(page)) {
		pte_t *pte = rmap_ptep_map(page->pte.direct);
		if (ptep_test_and_clear_young(pte))
			referenced++;
		rmap_ptep_unmap(pte);
	} else {
		int nr_chains = 0;

		/* Check all the page tables mapping this page. */
		for (pc = page->pte.chain; pc; pc = pc->next) {
			int i;

			for (i = NRPTE-1; i >= 0; i--) {
				pte_addr_t pte_paddr = pc->ptes[i];
				pte_t *p;

				if (!pte_paddr)
					break;
				p = rmap_ptep_map(pte_paddr);
				if (ptep_test_and_clear_young(p))
					referenced++;
				rmap_ptep_unmap(p);
				nr_chains++;
			}
		}
		if (nr_chains == 1) {
			pc = page->pte.chain;
			page->pte.direct = pc->ptes[NRPTE-1];
			SetPageDirect(page);
			pc->ptes[NRPTE-1] = 0;
			__pte_chain_free(pc);
		}
	}
	return referenced;
}

/**
 * page_add_rmap - add reverse mapping entry to a page
 * @page: the page to add the mapping to
 * @ptep: the page table entry mapping this page
 *
 * Add a new pte reverse mapping to a page.
 * The caller needs to hold the mm->page_table_lock.
 */
struct pte_chain *
page_add_rmap(struct page *page, pte_t *ptep, struct pte_chain *pte_chain)
{
	pte_addr_t pte_paddr = ptep_to_paddr(ptep);
	struct pte_chain *cur_pte_chain;
	int i;

#ifdef DEBUG_RMAP
	if (!page || !ptep)
		BUG();
	if (!pte_present(*ptep))
		BUG();
	if (!ptep_to_mm(ptep))
		BUG();
#endif

	if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
		return pte_chain;

	pte_chain_lock(page);

#ifdef DEBUG_RMAP
	/*
	 * This stuff needs help to get up to highmem speed.
	 */
	{
		struct pte_chain * pc;
		if (PageDirect(page)) {
			if (page->pte.direct == pte_paddr)
				BUG();
		} else {
			for (pc = page->pte.chain; pc; pc = pc->next) {
				for (i = 0; i < NRPTE; i++) {
					pte_addr_t p = pc->ptes[i];

					if (p && p == pte_paddr)
						BUG();
				}
			}
		}
	}
#endif

	if (page->pte.direct == 0) {
		page->pte.direct = pte_paddr;
		SetPageDirect(page);
		inc_page_state(nr_mapped);
		goto out;
	}

	if (PageDirect(page)) {
		/* Convert a direct pointer into a pte_chain */
		ClearPageDirect(page);
		pte_chain->ptes[NRPTE-1] = page->pte.direct;
		pte_chain->ptes[NRPTE-2] = pte_paddr;
		page->pte.direct = 0;
		page->pte.chain = pte_chain;
		pte_chain = NULL;	/* We consumed it */
		goto out;
	}

	cur_pte_chain = page->pte.chain;
	if (cur_pte_chain->ptes[0]) {	/* It's full */
		pte_chain->next = cur_pte_chain;
		page->pte.chain = pte_chain;
		pte_chain->ptes[NRPTE-1] = pte_paddr;
		pte_chain = NULL;	/* We consumed it */
		goto out;
	}

	BUG_ON(!cur_pte_chain->ptes[NRPTE-1]);

	for (i = NRPTE-2; i >= 0; i--) {
		if (!cur_pte_chain->ptes[i]) {
			cur_pte_chain->ptes[i] = pte_paddr;
			goto out;
		}
	}
	BUG();
out:
	pte_chain_unlock(page);
	inc_page_state(nr_reverse_maps);
	return pte_chain;
}
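
/*
 * Sketch of the caller protocol for page_add_rmap() (illustrative only;
 * pte_chain_free() is assumed to be the NULL-tolerant wrapper declared in
 * linux/rmap-locking.h):
 *
 *	pte_chain = pte_chain_alloc(GFP_KERNEL);
 *	spin_lock(&mm->page_table_lock);
 *	...install the pte...
 *	pte_chain = page_add_rmap(page, ptep, pte_chain);
 *	spin_unlock(&mm->page_table_lock);
 *	pte_chain_free(pte_chain);
 *
 * A NULL return means page_add_rmap() consumed the preallocated pte_chain;
 * a non-NULL return hands it back for reuse on the next call or for freeing.
 */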

/**
 * page_remove_rmap - take down reverse mapping to a page
 * @page: page to remove mapping from
 * @ptep: page table entry to remove
 *
 * Removes the reverse mapping from the pte_chain of the page,
 * after that the caller can clear the page table entry and free
 * the page.
 * Caller needs to hold the mm->page_table_lock.
 */
void page_remove_rmap(struct page * page, pte_t * ptep)
{
	pte_addr_t pte_paddr = ptep_to_paddr(ptep);
	struct pte_chain *pc;

	if (!page || !ptep)
		BUG();
	if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
		return;
	if (!page_mapped(page))
		return;		/* remap_page_range() from a driver? */

	pte_chain_lock(page);

	if (PageDirect(page)) {
		if (page->pte.direct == pte_paddr) {
			page->pte.direct = 0;
			dec_page_state(nr_reverse_maps);
			ClearPageDirect(page);
			goto out;
		}
	} else {
		struct pte_chain *start = page->pte.chain;
		int victim_i = -1;

		for (pc = start; pc; pc = pc->next) {
			int i;

			if (pc->next)
				prefetch(pc->next);
			for (i = 0; i < NRPTE; i++) {
				pte_addr_t pa = pc->ptes[i];

				if (!pa)
					continue;
				if (victim_i == -1)
					victim_i = i;
				if (pa != pte_paddr)
					continue;
				pc->ptes[i] = start->ptes[victim_i];
				dec_page_state(nr_reverse_maps);
				start->ptes[victim_i] = 0;
				if (victim_i == NRPTE-1) {
					/* Emptied a pte_chain */
					page->pte.chain = start->next;
					__pte_chain_free(start);
				} else {
					/* Do singleton->PageDirect here */
				}
				goto out;
			}
		}
	}
#ifdef DEBUG_RMAP
	/* Not found. This should NEVER happen! */
	printk(KERN_ERR "page_remove_rmap: pte_chain %p not present.\n", ptep);
	printk(KERN_ERR "page_remove_rmap: only found: ");
	if (PageDirect(page)) {
		printk("%llx", (u64)page->pte.direct);
	} else {
		for (pc = page->pte.chain; pc; pc = pc->next) {
			int i;
			for (i = 0; i < NRPTE; i++)
				printk(" %d:%llx", i, (u64)pc->ptes[i]);
		}
	}
	printk("\n");
	printk(KERN_ERR "page_remove_rmap: driver cleared PG_reserved ?\n");
#endif

out:
	pte_chain_unlock(page);
	if (!page_mapped(page))
		dec_page_state(nr_mapped);
	return;
}
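
/*
 * Note on the removal path above: copying start->ptes[victim_i] (the head
 * pte of the head member) into the vacated slot, then zeroing the head
 * slot, keeps all free space at the start of the head member. E.g. with
 * NRPTE == 7, removing p2 from { 0, 0, 0, 0, p3, p2, p1 } yields
 * { 0, 0, 0, 0, 0, p3, p1 }. When the head pte sat in slot NRPTE-1 the
 * head member held its last pte, so it is unlinked and freed.
 */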

/**
 * try_to_unmap_one - worker function for try_to_unmap
 * @page: page to unmap
 * @ptep: page table entry to unmap from page
 *
 * Internal helper function for try_to_unmap, called for each page
 * table entry mapping a page. Because locking order here is opposite
 * to the locking order used by the page fault path, we use trylocks.
 * Locking:
 *	zone->lru_lock			page_launder()
 *	    page lock			page_launder(), trylock
 *		pte_chain_lock		page_launder()
 *		    mm->page_table_lock	try_to_unmap_one(), trylock
 */
static int FASTCALL(try_to_unmap_one(struct page *, pte_addr_t));
static int try_to_unmap_one(struct page * page, pte_addr_t paddr)
{
	pte_t *ptep = rmap_ptep_map(paddr);
	unsigned long address = ptep_to_address(ptep);
	struct mm_struct * mm = ptep_to_mm(ptep);
	struct vm_area_struct * vma;
	pte_t pte;
	int ret;

	if (!mm)
		BUG();

	/*
	 * We need the page_table_lock to protect us from page faults,
	 * munmap, fork, etc...
	 */
	if (!spin_trylock(&mm->page_table_lock)) {
		rmap_ptep_unmap(ptep);
		return SWAP_AGAIN;
	}

	/* During mremap, it's possible pages are not in a VMA. */
	vma = find_vma(mm, address);
	if (!vma) {
		ret = SWAP_FAIL;
		goto out_unlock;
	}

	/* The page is mlock()ed, we cannot swap it out. */
	if (vma->vm_flags & VM_LOCKED) {
		ret = SWAP_FAIL;
		goto out_unlock;
	}

	/* Nuke the page table entry. */
	flush_cache_page(vma, address);
	pte = ptep_get_and_clear(ptep);
	flush_tlb_page(vma, address);

	/* Store the swap location in the pte. See handle_pte_fault() ... */
	if (PageSwapCache(page)) {
		swp_entry_t entry = { .val = page->index };
		swap_duplicate(entry);
		set_pte(ptep, swp_entry_to_pte(entry));
	}

	/* Move the dirty bit to the physical page now the pte is gone. */
	if (pte_dirty(pte))
		set_page_dirty(page);

	mm->rss--;
	page_cache_release(page);
	ret = SWAP_SUCCESS;

out_unlock:
	rmap_ptep_unmap(ptep);
	spin_unlock(&mm->page_table_lock);
	return ret;
}

/**
 * try_to_unmap - try to remove all page table mappings to a page
 * @page: the page to get unmapped
 *
 * Tries to remove all the page table entries which are mapping this
 * page, used in the pageout path. Caller must hold zone->lru_lock
 * and the page lock. Return values are:
 *
 * SWAP_SUCCESS	- we succeeded in removing all mappings
 * SWAP_AGAIN	- we missed a trylock, try again later
 * SWAP_FAIL	- the page is unswappable
 * SWAP_ERROR	- an error occurred
 */
int try_to_unmap(struct page * page)
{
	struct pte_chain *pc, *next_pc, *start;
	int ret = SWAP_SUCCESS;
	int victim_i = -1;

	/* This page should not be on the pageout lists. */
	if (PageReserved(page))
		BUG();
	if (!PageLocked(page))
		BUG();
	/* We need backing store to swap out a page. */
	if (!page->mapping)
		BUG();

	if (PageDirect(page)) {
		ret = try_to_unmap_one(page, page->pte.direct);
		if (ret == SWAP_SUCCESS) {
			page->pte.direct = 0;
			dec_page_state(nr_reverse_maps);
			ClearPageDirect(page);
		}
		goto out;
	}

	start = page->pte.chain;
	for (pc = start; pc; pc = next_pc) {
		int i;

		next_pc = pc->next;
		if (next_pc)
			prefetch(next_pc);
		for (i = 0; i < NRPTE; i++) {
			pte_addr_t pte_paddr = pc->ptes[i];

			if (!pte_paddr)
				continue;
			if (victim_i == -1)
				victim_i = i;

			switch (try_to_unmap_one(page, pte_paddr)) {
			case SWAP_SUCCESS:
				/*
				 * Release a slot. If we're releasing the
				 * first pte in the first pte_chain then
				 * pc->ptes[i] and start->ptes[victim_i] both
				 * refer to the same thing. It works out.
				 */
				pc->ptes[i] = start->ptes[victim_i];
				start->ptes[victim_i] = 0;
				dec_page_state(nr_reverse_maps);
				victim_i++;
				if (victim_i == NRPTE) {
					page->pte.chain = start->next;
					__pte_chain_free(start);
					start = page->pte.chain;
					victim_i = 0;
				}
				break;
			case SWAP_AGAIN:
				/* Skip this pte, remembering status. */
				ret = SWAP_AGAIN;
				continue;
			case SWAP_FAIL:
				ret = SWAP_FAIL;
				goto out;
			case SWAP_ERROR:
				ret = SWAP_ERROR;
				goto out;
			}
		}
	}
out:
	if (!page_mapped(page))
		dec_page_state(nr_mapped);
	return ret;
}

/**
 ** No more VM stuff below this comment, only pte_chain helper
 ** functions.
 **/

static void pte_chain_ctor(void *p, kmem_cache_t *cachep, unsigned long flags)
{
	struct pte_chain *pc = p;

	memset(pc, 0, sizeof(*pc));
}

DEFINE_PER_CPU(struct pte_chain *, local_pte_chain) = 0;

/**
 * __pte_chain_free - free pte_chain structure
 * @pte_chain: pte_chain struct to free
 */
void __pte_chain_free(struct pte_chain *pte_chain)
{
	int cpu = get_cpu();
	struct pte_chain **pte_chainp;

	if (pte_chain->next)
		pte_chain->next = NULL;
	pte_chainp = &per_cpu(local_pte_chain, cpu);
	if (*pte_chainp)
		kmem_cache_free(pte_chain_cache, *pte_chainp);
	*pte_chainp = pte_chain;
	put_cpu();
}

/*
 * pte_chain_alloc(): allocate a pte_chain structure for use by page_add_rmap().
 *
 * The caller of page_add_rmap() must perform the allocation because
 * page_add_rmap() is invariably called under spinlock. Often, page_add_rmap()
 * will not actually use the pte_chain, because there is space available in one
 * of the existing pte_chains which are attached to the page. So the case of
 * allocating and then freeing a single pte_chain is specially optimised here,
 * with a one-deep per-cpu cache.
 */
struct pte_chain *pte_chain_alloc(int gfp_flags)
{
	int cpu;
	struct pte_chain *ret;
	struct pte_chain **pte_chainp;

	if (gfp_flags & __GFP_WAIT)
		might_sleep();

	cpu = get_cpu();
	pte_chainp = &per_cpu(local_pte_chain, cpu);
	if (*pte_chainp) {
		ret = *pte_chainp;
		*pte_chainp = NULL;
		put_cpu();
	} else {
		put_cpu();
		ret = kmem_cache_alloc(pte_chain_cache, gfp_flags);
	}
	return ret;
}
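
/*
 * The one-deep cache above means the common alloc-then-free pattern in the
 * fault path usually ping-pongs a single pte_chain through the per-cpu slot
 * without touching the slab allocator: __pte_chain_free() parks the chain
 * in local_pte_chain and the next pte_chain_alloc() on that CPU takes it
 * straight back.
 */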

void __init pte_chain_init(void)
{
	pte_chain_cache = kmem_cache_create(	"pte_chain",
						sizeof(struct pte_chain),
						0,
						SLAB_MUST_HWCACHE_ALIGN,
						pte_chain_ctor,
						NULL);

	if (!pte_chain_cache)
		panic("failed to create pte_chain cache!\n");
}