4 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
5 * Swap reorganised 29.12.95, Stephen Tweedie
8 #include <linux/malloc.h>
9 #include <linux/smp_lock.h>
10 #include <linux/kernel_stat.h>
11 #include <linux/swap.h>
12 #include <linux/swapctl.h>
13 #include <linux/blkdev.h> /* for blk_size */
14 #include <linux/vmalloc.h>
15 #include <linux/pagemap.h>
16 #include <linux/shm.h>
18 #include <asm/pgtable.h>
/*
 * File-scope swap state, as far as this listing shows it:
 *   nr_swapfiles     - starts at 0; used further down as the upper bound
 *                      when walking swap_info[].
 *   swap_list        - initialised to {-1, -1}; its .head and .next
 *                      members are used further down as indices into
 *                      swap_info[], and a negative index ends a list walk.
 *   swap_info[]      - one slot per swap area, MAX_SWAPFILES slots.
 *   SWAPFILE_CLUSTER - the value scan_swap_map() stores into cluster_nr.
 */
20 unsigned int nr_swapfiles
= 0;
22 struct swap_list_t swap_list
= {-1, -1};
24 struct swap_info_struct swap_info
[MAX_SWAPFILES
];
26 #define SWAPFILE_CLUSTER 256
/*
 * scan_swap_map - look for a slot in the swap area described by si.
 *
 * What the visible fragments do, in order:
 *   - walk si->cluster_next forward while it is <= si->highest_bit,
 *     testing si->swap_map[offset] for each candidate;
 *   - store SWAPFILE_CLUSTER into si->cluster_nr and scan from
 *     si->lowest_bit up to si->highest_bit, again testing
 *     si->swap_map[offset];
 *   - assign the offset to si->lowest_bit, set si->swap_map[offset] to 1,
 *     compare the offset with si->highest_bit and assign the offset to
 *     si->cluster_next.
 *
 * The caller get_swap_page() feeds the return value to SWP_ENTRY() as
 * the offset.
 *
 * NOTE(review): the embedded line numbers skip (42 -> 48, 50 -> 52,
 * 58 -> 64), so the statements governed by the tests, the return
 * statements and all braces are missing from this listing.  Presumably a
 * non-zero swap_map entry means the slot is taken - confirm against the
 * complete source.
 */
28 static inline int scan_swap_map(struct swap_info_struct
*si
)
32 * We try to cluster swap pages by allocating them
33 * sequentially in swap. Once we've allocated
34 * SWAPFILE_CLUSTER pages this way, however, we resort to
35 * first-free allocation, starting a new cluster. This
36 * prevents us from scattering swap pages all over the entire
37 * swap partition, so that we reduce overall disk seek times
38 * between swap pages. -- sct */
40 while (si
->cluster_next
<= si
->highest_bit
) {
41 offset
= si
->cluster_next
++;
42 if (si
->swap_map
[offset
])
48 si
->cluster_nr
= SWAPFILE_CLUSTER
;
49 for (offset
= si
->lowest_bit
; offset
<= si
->highest_bit
; offset
++) {
50 if (si
->swap_map
[offset
])
52 si
->lowest_bit
= offset
;
54 si
->swap_map
[offset
] = 1;
56 if (offset
== si
->highest_bit
)
58 si
->cluster_next
= offset
;
/*
 * get_swap_page - allocate one swap slot and return it as a swap entry.
 *
 * Returns 0 when out of swap space (see the existing comment on the
 * return statement).  The entry is built with SWP_ENTRY(type, offset).
 *
 * Visible behaviour:
 *   - the search starts at swap_list.next;
 *   - nr_swap_pages == 0 is tested up front (its consequence is on a
 *     line missing from this listing);
 *   - for an area whose flags contain every SWP_WRITEOK bit,
 *     scan_swap_map() supplies the offset;
 *   - afterwards type moves to swap_info[type].next, the priority of the
 *     current area is compared with that of the next one, and
 *     swap_list.next is set either to swap_list.head or to type;
 *   - on the other path, type falls back to swap_list.head when it is
 *     negative or the priorities differ; a negative type in the else
 *     branch returns 0.
 *
 * NOTE(review): the enclosing loop, the use of the local wrapped and the
 * assignment of p are on lines missing from this listing.
 */
64 unsigned long get_swap_page(void)
66 struct swap_info_struct
* p
;
67 unsigned long offset
, entry
;
68 int type
, wrapped
= 0;
70 type
= swap_list
.next
;
73 if (nr_swap_pages
== 0)
/* Only areas with every SWP_WRITEOK bit set are scanned for a slot. */
78 if ((p
->flags
& SWP_WRITEOK
) == SWP_WRITEOK
) {
79 offset
= scan_swap_map(p
);
81 entry
= SWP_ENTRY(type
,offset
);
82 type
= swap_info
[type
].next
;
84 p
->prio
!= swap_info
[type
].prio
)
86 swap_list
.next
= swap_list
.head
;
90 swap_list
.next
= type
;
97 if (type
< 0 || p
->prio
!= swap_info
[type
].prio
) {
98 type
= swap_list
.head
;
101 } else if (type
< 0) {
102 return 0; /* out of swap space */
/*
 * swap_free - drop one reference to the swap slot named by entry.
 *
 * entry is split with SWP_TYPE() into an index into swap_info[] and with
 * SWP_OFFSET() into a slot offset inside that area.
 *
 * Checks visible in this listing, in order: type & SHM_SWP_TYPE,
 * type >= nr_swapfiles, area without SWP_USED, offset >= p->max,
 * and a zero use count in p->swap_map[offset].  Four printk diagnostics
 * follow at the end of the function.
 *
 * NOTE(review): the statements that connect each check to its diagnostic
 * (and the labels in front of the diagnostics) are on lines missing from
 * this listing, as is whatever happens when the count reaches zero
 * (line 137 of the original numbering).
 */
108 void swap_free(unsigned long entry
)
110 struct swap_info_struct
* p
;
111 unsigned long offset
, type
;
116 type
= SWP_TYPE(entry
);
117 if (type
& SHM_SWP_TYPE
)
119 if (type
>= nr_swapfiles
)
121 p
= & swap_info
[type
];
122 if (!(p
->flags
& SWP_USED
))
/* A free in an area with higher priority than the one swap_list.next
   points at resets swap_list.next to the head of the list. */
124 if (p
->prio
> swap_info
[swap_list
.next
].prio
)
125 swap_list
.next
= swap_list
.head
;
126 offset
= SWP_OFFSET(entry
);
127 if (offset
>= p
->max
)
/* Widen the lowest_bit..highest_bit window so that it covers offset. */
129 if (offset
< p
->lowest_bit
)
130 p
->lowest_bit
= offset
;
131 if (offset
> p
->highest_bit
)
132 p
->highest_bit
= offset
;
133 if (!p
->swap_map
[offset
])
/* Counts at or above SWAP_MAP_MAX are left alone; lower counts are
   decremented as part of the inner test. */
135 if (p
->swap_map
[offset
] < SWAP_MAP_MAX
) {
136 if (!--p
->swap_map
[offset
])
140 printk("DebugVM: swap_free(entry %08lx, count now %d)\n",
141 entry
, p
->swap_map
[offset
]);
147 printk("swap_free: Trying to free nonexistent swap-page\n");
150 printk("swap_free: Trying to free swap from unused swap-device\n");
153 printk("swap_free: offset exceeds max\n");
156 printk("swap_free: swap-space map bad (entry %08lx)\n",entry
);
161 * The swap entry has been read in advance, and we return 1 to indicate
162 * that the page has been used or is no longer needed.
164 * Always set the resulting pte to be nowrite (the same as COW pages
165 * after one process has exited). We don't know just how many PTEs will
166 * share this swap entry, so be cautious and let do_wp_page work out
167 * what to do if a write is requested later.
/*
 * unuse_pte - handle one page-table entry while a swap entry is being
 * replaced by the in-memory page.
 *
 * vma:     supplies vm_page_prot for the new mapping.
 * address: not referenced in the visible fragments.
 * dir:     the page-table slot that set_pte() rewrites.
 * entry:   swap entry compared against pte_val(pte).
 * page:    address of the page that holds the swapped-in data.
 *
 * Visible behaviour:
 *   - present pte: pte_page(pte) is compared with page, and the slot is
 *     rewritten with pte_mkdirty(pte);
 *   - otherwise: pte_val(pte) is compared with entry, the slot is set to
 *     pte_mkdirty(mk_pte(page, vma->vm_page_prot)) and get_page() is
 *     called on the mem_map element for page.
 *
 * NOTE(review): the load of the local pte from *dir and the early exits
 * that the two comparisons presumably guard are on lines missing from
 * this listing.
 */
169 static inline void unuse_pte(struct vm_area_struct
* vma
, unsigned long address
,
170 pte_t
*dir
, unsigned long entry
, unsigned long page
)
176 if (pte_present(pte
)) {
177 /* If this entry is swap-cached, then page must already
178 hold the right address for any copies in physical
180 if (pte_page(pte
) != page
)
182 /* We will be removing the swap cache in a moment, so... */
183 set_pte(dir
, pte_mkdirty(pte
));
186 if (pte_val(pte
) != entry
)
188 set_pte(dir
, pte_mkdirty(mk_pte(page
, vma
->vm_page_prot
)));
190 get_page(mem_map
+ MAP_NR(page
));
/*
 * unuse_pmd - run unuse_pte() over the ptes reached through one pmd
 * entry.
 *
 * dir:     the pmd entry; a diagnostic prints pmd_val(*dir) for a bad one.
 * address: start address; pte_offset(dir, address) yields the first pte.
 * size:    length of the range; end = address + size after masking.
 * offset:  base that is added back when the address is handed on.
 * entry, page: passed through unchanged to unuse_pte().
 *
 * The loop calls unuse_pte() with offset+address-vma->vm_start, advances
 * address by PAGE_SIZE and repeats while address < end.
 *
 * NOTE(review): the tests in front of the diagnostic, any clamping of
 * end and the increment of pte are on lines missing from this listing.
 */
194 static inline void unuse_pmd(struct vm_area_struct
* vma
, pmd_t
*dir
,
195 unsigned long address
, unsigned long size
, unsigned long offset
,
196 unsigned long entry
, unsigned long page
)
204 printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir
));
208 pte
= pte_offset(dir
, address
);
209 offset
+= address
& PMD_MASK
;
210 address
&= ~PMD_MASK
;
211 end
= address
+ size
;
215 unuse_pte(vma
, offset
+address
-vma
->vm_start
, pte
, entry
, page
);
216 address
+= PAGE_SIZE
;
218 } while (address
< end
);
/*
 * unuse_pgd - run unuse_pmd() over the pmds reached through one pgd
 * entry.
 *
 * dir:     the pgd entry; a diagnostic prints pgd_val(*dir) for a bad one.
 * address: start address; pmd_offset(dir, address) yields the first pmd.
 * size:    length of the range; end = address + size after masking.
 * entry, page: passed through unchanged to unuse_pmd().
 *
 * offset keeps address & PGDIR_MASK, address is reduced with
 * ~PGDIR_MASK, and end is compared with PGDIR_SIZE.  The loop calls
 * unuse_pmd() with end - address as the size, moves address to the next
 * PMD_SIZE boundary and repeats while address < end.
 *
 * NOTE(review): the tests in front of the diagnostic, the statement
 * governed by the end > PGDIR_SIZE test, the last argument line of the
 * unuse_pmd() call and the increment of pmd are on lines missing from
 * this listing.
 */
221 static inline void unuse_pgd(struct vm_area_struct
* vma
, pgd_t
*dir
,
222 unsigned long address
, unsigned long size
,
223 unsigned long entry
, unsigned long page
)
226 unsigned long offset
, end
;
231 printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir
));
235 pmd
= pmd_offset(dir
, address
);
236 offset
= address
& PGDIR_MASK
;
237 address
&= ~PGDIR_MASK
;
238 end
= address
+ size
;
239 if (end
> PGDIR_SIZE
)
242 unuse_pmd(vma
, pmd
, address
, end
- address
, offset
, entry
,
244 address
= (address
+ PMD_SIZE
) & PMD_MASK
;
246 } while (address
< end
);
/*
 * unuse_vma - run unuse_pgd() over the address range of one vma.
 *
 * vma:   the range walked is vma->vm_start up to vma->vm_end.
 * pgdir: pgd entry handed to unuse_pgd().
 * entry, page: passed through unchanged.
 *
 * Each pass hands the remaining length (end - start) to unuse_pgd() and
 * then moves start to the next PGDIR_SIZE boundary.
 *
 * NOTE(review): any advance of pgdir between passes is on a line missing
 * from this listing.
 */
249 static void unuse_vma(struct vm_area_struct
* vma
, pgd_t
*pgdir
,
250 unsigned long entry
, unsigned long page
)
252 unsigned long start
= vma
->vm_start
, end
= vma
->vm_end
;
254 while (start
< end
) {
255 unuse_pgd(vma
, pgdir
, start
, end
- start
, entry
, page
);
256 start
= (start
+ PGDIR_SIZE
) & PGDIR_MASK
;
/*
 * unuse_process - run unuse_vma() over every vma of one address space.
 *
 * mm:    the vma list is walked from mm->mmap through vm_next.
 * entry: swap entry, passed through unchanged.
 *
 * For each vma the pgd entry is looked up with
 * pgd_offset(mm, vma->vm_start) and handed to unuse_vma() together with
 * entry and page.
 *
 * NOTE(review): the end of the parameter list (the line that declares
 * page) and any checks made before the loop are on lines missing from
 * this listing.
 */
261 static void unuse_process(struct mm_struct
* mm
, unsigned long entry
,
264 struct vm_area_struct
* vma
;
267 * Go through process' page directory.
271 for (vma
= mm
->mmap
; vma
; vma
= vma
->vm_next
) {
272 pgd_t
* pgd
= pgd_offset(mm
, vma
->vm_start
);
273 unuse_vma(vma
, pgd
, entry
, page
);
279 * We completely avoid races by reading each swap page in advance,
280 * and then search for the process using it. All the necessary
281 * page table adjustments can then be made atomically.
/*
 * try_to_unuse - bring the in-use slots of swap area number type back
 * into memory.
 *
 * type: index into swap_info[]; also the type half of every entry built
 *       here with SWP_ENTRY(type, i).
 *
 * Visible steps:
 *   - scan slots 1 .. si->max - 1 for one whose swap_map count is > 0
 *     and is not SWAP_MAP_BAD;
 *   - read_swap_cache(entry) supplies page_map; the count is re-tested
 *     for 0 afterwards;
 *   - with tasklist_lock read-held, unuse_process(p->mm, entry, page) is
 *     called, then shm_unuse(entry, page) after the unlock;
 *   - the page is removed from the swap cache if PageSwapCache() says it
 *     is there, and __free_page() drops the reference;
 *   - a count that is still non-zero is compared with SWAP_MAP_MAX and a
 *     diagnostic prints the entry and the count.
 *
 * NOTE(review): the declaration of i, the iteration over tasks, the
 * handling of a failed read_swap_cache(), the clearing of the count and
 * every return statement are on lines missing from this listing.
 */
283 static int try_to_unuse(unsigned int type
)
285 struct swap_info_struct
* si
= &swap_info
[type
];
286 struct task_struct
*p
;
287 struct page
*page_map
;
288 unsigned long entry
, page
;
293 * Find a swap page in use and read it in.
295 for (i
= 1; i
< si
->max
; i
++) {
296 if (si
->swap_map
[i
] > 0 && si
->swap_map
[i
] != SWAP_MAP_BAD
) {
303 entry
= SWP_ENTRY(type
, i
);
305 /* Get a page for the entry, using the existing swap
306 cache page if there is one. Otherwise, get a clean
307 page and read the swap into it. */
308 page_map
= read_swap_cache(entry
);
311 * Continue searching if the entry became unused.
313 if (si
->swap_map
[i
] == 0)
317 page
= page_address(page_map
);
318 read_lock(&tasklist_lock
);
320 unuse_process(p
->mm
, entry
, page
);
321 read_unlock(&tasklist_lock
);
322 shm_unuse(entry
, page
);
323 /* Now get rid of the extra reference to the temporary
324 page we've been using. */
325 if (PageSwapCache(page_map
))
326 delete_from_swap_cache(page_map
);
327 __free_page(page_map
);
329 * Check for and clear any overflowed swap map counts.
331 if (si
->swap_map
[i
] != 0) {
332 if (si
->swap_map
[i
] != SWAP_MAP_MAX
)
334 "try_to_unuse: entry %08lx count=%d\n",
335 entry
, si
->swap_map
[i
]);
/*
 * sys_swapoff - system call that deactivates a swap area.
 *
 * specialfile: path resolved with namei() to identify the area.
 *
 * Visible steps, in listing order:
 *   1. capable(CAP_SYS_ADMIN) is checked.
 *   2. namei(specialfile) gives dentry; err is seeded with
 *      PTR_ERR(dentry).
 *   3. The swap list is walked from swap_list.head through .next until
 *      the index goes negative.  An area with every SWP_WRITEOK bit set
 *      is compared by p->swap_file == dentry, or for a block device by
 *      p->swap_device == dentry->d_inode->i_rdev.
 *   4. The area is unlinked (swap_list.head or swap_info[prev].next is
 *      set to p->next) and swap_list.next is reset to the head when it
 *      pointed at this area.
 *   5. err = try_to_unuse(type).
 *   6. A re-insert path walks the list by priority, links the area back
 *      in and restores p->flags = SWP_WRITEOK.
 *   7. A zeroed filp is pointed at dentry with f_mode 3, blkdev_open()
 *      is called, and on success f_op->release is invoked twice in a
 *      row (original lines 410 and 411).
 *   8. dentry is reloaded from p->swap_file and p->pages is subtracted
 *      from nr_swap_pages.
 *
 * NOTE(review): presumably step 6 runs only when try_to_unuse() fails,
 * and the two release calls balance this re-open plus the open made at
 * swapon time - neither is visible here, confirm against the complete
 * source.  Locking, the remaining teardown and the return statements are
 * on lines missing from this listing.
 */
343 asmlinkage
int sys_swapoff(const char * specialfile
)
345 struct swap_info_struct
* p
= NULL
;
346 struct dentry
* dentry
;
352 if (!capable(CAP_SYS_ADMIN
))
355 dentry
= namei(specialfile
);
356 err
= PTR_ERR(dentry
);
361 for (type
= swap_list
.head
; type
>= 0; type
= swap_info
[type
].next
) {
362 p
= swap_info
+ type
;
363 if ((p
->flags
& SWP_WRITEOK
) == SWP_WRITEOK
) {
365 if (p
->swap_file
== dentry
)
368 if (S_ISBLK(dentry
->d_inode
->i_mode
)
369 && (p
->swap_device
== dentry
->d_inode
->i_rdev
))
380 swap_list
.head
= p
->next
;
382 swap_info
[prev
].next
= p
->next
;
384 if (type
== swap_list
.next
) {
385 /* just pick something that's safe... */
386 swap_list
.next
= swap_list
.head
;
389 err
= try_to_unuse(type
);
391 /* re-insert swap space back into swap_list */
392 for (prev
= -1, i
= swap_list
.head
; i
>= 0; prev
= i
, i
= swap_info
[i
].next
)
393 if (p
->prio
>= swap_info
[i
].prio
)
397 swap_list
.head
= swap_list
.next
= p
- swap_info
;
399 swap_info
[prev
].next
= p
- swap_info
;
400 p
->flags
= SWP_WRITEOK
;
404 memset(&filp
, 0, sizeof(filp
));
405 filp
.f_dentry
= dentry
;
406 filp
.f_mode
= 3; /* read write */
407 /* open it again to get fops */
408 if( !blkdev_open(dentry
->d_inode
, &filp
) &&
409 filp
.f_op
&& filp
.f_op
->release
){
410 filp
.f_op
->release(dentry
->d_inode
,&filp
);
411 filp
.f_op
->release(dentry
->d_inode
,&filp
);
416 dentry
= p
->swap_file
;
418 nr_swap_pages
-= p
->pages
;
/*
 * get_swaparea_info - format a table of the swap areas into buf.
 *
 * buf: destination; every piece is written with sprintf() at buf + len,
 *      and len accumulates the sprintf() return values.
 *
 * Visible steps:
 *   - one page from __get_free_page(GFP_KERNEL) serves as scratch space
 *     for d_path() and is released with free_page() at the end;
 *   - a header line is written first;
 *   - for each of the first nr_swapfiles slots of swap_info[] that has
 *     SWP_USED set: the d_path() result is printed left-aligned in 31
 *     columns, then one of the two type strings depending on
 *     ptr->swap_device, then ptr->pages and usedswap (each shifted left
 *     by PAGE_SHIFT - 10) and ptr->prio;
 *   - usedswap comes from a switch over every ptr->swap_map[j].
 *
 * NOTE(review): the switch cases, the check of the page allocation and
 * the return value are on lines missing from this listing.  No bound on
 * the size of buf is visible here.
 */
432 int get_swaparea_info(char *buf
)
434 char * page
= (char *) __get_free_page(GFP_KERNEL
);
435 struct swap_info_struct
*ptr
= swap_info
;
436 int i
, j
, len
= 0, usedswap
;
441 len
+= sprintf(buf
, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
442 for (i
= 0 ; i
< nr_swapfiles
; i
++, ptr
++) {
443 if (ptr
->flags
& SWP_USED
) {
444 char * path
= d_path(ptr
->swap_file
, page
, PAGE_SIZE
);
446 len
+= sprintf(buf
+ len
, "%-31s ", path
);
448 if (!ptr
->swap_device
)
449 len
+= sprintf(buf
+ len
, "file\t\t");
451 len
+= sprintf(buf
+ len
, "partition\t");
454 for (j
= 0; j
< ptr
->max
; ++j
)
455 switch (ptr
->swap_map
[j
]) {
462 len
+= sprintf(buf
+ len
, "%d\t%d\t%d\n", ptr
->pages
<< (PAGE_SHIFT
- 10),
463 usedswap
<< (PAGE_SHIFT
- 10), ptr
->prio
);
466 free_page((unsigned long) page
);
/*
 * is_swap_partition - look for dev among the active swap areas.
 *
 * dev: device number compared with swap_device of every slot among the
 *      first nr_swapfiles entries of swap_info[] that has SWP_USED set.
 *
 * NOTE(review): the declaration of i and both return statements are on
 * lines missing from this listing; presumably a match yields non-zero
 * and no match yields 0 - confirm against the complete source.
 */
470 int is_swap_partition(kdev_t dev
) {
471 struct swap_info_struct
*ptr
= swap_info
;
474 for (i
= 0 ; i
< nr_swapfiles
; i
++, ptr
++) {
475 if (ptr
->flags
& SWP_USED
)
476 if (ptr
->swap_device
== dev
)
483 * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
485 * The swapon system call
/*
 * sys_swapon - system call that activates a swap area.
 *
 * specialfile: path resolved with namei() to find the device or file.
 * swap_flags:  tested against SWAP_FLAG_PREFER.  With the flag set, the
 *              value (swap_flags & SWAP_FLAG_PRIO_MASK) >>
 *              SWAP_FLAG_PRIO_SHIFT is computed; the other visible
 *              assignment is p->prio = --least_priority.
 *
 * Visible steps, in listing order:
 *   1. capable(CAP_SYS_ADMIN) check; the local filp is zeroed.
 *   2. swap_info[] is searched for a slot without SWP_USED; the index is
 *      compared with MAX_SWAPFILES, and nr_swapfiles grows to type + 1
 *      when the index is not below it.
 *   3. The dentry for specialfile is stored in p->swap_file.
 *   4. Block device: i_rdev goes into p->swap_device, set_blocksize()
 *      is called with PAGE_SIZE, blkdev_open() is called, blk_size[][]
 *      is consulted, the device is compared with the swap_device of
 *      every slot, and swapfilesize is blk_size / (PAGE_SIZE / 1024).
 *      Regular file: the inode is compared with the swap_file inode of
 *      the other slots, and swapfilesize is i_size / PAGE_SIZE.
 *   5. One page is allocated for the header, locked, and filled by
 *      rw_swap_page_nolock(READ, SWP_ENTRY(type,0), ...).
 *   6. The signature picks swap_header_version 1 or 2.
 *   7. Version 1: the last 10 bytes of the page are cleared, the page is
 *      scanned as a bitmap with test_bit(), and swap_map is vmalloc-ed
 *      with p->max shorts.
 *      Version 2: info.version must be 1; highest_bit and max come from
 *      info.last_page; max is compared with the largest offset that
 *      SWP_ENTRY/SWP_OFFSET can carry; nr_badpages is compared with
 *      MAX_SWAP_BADPAGES; swap_map is vmalloc-ed and zeroed; every
 *      badpages[] entry that passes the range test is marked
 *      SWAP_MAP_BAD.
 *   8. p->max is compared with swapfilesize; zero good pages produces
 *      the Empty swap-file warning.
 *   9. Slot 0 is marked SWAP_MAP_BAD, flags become SWP_WRITEOK, pages
 *      and nr_swap_pages are updated, and the area is linked into
 *      swap_list by priority.
 *  10. f_op->release is called on filp when available, least_priority
 *      is revisited when SWAP_FLAG_PREFER was not given, and the header
 *      page is freed.
 *
 * NOTE(review): the error exits, the labels, the locking, the
 * destination of the SWAP_FLAG_PREFER priority value and the return
 * statements are on lines missing from this listing, so which of the
 * steps above belong to the failure path cannot be told from here.
 */
487 asmlinkage
int sys_swapon(const char * specialfile
, int swap_flags
)
489 struct swap_info_struct
* p
;
490 struct dentry
* swap_dentry
;
495 static int least_priority
= 0;
496 union swap_header
*swap_header
= 0;
497 int swap_header_version
;
498 int lock_map_size
= PAGE_SIZE
;
499 int nr_good_pages
= 0;
500 unsigned long maxpages
;
504 if (!capable(CAP_SYS_ADMIN
))
506 memset(&filp
, 0, sizeof(filp
));
508 for (type
= 0 ; type
< nr_swapfiles
; type
++,p
++)
509 if (!(p
->flags
& SWP_USED
))
511 if (type
>= MAX_SWAPFILES
)
513 if (type
>= nr_swapfiles
)
514 nr_swapfiles
= type
+1;
524 if (swap_flags
& SWAP_FLAG_PREFER
) {
526 (swap_flags
& SWAP_FLAG_PRIO_MASK
)>>SWAP_FLAG_PRIO_SHIFT
;
528 p
->prio
= --least_priority
;
530 swap_dentry
= namei(specialfile
);
531 error
= PTR_ERR(swap_dentry
);
532 if (IS_ERR(swap_dentry
))
535 p
->swap_file
= swap_dentry
;
538 if (S_ISBLK(swap_dentry
->d_inode
->i_mode
)) {
539 kdev_t dev
= swap_dentry
->d_inode
->i_rdev
;
541 p
->swap_device
= dev
;
542 set_blocksize(dev
, PAGE_SIZE
);
544 filp
.f_dentry
= swap_dentry
;
545 filp
.f_mode
= 3; /* read write */
546 error
= blkdev_open(swap_dentry
->d_inode
, &filp
);
549 set_blocksize(dev
, PAGE_SIZE
);
551 if (!dev
|| (blk_size
[MAJOR(dev
)] &&
552 !blk_size
[MAJOR(dev
)][MINOR(dev
)]))
555 for (i
= 0 ; i
< nr_swapfiles
; i
++) {
558 if (dev
== swap_info
[i
].swap_device
)
562 if (blk_size
[MAJOR(dev
)])
563 swapfilesize
= blk_size
[MAJOR(dev
)][MINOR(dev
)]
564 / (PAGE_SIZE
/ 1024);
565 } else if (S_ISREG(swap_dentry
->d_inode
->i_mode
)) {
567 for (i
= 0 ; i
< nr_swapfiles
; i
++) {
568 if (i
== type
|| !swap_info
[i
].swap_file
)
570 if (swap_dentry
->d_inode
== swap_info
[i
].swap_file
->d_inode
)
573 swapfilesize
= swap_dentry
->d_inode
->i_size
/ PAGE_SIZE
;
577 swap_header
= (void *) __get_free_page(GFP_USER
);
579 printk("Unable to start swapping: out of memory :-)\n");
584 lock_page(mem_map
+ MAP_NR(swap_header
));
585 rw_swap_page_nolock(READ
, SWP_ENTRY(type
,0), (char *) swap_header
, 1);
/* Signature SWAP-SPACE selects header version 1, SWAPSPACE2 selects
   header version 2. */
587 if (!memcmp("SWAP-SPACE",swap_header
->magic
.magic
,10))
588 swap_header_version
= 1;
589 else if (!memcmp("SWAPSPACE2",swap_header
->magic
.magic
,10))
590 swap_header_version
= 2;
592 printk("Unable to find swap-space signature\n");
597 switch (swap_header_version
) {
599 memset(((char *) swap_header
)+PAGE_SIZE
-10,0,10);
603 for (i
= 1 ; i
< 8*PAGE_SIZE
; i
++) {
604 if (test_bit(i
,(char *) swap_header
)) {
613 p
->swap_map
= vmalloc(p
->max
* sizeof(short));
618 for (i
= 1 ; i
< p
->max
; i
++) {
619 if (test_bit(i
,(char *) swap_header
))
622 p
->swap_map
[i
] = SWAP_MAP_BAD
;
627 /* Check the swap header's sub-version and the size of
628 the swap file and bad block lists */
629 if (swap_header
->info
.version
!= 1) {
631 "Unable to handle swap header version %d\n",
632 swap_header
->info
.version
);
638 p
->highest_bit
= swap_header
->info
.last_page
- 1;
639 p
->max
= swap_header
->info
.last_page
;
641 maxpages
= SWP_OFFSET(SWP_ENTRY(0,~0UL));
642 if (p
->max
>= maxpages
)
646 if (swap_header
->info
.nr_badpages
> MAX_SWAP_BADPAGES
)
649 /* OK, set up the swap map and apply the bad block list */
650 if (!(p
->swap_map
= vmalloc (p
->max
* sizeof(short)))) {
656 memset(p
->swap_map
, 0, p
->max
* sizeof(short));
657 for (i
=0; i
<swap_header
->info
.nr_badpages
; i
++) {
658 int page
= swap_header
->info
.badpages
[i
];
659 if (page
<= 0 || page
>= swap_header
->info
.last_page
)
662 p
->swap_map
[page
] = SWAP_MAP_BAD
;
664 nr_good_pages
= swap_header
->info
.last_page
-
665 swap_header
->info
.nr_badpages
-
667 lock_map_size
= (p
->max
+ 7) / 8;
672 if (swapfilesize
&& p
->max
> swapfilesize
) {
674 "Swap area shorter than signature indicates\n");
678 if (!nr_good_pages
) {
679 printk(KERN_WARNING
"Empty swap-file\n");
683 p
->swap_map
[0] = SWAP_MAP_BAD
;
684 p
->flags
= SWP_WRITEOK
;
685 p
->pages
= nr_good_pages
;
686 nr_swap_pages
+= nr_good_pages
;
687 printk(KERN_INFO
"Adding Swap: %dk swap-space (priority %d)\n",
688 nr_good_pages
<<(PAGE_SHIFT
-10), p
->prio
);
690 /* insert swap space into swap_list: */
692 for (i
= swap_list
.head
; i
>= 0; i
= swap_info
[i
].next
) {
693 if (p
->prio
>= swap_info
[i
].prio
) {
700 swap_list
.head
= swap_list
.next
= p
- swap_info
;
702 swap_info
[prev
].next
= p
- swap_info
;
707 if(filp
.f_op
&& filp
.f_op
->release
)
708 filp
.f_op
->release(filp
.f_dentry
->d_inode
,&filp
);
717 if (!(swap_flags
& SWAP_FLAG_PREFER
))
721 free_page((long) swap_header
);
/*
 * si_swapinfo - fill in the swap totals of a struct sysinfo.
 *
 * val: freeswap and totalswap are zeroed first and shifted left by
 *      PAGE_SHIFT at the end, so the counting in between is done in
 *      pages and the stored result is scaled by the page size.
 *
 * For each of the first nr_swapfiles slots of swap_info[], the flags are
 * tested against the full SWP_WRITEOK mask, and a switch runs over every
 * swap_map[j] below max.
 *
 * NOTE(review): the statement governed by the SWP_WRITEOK test and the
 * switch cases that do the counting are on lines missing from this
 * listing.
 */
726 void si_swapinfo(struct sysinfo
*val
)
730 val
->freeswap
= val
->totalswap
= 0;
731 for (i
= 0; i
< nr_swapfiles
; i
++) {
732 if ((swap_info
[i
].flags
& SWP_WRITEOK
) != SWP_WRITEOK
)
734 for (j
= 0; j
< swap_info
[i
].max
; ++j
)
735 switch (swap_info
[i
].swap_map
[j
]) {
744 val
->freeswap
<<= PAGE_SHIFT
;
745 val
->totalswap
<<= PAGE_SHIFT
;