4 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
5 * Swap reorganised 29.12.95, Stephen Tweedie
8 #include <linux/config.h>
9 #include <linux/malloc.h>
10 #include <linux/smp_lock.h>
11 #include <linux/kernel_stat.h>
12 #include <linux/swap.h>
13 #include <linux/swapctl.h>
14 #include <linux/blkdev.h> /* for blk_size */
15 #include <linux/vmalloc.h>
16 #include <linux/pagemap.h>
17 #include <linux/shm.h>
19 #include <asm/pgtable.h>
/*
 * File-scope swap bookkeeping.
 *
 * nr_swapfiles bounds the loops over swap_info[] elsewhere in this file
 * and starts at 0.  swap_list starts with both of its fields set to -1.
 * swap_info[] holds one descriptor per swap area, MAX_SWAPFILES in total.
 * SWAPFILE_CLUSTER (256) is the value scan_swap_map() stores into
 * cluster_nr when it restarts a cluster.
 */
21 unsigned int nr_swapfiles
= 0;
23 struct swap_list_t swap_list
= {-1, -1};
25 struct swap_info_struct swap_info
[MAX_SWAPFILES
];
27 #define SWAPFILE_CLUSTER 256
/*
 * scan_swap_map - look for a usable slot in one swap area.
 *
 * Visible steps: walk si->cluster_next upward while it does not exceed
 * si->highest_bit, testing si->swap_map[offset]; then reset
 * si->cluster_nr to SWAPFILE_CLUSTER and scan from si->lowest_bit to
 * si->highest_bit, again testing swap_map[offset].  The selected slot
 * has its map entry set to 1, si->lowest_bit and si->cluster_next are
 * updated to that offset, and the highest_bit boundary is special-cased.
 *
 * NOTE(review): this listing has lost lines (braces, the declaration of
 * offset, the statements guarded by the if tests, the returns), so the
 * exact branch structure and return value must be confirmed against the
 * complete file.
 */
29 static inline int scan_swap_map(struct swap_info_struct
*si
)
33 * We try to cluster swap pages by allocating them
34 * sequentially in swap. Once we've allocated
35 * SWAPFILE_CLUSTER pages this way, however, we resort to
36 * first-free allocation, starting a new cluster. This
37 * prevents us from scattering swap pages all over the entire
38 * swap partition, so that we reduce overall disk seek times
39 * between swap pages. -- sct */
41 while (si
->cluster_next
<= si
->highest_bit
) {
42 offset
= si
->cluster_next
++;
43 if (si
->swap_map
[offset
])
/* Start a fresh cluster budget before the lowest_bit..highest_bit scan. */
49 si
->cluster_nr
= SWAPFILE_CLUSTER
;
50 for (offset
= si
->lowest_bit
; offset
<= si
->highest_bit
; offset
++) {
51 if (si
->swap_map
[offset
])
53 si
->lowest_bit
= offset
;
/* Mark the slot as having one user. */
55 si
->swap_map
[offset
] = 1;
57 if (offset
== si
->highest_bit
)
59 si
->cluster_next
= offset
;
/*
 * get_swap_page - allocate a swap entry from the registered swap areas.
 *
 * Visible steps: start at swap_list.next, test nr_swap_pages against 0,
 * and for an area whose flags have all SWP_WRITEOK bits set call
 * scan_swap_map() and build the entry with SWP_ENTRY(type, offset).
 * After that, type advances to swap_info[type].next and swap_list.next
 * is set either to swap_list.head (when the priorities differ) or to the
 * advanced type.  A second stanza restarts from swap_list.head when the
 * list ends or the priority changes, and returns 0 when type is negative
 * (commented in the code as out of swap space).
 *
 * NOTE(review): the assignment of p, the enclosing loop and the use of
 * wrapped are not present in this listing; confirm against the full file.
 */
65 unsigned long get_swap_page(void)
67 struct swap_info_struct
* p
;
68 unsigned long offset
, entry
;
69 int type
, wrapped
= 0;
71 type
= swap_list
.next
;
74 if (nr_swap_pages
== 0)
79 if ((p
->flags
& SWP_WRITEOK
) == SWP_WRITEOK
) {
80 offset
= scan_swap_map(p
);
82 entry
= SWP_ENTRY(type
,offset
);
83 type
= swap_info
[type
].next
;
85 p
->prio
!= swap_info
[type
].prio
)
87 swap_list
.next
= swap_list
.head
;
91 swap_list
.next
= type
;
98 if (type
< 0 || p
->prio
!= swap_info
[type
].prio
) {
99 type
= swap_list
.head
;
102 } else if (type
< 0) {
103 return 0; /* out of swap space */
/*
 * swap_free - drop one reference on a swap entry.
 *
 * Visible steps: decode the area index with SWP_TYPE(entry); test it
 * against SHM_SWP_TYPE and against nr_swapfiles; look up the descriptor
 * and test SWP_USED; if this area has a higher prio than the area at
 * swap_list.next, reset swap_list.next to swap_list.head; decode the
 * offset with SWP_OFFSET(entry) and test it against p->max; widen
 * p->lowest_bit / p->highest_bit so that they include the offset; test
 * the map entry for zero; and, when the count is below SWAP_MAP_MAX,
 * pre-decrement it.
 *
 * The printk calls at the end are the diagnostics for the failed checks
 * and a DebugVM trace.
 *
 * NOTE(review): the statements executed when each check fails (and any
 * labels) are missing from this listing, so the mapping between checks
 * and messages is not shown here.
 */
109 void swap_free(unsigned long entry
)
111 struct swap_info_struct
* p
;
112 unsigned long offset
, type
;
117 type
= SWP_TYPE(entry
);
118 if (type
& SHM_SWP_TYPE
)
120 if (type
>= nr_swapfiles
)
122 p
= & swap_info
[type
];
123 if (!(p
->flags
& SWP_USED
))
125 if (p
->prio
> swap_info
[swap_list
.next
].prio
)
126 swap_list
.next
= swap_list
.head
;
127 offset
= SWP_OFFSET(entry
);
128 if (offset
>= p
->max
)
130 if (offset
< p
->lowest_bit
)
131 p
->lowest_bit
= offset
;
132 if (offset
> p
->highest_bit
)
133 p
->highest_bit
= offset
;
134 if (!p
->swap_map
[offset
])
136 if (p
->swap_map
[offset
] < SWAP_MAP_MAX
) {
137 if (!--p
->swap_map
[offset
])
141 printk("DebugVM: swap_free(entry %08lx, count now %d)\n",
142 entry
, p
->swap_map
[offset
]);
148 printk("swap_free: Trying to free nonexistent swap-page\n");
151 printk("swap_free: Trying to free swap from unused swap-device\n");
154 printk("swap_free: offset exceeds max\n");
157 printk("swap_free: swap-space map bad (entry %08lx)\n",entry
);
/*
 * The swap entry has been read in advance, and we return 1 to indicate
 * that the page has been used or is no longer needed.
 *
 * Always set the resulting pte to be nowrite (the same as COW pages
 * after one process has exited). We don't know just how many PTEs will
 * share this swap entry, so be cautious and let do_wp_page work out
 * what to do if a write is requested later.
 *
 * NOTE(review): unuse_pte() below is declared void, so the "return 1"
 * wording above looks stale -- confirm and update.
 */
/*
 * unuse_pte - examine one page-table entry while a swap entry is being
 * brought back into memory.
 *
 * vma:     mapping that owns the pte
 * address: passed by the caller; not referenced in the visible lines
 * dir:     the pte slot to examine and possibly rewrite
 * entry:   swap entry being removed
 * page:    address of the page that now holds the swapped-in data
 *
 * Visible steps: for a present pte, compare pte_page(pte) with page and
 * write the pte back with the dirty bit set.  Otherwise compare
 * pte_val(pte) with entry, install a dirty pte built from page and
 * vma->vm_page_prot, and take a reference via get_page().
 *
 * NOTE(review): the load of pte from dir and the statements guarded by
 * the two comparisons are missing from this listing.
 */
170 static inline void unuse_pte(struct vm_area_struct
* vma
, unsigned long address
,
171 pte_t
*dir
, unsigned long entry
, unsigned long page
)
177 if (pte_present(pte
)) {
178 /* If this entry is swap-cached, then page must already
179 hold the right address for any copies in physical
181 if (pte_page(pte
) != page
)
183 /* We will be removing the swap cache in a moment, so... */
184 set_pte(dir
, pte_mkdirty(pte
));
187 if (pte_val(pte
) != entry
)
189 set_pte(dir
, pte_mkdirty(mk_pte(page
, vma
->vm_page_prot
)));
191 get_page(mem_map
+ MAP_NR(page
));
/*
 * unuse_pmd - apply unuse_pte() to every pte covered by one pmd entry.
 *
 * Visible steps: report a bad pmd via printk; locate the first pte with
 * pte_offset(dir, address); fold the PMD_MASK bits of address into
 * offset and keep only the remaining bits in address; compute
 * end = address + size; then call unuse_pte() with
 * offset + address - vma->vm_start and advance address by PAGE_SIZE
 * until it reaches end.
 *
 * NOTE(review): the pmd validity tests, any clamping of end and the
 * advance of the pte pointer are missing from this listing.
 */
195 static inline void unuse_pmd(struct vm_area_struct
* vma
, pmd_t
*dir
,
196 unsigned long address
, unsigned long size
, unsigned long offset
,
197 unsigned long entry
, unsigned long page
)
205 printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir
));
209 pte
= pte_offset(dir
, address
);
210 offset
+= address
& PMD_MASK
;
211 address
&= ~PMD_MASK
;
212 end
= address
+ size
;
216 unuse_pte(vma
, offset
+address
-vma
->vm_start
, pte
, entry
, page
);
217 address
+= PAGE_SIZE
;
219 } while (address
< end
);
/*
 * unuse_pgd - apply unuse_pmd() to every pmd covered by one pgd entry.
 *
 * Visible steps: report a bad pgd via printk; locate the first pmd with
 * pmd_offset(dir, address); save the PGDIR_MASK bits of address in
 * offset and keep only the remaining bits in address; compute
 * end = address + size and compare it with PGDIR_SIZE; then call
 * unuse_pmd() for the range and step address to the next PMD_SIZE
 * boundary until it reaches end.
 *
 * NOTE(review): the pgd validity tests, the statement guarded by the
 * PGDIR_SIZE comparison, the tail of the unuse_pmd() call and the
 * advance of the pmd pointer are missing from this listing.
 */
222 static inline void unuse_pgd(struct vm_area_struct
* vma
, pgd_t
*dir
,
223 unsigned long address
, unsigned long size
,
224 unsigned long entry
, unsigned long page
)
227 unsigned long offset
, end
;
232 printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir
));
236 pmd
= pmd_offset(dir
, address
);
237 offset
= address
& PGDIR_MASK
;
238 address
&= ~PGDIR_MASK
;
239 end
= address
+ size
;
240 if (end
> PGDIR_SIZE
)
243 unuse_pmd(vma
, pmd
, address
, end
- address
, offset
, entry
,
245 address
= (address
+ PMD_SIZE
) & PMD_MASK
;
247 } while (address
< end
);
/*
 * unuse_vma - walk one VMA from vm_start to vm_end, one pgd span at a
 * time, handing each span to unuse_pgd().
 *
 * start is advanced to the next PGDIR_SIZE boundary after every call.
 *
 * NOTE(review): the advance of pgdir between iterations is not present
 * in this listing; confirm against the full file.
 */
250 static void unuse_vma(struct vm_area_struct
* vma
, pgd_t
*pgdir
,
251 unsigned long entry
, unsigned long page
)
253 unsigned long start
= vma
->vm_start
, end
= vma
->vm_end
;
255 while (start
< end
) {
256 unuse_pgd(vma
, pgdir
, start
, end
- start
, entry
, page
);
257 start
= (start
+ PGDIR_SIZE
) & PGDIR_MASK
;
/*
 * unuse_process - run unuse_vma() over every VMA of an address space.
 *
 * A NULL mm and &init_mm are tested for first.  For each vma on the
 * mm->mmap list the pgd slot for vma->vm_start is looked up with
 * pgd_offset() and passed to unuse_vma() together with entry and page.
 *
 * NOTE(review): the second line of the parameter list (which supplies
 * page) and the statement guarded by the mm test are missing from this
 * listing.
 */
262 static void unuse_process(struct mm_struct
* mm
, unsigned long entry
,
265 struct vm_area_struct
* vma
;
268 * Go through process' page directory.
270 if (!mm
|| mm
== &init_mm
)
272 for (vma
= mm
->mmap
; vma
; vma
= vma
->vm_next
) {
273 pgd_t
* pgd
= pgd_offset(mm
, vma
->vm_start
);
274 unuse_vma(vma
, pgd
, entry
, page
);
/*
 * We completely avoid races by reading each swap page in advance,
 * and then searching for the processes using it. All the necessary
 * page table adjustments can then be made atomically.
 */
/*
 * try_to_unuse - pull every in-use page of swap area 'type' back into
 * memory.
 *
 * Visible steps: scan slots 1 .. si->max - 1 for a map count that is
 * positive and not SWAP_MAP_BAD; build the entry with SWP_ENTRY(type, i);
 * obtain its page through read_swap_cache(); re-test the count for zero;
 * under read_lock(&tasklist_lock) call unuse_process() on a task mm;
 * call shm_unuse(); remove the page from the swap cache if it is still
 * there and free it; finally inspect any count that is still non-zero,
 * logging it when it differs from SWAP_MAP_MAX.
 *
 * NOTE(review): the declaration of i, the per-task loop around
 * unuse_process(), the error handling for read_swap_cache() and the
 * return statements are missing from this listing.
 */
284 static int try_to_unuse(unsigned int type
)
286 struct swap_info_struct
* si
= &swap_info
[type
];
287 struct task_struct
*p
;
288 struct page
*page_map
;
289 unsigned long entry
, page
;
294 * Find a swap page in use and read it in.
296 for (i
= 1; i
< si
->max
; i
++) {
297 if (si
->swap_map
[i
] > 0 && si
->swap_map
[i
] != SWAP_MAP_BAD
) {
304 entry
= SWP_ENTRY(type
, i
);
306 /* Get a page for the entry, using the existing swap
307 cache page if there is one. Otherwise, get a clean
308 page and read the swap into it. */
309 page_map
= read_swap_cache(entry
);
312 * Continue searching if the entry became unused.
314 if (si
->swap_map
[i
] == 0)
318 page
= page_address(page_map
);
319 read_lock(&tasklist_lock
);
321 unuse_process(p
->mm
, entry
, page
);
322 read_unlock(&tasklist_lock
);
323 shm_unuse(entry
, page
);
324 /* Now get rid of the extra reference to the temporary
325 page we've been using. */
326 if (PageSwapCache(page_map
))
327 delete_from_swap_cache(page_map
);
328 __free_page(page_map
);
330 * Check for and clear any overflowed swap map counts.
332 if (si
->swap_map
[i
] != 0) {
333 if (si
->swap_map
[i
] != SWAP_MAP_MAX
)
335 "try_to_unuse: entry %08lx count=%d\n",
336 entry
, si
->swap_map
[i
]);
/*
 * sys_swapoff - the swapoff system call: stop swapping to specialfile.
 *
 * Visible steps:
 *  - test capable(CAP_SYS_ADMIN);
 *  - resolve specialfile with namei() and keep PTR_ERR() of the result;
 *  - walk swap_list from head looking for an area whose flags have all
 *    SWP_WRITEOK bits set and which matches either by dentry or, for a
 *    block device, by i_rdev;
 *  - unlink that area from swap_list (fixing up head or the previous
 *    element, and resetting swap_list.next if it pointed at this area);
 *  - call try_to_unuse(type);
 *  - re-insert the area into swap_list in prio order and set its flags
 *    back to SWP_WRITEOK (the code comment calls this re-insertion;
 *    presumably the failure path of try_to_unuse -- confirm);
 *  - reopen the block device to obtain its file operations and call
 *    release;
 *  - pick up p->swap_file and subtract p->pages from nr_swap_pages.
 *
 * NOTE(review): declarations, error codes, goto targets and the final
 * teardown of the descriptor are missing from this listing.
 */
344 asmlinkage
int sys_swapoff(const char * specialfile
)
346 struct swap_info_struct
* p
= NULL
;
347 struct dentry
* dentry
;
353 if (!capable(CAP_SYS_ADMIN
))
356 dentry
= namei(specialfile
);
357 err
= PTR_ERR(dentry
);
362 for (type
= swap_list
.head
; type
>= 0; type
= swap_info
[type
].next
) {
363 p
= swap_info
+ type
;
364 if ((p
->flags
& SWP_WRITEOK
) == SWP_WRITEOK
) {
366 if (p
->swap_file
== dentry
)
369 if (S_ISBLK(dentry
->d_inode
->i_mode
)
370 && (p
->swap_device
== dentry
->d_inode
->i_rdev
))
381 swap_list
.head
= p
->next
;
383 swap_info
[prev
].next
= p
->next
;
385 if (type
== swap_list
.next
) {
386 /* just pick something that's safe... */
387 swap_list
.next
= swap_list
.head
;
390 err
= try_to_unuse(type
);
392 /* re-insert swap space back into swap_list */
393 for (prev
= -1, i
= swap_list
.head
; i
>= 0; prev
= i
, i
= swap_info
[i
].next
)
394 if (p
->prio
>= swap_info
[i
].prio
)
398 swap_list
.head
= swap_list
.next
= p
- swap_info
;
400 swap_info
[prev
].next
= p
- swap_info
;
401 p
->flags
= SWP_WRITEOK
;
405 memset(&filp
, 0, sizeof(filp
));
406 filp
.f_dentry
= dentry
;
407 filp
.f_mode
= 3; /* read write */
408 /* open it again to get fops */
409 if( !blkdev_open(dentry
->d_inode
, &filp
) &&
410 filp
.f_op
&& filp
.f_op
->release
){
/*
 * NOTE(review): release is invoked twice in a row.  Presumably one call
 * balances the blkdev_open() just above and the other balances an
 * earlier open of the same device -- confirm before changing.
 */
411 filp
.f_op
->release(dentry
->d_inode
,&filp
);
412 filp
.f_op
->release(dentry
->d_inode
,&filp
);
417 dentry
= p
->swap_file
;
419 nr_swap_pages
-= p
->pages
;
/*
 * get_swaparea_info - format a table of the active swap areas into buf.
 *
 * A scratch page from __get_free_page(GFP_KERNEL) is used as the buffer
 * for d_path() and is released with free_page() at the end.  After the
 * column header, each swap_info slot with SWP_USED set contributes one
 * row: the path (left-justified in 31 columns), the word file or
 * partition depending on whether swap_device is zero, then ptr->pages
 * and usedswap shifted left by PAGE_SHIFT - 10, and the prio.
 * usedswap is derived from a switch over every swap_map entry.
 *
 * len accumulates the sprintf() return values.
 *
 * NOTE(review): no bound on buf is visible in this listing, and the
 * allocation check, the switch cases and the return statement are
 * missing from it.
 */
433 int get_swaparea_info(char *buf
)
435 char * page
= (char *) __get_free_page(GFP_KERNEL
);
436 struct swap_info_struct
*ptr
= swap_info
;
437 int i
, j
, len
= 0, usedswap
;
442 len
+= sprintf(buf
, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
443 for (i
= 0 ; i
< nr_swapfiles
; i
++, ptr
++) {
444 if (ptr
->flags
& SWP_USED
) {
445 char * path
= d_path(ptr
->swap_file
, page
, PAGE_SIZE
);
447 len
+= sprintf(buf
+ len
, "%-31s ", path
);
449 if (!ptr
->swap_device
)
450 len
+= sprintf(buf
+ len
, "file\t\t");
452 len
+= sprintf(buf
+ len
, "partition\t");
455 for (j
= 0; j
< ptr
->max
; ++j
)
456 switch (ptr
->swap_map
[j
]) {
463 len
+= sprintf(buf
+ len
, "%d\t%d\t%d\n", ptr
->pages
<< (PAGE_SHIFT
- 10),
464 usedswap
<< (PAGE_SHIFT
- 10), ptr
->prio
);
467 free_page((unsigned long) page
);
/*
 * is_swap_partition - check whether dev backs one of the swap areas.
 *
 * Walks the first nr_swapfiles slots of swap_info[] and, for slots with
 * SWP_USED set, compares swap_device with dev.
 *
 * NOTE(review): the declaration of i and both return statements are
 * missing from this listing, so the returned values are not shown here.
 */
471 int is_swap_partition(kdev_t dev
) {
472 struct swap_info_struct
*ptr
= swap_info
;
475 for (i
= 0 ; i
< nr_swapfiles
; i
++, ptr
++) {
476 if (ptr
->flags
& SWP_USED
)
477 if (ptr
->swap_device
== dev
)
/*
 * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 *
 * The swapon system call
 */
/*
 * sys_swapon - the swapon system call: register specialfile as a swap
 * area.
 *
 * specialfile: path of the block device or regular file
 * swap_flags:  SWAP_FLAG_PREFER plus a priority in SWAP_FLAG_PRIO_MASK
 *
 * Visible steps:
 *  - test capable(CAP_SYS_ADMIN) and zero the local filp;
 *  - find the first swap_info slot without SWP_USED, test the index
 *    against MAX_SWAPFILES and grow nr_swapfiles when needed;
 *  - choose the priority: taken from swap_flags when SWAP_FLAG_PREFER is
 *    set, otherwise the static least_priority is pre-decremented;
 *  - resolve the path with namei() and test IS_ERR();
 *  - block device: record i_rdev, set the block size to PAGE_SIZE, open
 *    it with blkdev_open(), consult blk_size[][] and compare the device
 *    with the ones already in swap_info[];
 *  - regular file: compare the inode with the inodes of the other
 *    swap_file entries and derive swapfilesize from i_size;
 *  - allocate one page for the header, lock it and read slot 0 of the
 *    area into it with rw_swap_page_nolock();
 *  - recognise the signature: SWAP-SPACE selects version 1 and
 *    SWAPSPACE2 selects version 2;
 *  - version 1 treats the header page as a bitmap probed with
 *    test_bit(); version 2 validates info.version, last_page and
 *    nr_badpages and applies the badpages[] list; both build
 *    p->swap_map with vmalloc();
 *  - sanity checks against swapfilesize and nr_good_pages;
 *  - mark slot 0 SWAP_MAP_BAD, set flags to SWP_WRITEOK, account the
 *    pages in nr_swap_pages and log the result;
 *  - insert the area into swap_list in prio order;
 *  - release the file operations and free the header page.
 *
 * NOTE(review): many lines are missing from this listing (declarations,
 * error codes, goto targets, case labels, else branches, the checks
 * after each allocation).  In particular the first vmalloc() result has
 * no visible check; confirm against the full file before relying on any
 * detail above.
 */
488 asmlinkage
int sys_swapon(const char * specialfile
, int swap_flags
)
490 struct swap_info_struct
* p
;
491 struct dentry
* swap_dentry
;
496 static int least_priority
= 0;
497 union swap_header
*swap_header
= 0;
498 int swap_header_version
;
499 int lock_map_size
= PAGE_SIZE
;
500 int nr_good_pages
= 0;
501 unsigned long maxpages
;
505 if (!capable(CAP_SYS_ADMIN
))
507 memset(&filp
, 0, sizeof(filp
));
/* Look for the first descriptor slot that is not marked SWP_USED. */
509 for (type
= 0 ; type
< nr_swapfiles
; type
++,p
++)
510 if (!(p
->flags
& SWP_USED
))
512 if (type
>= MAX_SWAPFILES
)
514 if (type
>= nr_swapfiles
)
515 nr_swapfiles
= type
+1;
/* Priority: explicit value from swap_flags, or the next lower default. */
525 if (swap_flags
& SWAP_FLAG_PREFER
) {
527 (swap_flags
& SWAP_FLAG_PRIO_MASK
)>>SWAP_FLAG_PRIO_SHIFT
;
529 p
->prio
= --least_priority
;
531 swap_dentry
= namei(specialfile
);
532 error
= PTR_ERR(swap_dentry
);
533 if (IS_ERR(swap_dentry
))
536 p
->swap_file
= swap_dentry
;
/* Block-device backed swap area. */
539 if (S_ISBLK(swap_dentry
->d_inode
->i_mode
)) {
540 kdev_t dev
= swap_dentry
->d_inode
->i_rdev
;
542 p
->swap_device
= dev
;
543 set_blocksize(dev
, PAGE_SIZE
);
545 filp
.f_dentry
= swap_dentry
;
546 filp
.f_mode
= 3; /* read write */
547 error
= blkdev_open(swap_dentry
->d_inode
, &filp
);
550 set_blocksize(dev
, PAGE_SIZE
);
552 if (!dev
|| (blk_size
[MAJOR(dev
)] &&
553 !blk_size
[MAJOR(dev
)][MINOR(dev
)]))
556 for (i
= 0 ; i
< nr_swapfiles
; i
++) {
559 if (dev
== swap_info
[i
].swap_device
)
/* blk_size[][] is divided by PAGE_SIZE / 1024 to get a page count. */
563 if (blk_size
[MAJOR(dev
)])
564 swapfilesize
= blk_size
[MAJOR(dev
)][MINOR(dev
)]
565 / (PAGE_SIZE
/ 1024);
/* Regular-file backed swap area. */
566 } else if (S_ISREG(swap_dentry
->d_inode
->i_mode
)) {
568 for (i
= 0 ; i
< nr_swapfiles
; i
++) {
569 if (i
== type
|| !swap_info
[i
].swap_file
)
571 if (swap_dentry
->d_inode
== swap_info
[i
].swap_file
->d_inode
)
574 swapfilesize
= swap_dentry
->d_inode
->i_size
/ PAGE_SIZE
;
/* Read the first page of the area; it carries the swap signature. */
578 swap_header
= (void *) __get_free_page(GFP_USER
);
580 printk("Unable to start swapping: out of memory :-)\n");
585 lock_page(mem_map
+ MAP_NR(swap_header
));
586 rw_swap_page_nolock(READ
, SWP_ENTRY(type
,0), (char *) swap_header
, 1);
588 if (!memcmp("SWAP-SPACE",swap_header
->magic
.magic
,10))
589 swap_header_version
= 1;
590 else if (!memcmp("SWAPSPACE2",swap_header
->magic
.magic
,10))
591 swap_header_version
= 2;
593 printk("Unable to find swap-space signature\n");
598 switch (swap_header_version
) {
/* Wipe the 10 signature bytes at the end of the header page. */
600 memset(((char *) swap_header
)+PAGE_SIZE
-10,0,10);
604 for (i
= 1 ; i
< 8*PAGE_SIZE
; i
++) {
605 if (test_bit(i
,(char *) swap_header
)) {
614 p
->swap_map
= vmalloc(p
->max
* sizeof(short));
619 for (i
= 1 ; i
< p
->max
; i
++) {
620 if (test_bit(i
,(char *) swap_header
))
623 p
->swap_map
[i
] = SWAP_MAP_BAD
;
628 /* Check the swap header's sub-version and the size of
629 the swap file and bad block lists */
630 if (swap_header
->info
.version
!= 1) {
632 "Unable to handle swap header version %d\n",
633 swap_header
->info
.version
);
639 p
->highest_bit
= swap_header
->info
.last_page
- 1;
640 p
->max
= swap_header
->info
.last_page
;
/* Largest offset that survives a round trip through SWP_ENTRY. */
642 maxpages
= SWP_OFFSET(SWP_ENTRY(0,~0UL));
643 if (p
->max
>= maxpages
)
647 if (swap_header
->info
.nr_badpages
> MAX_SWAP_BADPAGES
)
650 /* OK, set up the swap map and apply the bad block list */
651 if (!(p
->swap_map
= vmalloc (p
->max
* sizeof(short)))) {
657 memset(p
->swap_map
, 0, p
->max
* sizeof(short));
658 for (i
=0; i
<swap_header
->info
.nr_badpages
; i
++) {
659 int page
= swap_header
->info
.badpages
[i
];
660 if (page
<= 0 || page
>= swap_header
->info
.last_page
)
663 p
->swap_map
[page
] = SWAP_MAP_BAD
;
/* After the loop i equals the number of bad-page entries processed. */
665 nr_good_pages
= swap_header
->info
.last_page
- i
;
666 lock_map_size
= (p
->max
+ 7) / 8;
671 if (swapfilesize
&& p
->max
> swapfilesize
) {
673 "Swap area shorter than signature indicates\n");
677 if (!nr_good_pages
) {
678 printk(KERN_WARNING
"Empty swap-file\n");
/* Slot 0 is where the header was read from; it is never handed out. */
682 p
->swap_map
[0] = SWAP_MAP_BAD
;
683 p
->flags
= SWP_WRITEOK
;
684 p
->pages
= nr_good_pages
;
685 nr_swap_pages
+= nr_good_pages
;
686 printk(KERN_INFO
"Adding Swap: %dk swap-space (priority %d)\n",
687 nr_good_pages
<<(PAGE_SHIFT
-10), p
->prio
);
689 /* insert swap space into swap_list: */
691 for (i
= swap_list
.head
; i
>= 0; i
= swap_info
[i
].next
) {
692 if (p
->prio
>= swap_info
[i
].prio
) {
699 swap_list
.head
= swap_list
.next
= p
- swap_info
;
701 swap_info
[prev
].next
= p
- swap_info
;
706 if(filp
.f_op
&& filp
.f_op
->release
)
707 filp
.f_op
->release(filp
.f_dentry
->d_inode
,&filp
);
716 if (!(swap_flags
& SWAP_FLAG_PREFER
))
720 free_page((long) swap_header
);
/*
 * si_swapinfo - fill the swap totals of a struct sysinfo.
 *
 * val->freeswap and val->totalswap are zeroed, then every swap_info
 * slot whose flags have all SWP_WRITEOK bits set has its swap_map
 * walked with a switch on each entry.  Both totals are finally shifted
 * left by PAGE_SHIFT.
 *
 * NOTE(review): the declarations of i and j, the statement guarded by
 * the flags test and the switch cases that update the counters are
 * missing from this listing.
 */
725 void si_swapinfo(struct sysinfo
*val
)
729 val
->freeswap
= val
->totalswap
= 0;
730 for (i
= 0; i
< nr_swapfiles
; i
++) {
731 if ((swap_info
[i
].flags
& SWP_WRITEOK
) != SWP_WRITEOK
)
733 for (j
= 0; j
< swap_info
[i
].max
; ++j
)
734 switch (swap_info
[i
].swap_map
[j
]) {
743 val
->freeswap
<<= PAGE_SHIFT
;
744 val
->totalswap
<<= PAGE_SHIFT
;