2 * Compressed RAM based swap device
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
12 * Project home: http://compcache.googlecode.com
15 #define KMSG_COMPONENT "ramzswap"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bitops.h>
21 #include <linux/blkdev.h>
22 #include <linux/buffer_head.h>
23 #include <linux/device.h>
24 #include <linux/genhd.h>
25 #include <linux/highmem.h>
26 #include <linux/slab.h>
27 #include <linux/lzo.h>
28 #include <linux/string.h>
29 #include <linux/swap.h>
30 #include <linux/swapops.h>
31 #include <linux/vmalloc.h>
33 #include "ramzswap_drv.h"
36 static int ramzswap_major
;
37 static struct ramzswap
*devices
;
39 /* Module params (documentation at end) */
40 static unsigned int num_devices
;
42 static int rzs_test_flag(struct ramzswap
*rzs
, u32 index
,
43 enum rzs_pageflags flag
)
45 return rzs
->table
[index
].flags
& BIT(flag
);
48 static void rzs_set_flag(struct ramzswap
*rzs
, u32 index
,
49 enum rzs_pageflags flag
)
51 rzs
->table
[index
].flags
|= BIT(flag
);
54 static void rzs_clear_flag(struct ramzswap
*rzs
, u32 index
,
55 enum rzs_pageflags flag
)
57 rzs
->table
[index
].flags
&= ~BIT(flag
);
60 static int page_zero_filled(void *ptr
)
65 page
= (unsigned long *)ptr
;
67 for (pos
= 0; pos
!= PAGE_SIZE
/ sizeof(*page
); pos
++) {
75 static void ramzswap_set_disksize(struct ramzswap
*rzs
, size_t totalram_bytes
)
79 "disk size not provided. You can use disksize_kb module "
80 "param to specify size.\nUsing default: (%u%% of RAM).\n",
81 default_disksize_perc_ram
83 rzs
->disksize
= default_disksize_perc_ram
*
84 (totalram_bytes
/ 100);
87 if (rzs
->disksize
> 2 * (totalram_bytes
)) {
89 "There is little point creating a ramzswap of greater than "
90 "twice the size of memory since we expect a 2:1 compression "
91 "ratio. Note that ramzswap uses about 0.1%% of the size of "
92 "the swap device when not in use so a huge ramzswap is "
94 "\tMemory Size: %zu kB\n"
95 "\tSize you selected: %zu kB\n"
96 "Continuing anyway ...\n",
97 totalram_bytes
>> 10, rzs
->disksize
101 rzs
->disksize
&= PAGE_MASK
;
105 * Swap header (1st page of swap device) contains information
106 * about a swap file/partition. Prepare such a header for the
107 * given ramzswap device so that swapon can identify it as a swap device.
110 static void setup_swap_header(struct ramzswap
*rzs
, union swap_header
*s
)
113 s
->info
.last_page
= (rzs
->disksize
>> PAGE_SHIFT
) - 1;
114 s
->info
.nr_badpages
= 0;
115 memcpy(s
->magic
.magic
, "SWAPSPACE2", 10);
118 static void ramzswap_ioctl_get_stats(struct ramzswap
*rzs
,
119 struct ramzswap_ioctl_stats
*s
)
121 s
->disksize
= rzs
->disksize
;
123 #if defined(CONFIG_RAMZSWAP_STATS)
125 struct ramzswap_stats
*rs
= &rzs
->stats
;
126 size_t succ_writes
, mem_used
;
127 unsigned int good_compress_perc
= 0, no_compress_perc
= 0;
129 mem_used
= xv_get_total_size_bytes(rzs
->mem_pool
)
130 + (rs
->pages_expand
<< PAGE_SHIFT
);
131 succ_writes
= rzs_stat64_read(rzs
, &rs
->num_writes
) -
132 rzs_stat64_read(rzs
, &rs
->failed_writes
);
134 if (succ_writes
&& rs
->pages_stored
) {
135 good_compress_perc
= rs
->good_compress
* 100
137 no_compress_perc
= rs
->pages_expand
* 100
141 s
->num_reads
= rzs_stat64_read(rzs
, &rs
->num_reads
);
142 s
->num_writes
= rzs_stat64_read(rzs
, &rs
->num_writes
);
143 s
->failed_reads
= rzs_stat64_read(rzs
, &rs
->failed_reads
);
144 s
->failed_writes
= rzs_stat64_read(rzs
, &rs
->failed_writes
);
145 s
->invalid_io
= rzs_stat64_read(rzs
, &rs
->invalid_io
);
146 s
->notify_free
= rzs_stat64_read(rzs
, &rs
->notify_free
);
147 s
->pages_zero
= rs
->pages_zero
;
149 s
->good_compress_pct
= good_compress_perc
;
150 s
->pages_expand_pct
= no_compress_perc
;
152 s
->pages_stored
= rs
->pages_stored
;
153 s
->pages_used
= mem_used
>> PAGE_SHIFT
;
154 s
->orig_data_size
= rs
->pages_stored
<< PAGE_SHIFT
;
155 s
->compr_data_size
= rs
->compr_size
;
156 s
->mem_used_total
= mem_used
;
158 #endif /* CONFIG_RAMZSWAP_STATS */
161 static void ramzswap_free_page(struct ramzswap
*rzs
, size_t index
)
166 struct page
*page
= rzs
->table
[index
].page
;
167 u32 offset
= rzs
->table
[index
].offset
;
169 if (unlikely(!page
)) {
171 * No memory is allocated for zero filled pages.
172 * Simply clear zero page flag.
174 if (rzs_test_flag(rzs
, index
, RZS_ZERO
)) {
175 rzs_clear_flag(rzs
, index
, RZS_ZERO
);
176 rzs_stat_dec(&rzs
->stats
.pages_zero
);
181 if (unlikely(rzs_test_flag(rzs
, index
, RZS_UNCOMPRESSED
))) {
184 rzs_clear_flag(rzs
, index
, RZS_UNCOMPRESSED
);
185 rzs_stat_dec(&rzs
->stats
.pages_expand
);
189 obj
= kmap_atomic(page
, KM_USER0
) + offset
;
190 clen
= xv_get_object_size(obj
) - sizeof(struct zobj_header
);
191 kunmap_atomic(obj
, KM_USER0
);
193 xv_free(rzs
->mem_pool
, page
, offset
);
194 if (clen
<= PAGE_SIZE
/ 2)
195 rzs_stat_dec(&rzs
->stats
.good_compress
);
198 rzs
->stats
.compr_size
-= clen
;
199 rzs_stat_dec(&rzs
->stats
.pages_stored
);
201 rzs
->table
[index
].page
= NULL
;
202 rzs
->table
[index
].offset
= 0;
205 static int handle_zero_page(struct bio
*bio
)
208 struct page
*page
= bio
->bi_io_vec
[0].bv_page
;
210 user_mem
= kmap_atomic(page
, KM_USER0
);
211 memset(user_mem
, 0, PAGE_SIZE
);
212 kunmap_atomic(user_mem
, KM_USER0
);
214 flush_dcache_page(page
);
216 set_bit(BIO_UPTODATE
, &bio
->bi_flags
);
221 static int handle_uncompressed_page(struct ramzswap
*rzs
, struct bio
*bio
)
225 unsigned char *user_mem
, *cmem
;
227 page
= bio
->bi_io_vec
[0].bv_page
;
228 index
= bio
->bi_sector
>> SECTORS_PER_PAGE_SHIFT
;
230 user_mem
= kmap_atomic(page
, KM_USER0
);
231 cmem
= kmap_atomic(rzs
->table
[index
].page
, KM_USER1
) +
232 rzs
->table
[index
].offset
;
234 memcpy(user_mem
, cmem
, PAGE_SIZE
);
235 kunmap_atomic(user_mem
, KM_USER0
);
236 kunmap_atomic(cmem
, KM_USER1
);
238 flush_dcache_page(page
);
240 set_bit(BIO_UPTODATE
, &bio
->bi_flags
);
246 * Called when the requested page is not present in ramzswap.
247 * This is an attempt to read before any previous write
248 * to this location - this happens due to readahead when
249 * swap device is read from user-space (e.g. during swapon)
251 static int handle_ramzswap_fault(struct ramzswap
*rzs
, struct bio
*bio
)
253 pr_debug("Read before write on swap device: "
254 "sector=%lu, size=%u, offset=%u\n",
255 (ulong
)(bio
->bi_sector
), bio
->bi_size
,
256 bio
->bi_io_vec
[0].bv_offset
);
258 /* Do nothing. Just return success */
259 set_bit(BIO_UPTODATE
, &bio
->bi_flags
);
264 static int ramzswap_read(struct ramzswap
*rzs
, struct bio
*bio
)
270 struct zobj_header
*zheader
;
271 unsigned char *user_mem
, *cmem
;
273 rzs_stat64_inc(rzs
, &rzs
->stats
.num_reads
);
275 page
= bio
->bi_io_vec
[0].bv_page
;
276 index
= bio
->bi_sector
>> SECTORS_PER_PAGE_SHIFT
;
278 if (rzs_test_flag(rzs
, index
, RZS_ZERO
))
279 return handle_zero_page(bio
);
281 /* Requested page is not present in compressed area */
282 if (!rzs
->table
[index
].page
)
283 return handle_ramzswap_fault(rzs
, bio
);
285 /* Page is stored uncompressed since it's incompressible */
286 if (unlikely(rzs_test_flag(rzs
, index
, RZS_UNCOMPRESSED
)))
287 return handle_uncompressed_page(rzs
, bio
);
289 user_mem
= kmap_atomic(page
, KM_USER0
);
292 cmem
= kmap_atomic(rzs
->table
[index
].page
, KM_USER1
) +
293 rzs
->table
[index
].offset
;
295 ret
= lzo1x_decompress_safe(
296 cmem
+ sizeof(*zheader
),
297 xv_get_object_size(cmem
) - sizeof(*zheader
),
300 kunmap_atomic(user_mem
, KM_USER0
);
301 kunmap_atomic(cmem
, KM_USER1
);
303 /* should NEVER happen */
304 if (unlikely(ret
!= LZO_E_OK
)) {
305 pr_err("Decompression failed! err=%d, page=%u\n",
307 rzs_stat64_inc(rzs
, &rzs
->stats
.failed_reads
);
311 flush_dcache_page(page
);
313 set_bit(BIO_UPTODATE
, &bio
->bi_flags
);
322 static int ramzswap_write(struct ramzswap
*rzs
, struct bio
*bio
)
327 struct zobj_header
*zheader
;
328 struct page
*page
, *page_store
;
329 unsigned char *user_mem
, *cmem
, *src
;
331 rzs_stat64_inc(rzs
, &rzs
->stats
.num_writes
);
333 page
= bio
->bi_io_vec
[0].bv_page
;
334 index
= bio
->bi_sector
>> SECTORS_PER_PAGE_SHIFT
;
336 src
= rzs
->compress_buffer
;
338 mutex_lock(&rzs
->lock
);
340 user_mem
= kmap_atomic(page
, KM_USER0
);
341 if (page_zero_filled(user_mem
)) {
342 kunmap_atomic(user_mem
, KM_USER0
);
343 mutex_unlock(&rzs
->lock
);
344 rzs_stat_inc(&rzs
->stats
.pages_zero
);
345 rzs_set_flag(rzs
, index
, RZS_ZERO
);
347 set_bit(BIO_UPTODATE
, &bio
->bi_flags
);
352 ret
= lzo1x_1_compress(user_mem
, PAGE_SIZE
, src
, &clen
,
353 rzs
->compress_workmem
);
355 kunmap_atomic(user_mem
, KM_USER0
);
357 if (unlikely(ret
!= LZO_E_OK
)) {
358 mutex_unlock(&rzs
->lock
);
359 pr_err("Compression failed! err=%d\n", ret
);
360 rzs_stat64_inc(rzs
, &rzs
->stats
.failed_writes
);
365 * Page is incompressible. Store it as-is (uncompressed)
366 * since we do not want to return too many swap write
367 * errors, which has the side effect of hanging the system.
369 if (unlikely(clen
> max_zpage_size
)) {
371 page_store
= alloc_page(GFP_NOIO
| __GFP_HIGHMEM
);
372 if (unlikely(!page_store
)) {
373 mutex_unlock(&rzs
->lock
);
374 pr_info("Error allocating memory for incompressible "
375 "page: %u\n", index
);
376 rzs_stat64_inc(rzs
, &rzs
->stats
.failed_writes
);
381 rzs_set_flag(rzs
, index
, RZS_UNCOMPRESSED
);
382 rzs_stat_inc(&rzs
->stats
.pages_expand
);
383 rzs
->table
[index
].page
= page_store
;
384 src
= kmap_atomic(page
, KM_USER0
);
388 if (xv_malloc(rzs
->mem_pool
, clen
+ sizeof(*zheader
),
389 &rzs
->table
[index
].page
, &offset
,
390 GFP_NOIO
| __GFP_HIGHMEM
)) {
391 mutex_unlock(&rzs
->lock
);
392 pr_info("Error allocating memory for compressed "
393 "page: %u, size=%zu\n", index
, clen
);
394 rzs_stat64_inc(rzs
, &rzs
->stats
.failed_writes
);
399 rzs
->table
[index
].offset
= offset
;
401 cmem
= kmap_atomic(rzs
->table
[index
].page
, KM_USER1
) +
402 rzs
->table
[index
].offset
;
405 /* Back-reference needed for memory defragmentation */
406 if (!rzs_test_flag(rzs
, index
, RZS_UNCOMPRESSED
)) {
407 zheader
= (struct zobj_header
*)cmem
;
408 zheader
->table_idx
= index
;
409 cmem
+= sizeof(*zheader
);
413 memcpy(cmem
, src
, clen
);
415 kunmap_atomic(cmem
, KM_USER1
);
416 if (unlikely(rzs_test_flag(rzs
, index
, RZS_UNCOMPRESSED
)))
417 kunmap_atomic(src
, KM_USER0
);
420 rzs
->stats
.compr_size
+= clen
;
421 rzs_stat_inc(&rzs
->stats
.pages_stored
);
422 if (clen
<= PAGE_SIZE
/ 2)
423 rzs_stat_inc(&rzs
->stats
.good_compress
);
425 mutex_unlock(&rzs
->lock
);
427 set_bit(BIO_UPTODATE
, &bio
->bi_flags
);
437 * Check if request is within bounds and page aligned.
439 static inline int valid_swap_request(struct ramzswap
*rzs
, struct bio
*bio
)
442 (bio
->bi_sector
>= (rzs
->disksize
>> SECTOR_SHIFT
)) ||
443 (bio
->bi_sector
& (SECTORS_PER_PAGE
- 1)) ||
444 (bio
->bi_vcnt
!= 1) ||
445 (bio
->bi_size
!= PAGE_SIZE
) ||
446 (bio
->bi_io_vec
[0].bv_offset
!= 0))) {
451 /* swap request is valid */
456 * Handler function for all ramzswap I/O requests.
458 static int ramzswap_make_request(struct request_queue
*queue
, struct bio
*bio
)
461 struct ramzswap
*rzs
= queue
->queuedata
;
463 if (unlikely(!rzs
->init_done
)) {
468 if (!valid_swap_request(rzs
, bio
)) {
469 rzs_stat64_inc(rzs
, &rzs
->stats
.invalid_io
);
474 switch (bio_data_dir(bio
)) {
476 ret
= ramzswap_read(rzs
, bio
);
480 ret
= ramzswap_write(rzs
, bio
);
487 static void reset_device(struct ramzswap
*rzs
)
491 /* Do not accept any new I/O request */
494 /* Free various per-device buffers */
495 kfree(rzs
->compress_workmem
);
496 free_pages((unsigned long)rzs
->compress_buffer
, 1);
498 rzs
->compress_workmem
= NULL
;
499 rzs
->compress_buffer
= NULL
;
501 /* Free all pages that are still in this ramzswap device */
502 for (index
= 0; index
< rzs
->disksize
>> PAGE_SHIFT
; index
++) {
506 page
= rzs
->table
[index
].page
;
507 offset
= rzs
->table
[index
].offset
;
512 if (unlikely(rzs_test_flag(rzs
, index
, RZS_UNCOMPRESSED
)))
515 xv_free(rzs
->mem_pool
, page
, offset
);
521 xv_destroy_pool(rzs
->mem_pool
);
522 rzs
->mem_pool
= NULL
;
525 memset(&rzs
->stats
, 0, sizeof(rzs
->stats
));
530 static int ramzswap_ioctl_init_device(struct ramzswap
*rzs
)
535 union swap_header
*swap_header
;
537 if (rzs
->init_done
) {
538 pr_info("Device already initialized!\n");
542 ramzswap_set_disksize(rzs
, totalram_pages
<< PAGE_SHIFT
);
544 rzs
->compress_workmem
= kzalloc(LZO1X_MEM_COMPRESS
, GFP_KERNEL
);
545 if (!rzs
->compress_workmem
) {
546 pr_err("Error allocating compressor working memory!\n");
551 rzs
->compress_buffer
= (void *)__get_free_pages(__GFP_ZERO
, 1);
552 if (!rzs
->compress_buffer
) {
553 pr_err("Error allocating compressor buffer space\n");
558 num_pages
= rzs
->disksize
>> PAGE_SHIFT
;
559 rzs
->table
= vmalloc(num_pages
* sizeof(*rzs
->table
));
561 pr_err("Error allocating ramzswap address table\n");
562 /* To prevent accessing table entries during cleanup */
567 memset(rzs
->table
, 0, num_pages
* sizeof(*rzs
->table
));
569 page
= alloc_page(__GFP_ZERO
);
571 pr_err("Error allocating swap header page\n");
575 rzs
->table
[0].page
= page
;
576 rzs_set_flag(rzs
, 0, RZS_UNCOMPRESSED
);
578 swap_header
= kmap(page
);
579 setup_swap_header(rzs
, swap_header
);
582 set_capacity(rzs
->disk
, rzs
->disksize
>> SECTOR_SHIFT
);
584 /* ramzswap devices sort of resemble non-rotational disks */
585 queue_flag_set_unlocked(QUEUE_FLAG_NONROT
, rzs
->disk
->queue
);
587 rzs
->mem_pool
= xv_create_pool();
588 if (!rzs
->mem_pool
) {
589 pr_err("Error creating memory pool\n");
596 pr_debug("Initialization done!\n");
602 pr_err("Initialization failed: err=%d\n", ret
);
606 static int ramzswap_ioctl_reset_device(struct ramzswap
*rzs
)
614 static int ramzswap_ioctl(struct block_device
*bdev
, fmode_t mode
,
615 unsigned int cmd
, unsigned long arg
)
620 struct ramzswap
*rzs
= bdev
->bd_disk
->private_data
;
623 case RZSIO_SET_DISKSIZE_KB
:
624 if (rzs
->init_done
) {
628 if (copy_from_user(&disksize_kb
, (void *)arg
,
633 rzs
->disksize
= disksize_kb
<< 10;
634 pr_info("Disk size set to %zu kB\n", disksize_kb
);
637 case RZSIO_GET_STATS
:
639 struct ramzswap_ioctl_stats
*stats
;
640 if (!rzs
->init_done
) {
644 stats
= kzalloc(sizeof(*stats
), GFP_KERNEL
);
649 ramzswap_ioctl_get_stats(rzs
, stats
);
650 if (copy_to_user((void *)arg
, stats
, sizeof(*stats
))) {
659 ret
= ramzswap_ioctl_init_device(rzs
);
663 /* Do not reset an active device! */
664 if (bdev
->bd_holders
) {
669 /* Make sure all pending I/O is finished */
673 ret
= ramzswap_ioctl_reset_device(rzs
);
677 pr_info("Invalid ioctl %u\n", cmd
);
685 void ramzswap_slot_free_notify(struct block_device
*bdev
, unsigned long index
)
687 struct ramzswap
*rzs
;
689 rzs
= bdev
->bd_disk
->private_data
;
690 ramzswap_free_page(rzs
, index
);
691 rzs_stat64_inc(rzs
, &rzs
->stats
.notify_free
);
696 static struct block_device_operations ramzswap_devops
= {
697 .ioctl
= ramzswap_ioctl
,
698 .swap_slot_free_notify
= ramzswap_slot_free_notify
,
702 static int create_device(struct ramzswap
*rzs
, int device_id
)
706 mutex_init(&rzs
->lock
);
707 spin_lock_init(&rzs
->stat64_lock
);
709 rzs
->queue
= blk_alloc_queue(GFP_KERNEL
);
711 pr_err("Error allocating disk queue for device %d\n",
717 blk_queue_make_request(rzs
->queue
, ramzswap_make_request
);
718 rzs
->queue
->queuedata
= rzs
;
720 /* gendisk structure */
721 rzs
->disk
= alloc_disk(1);
723 blk_cleanup_queue(rzs
->queue
);
724 pr_warning("Error allocating disk structure for device %d\n",
730 rzs
->disk
->major
= ramzswap_major
;
731 rzs
->disk
->first_minor
= device_id
;
732 rzs
->disk
->fops
= &ramzswap_devops
;
733 rzs
->disk
->queue
= rzs
->queue
;
734 rzs
->disk
->private_data
= rzs
;
735 snprintf(rzs
->disk
->disk_name
, 16, "ramzswap%d", device_id
);
737 /* Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl */
738 set_capacity(rzs
->disk
, 0);
740 blk_queue_physical_block_size(rzs
->disk
->queue
, PAGE_SIZE
);
741 blk_queue_logical_block_size(rzs
->disk
->queue
, PAGE_SIZE
);
751 static void destroy_device(struct ramzswap
*rzs
)
754 del_gendisk(rzs
->disk
);
759 blk_cleanup_queue(rzs
->queue
);
762 static int __init
ramzswap_init(void)
766 if (num_devices
> max_num_devices
) {
767 pr_warning("Invalid value for num_devices: %u\n",
773 ramzswap_major
= register_blkdev(0, "ramzswap");
774 if (ramzswap_major
<= 0) {
775 pr_warning("Unable to get major number\n");
781 pr_info("num_devices not specified. Using default: 1\n");
785 /* Allocate the device array and initialize each one */
786 pr_info("Creating %u devices ...\n", num_devices
);
787 devices
= kzalloc(num_devices
* sizeof(struct ramzswap
), GFP_KERNEL
);
793 for (dev_id
= 0; dev_id
< num_devices
; dev_id
++) {
794 ret
= create_device(&devices
[dev_id
], dev_id
);
803 destroy_device(&devices
[--dev_id
]);
805 unregister_blkdev(ramzswap_major
, "ramzswap");
810 static void __exit
ramzswap_exit(void)
813 struct ramzswap
*rzs
;
815 for (i
= 0; i
< num_devices
; i
++) {
823 unregister_blkdev(ramzswap_major
, "ramzswap");
826 pr_debug("Cleanup done!\n");
829 module_param(num_devices
, uint
, 0);
830 MODULE_PARM_DESC(num_devices
, "Number of ramzswap devices");
832 module_init(ramzswap_init
);
833 module_exit(ramzswap_exit
);
835 MODULE_LICENSE("Dual BSD/GPL");
836 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
837 MODULE_DESCRIPTION("Compressed RAM Based Swap Device");