/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the license that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to a size greater than or equal to this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;

static void zram_free_page(struct zram *zram, size_t index);

static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value);
}

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline bool zram_allocated(struct zram *zram, u32 index)
{
	return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) ||
					zram->table[index].handle;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

/* flag operations require table entry bit_spin_lock() being held */
static bool zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].value & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value &= ~BIT(flag);
}

static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}

static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
			u32 index, size_t size)
{
	unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;

	zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}
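
/*
 * For illustration only (not used by the driver): ->value packs the
 * compressed object size into the low ZRAM_FLAG_SHIFT bits and the
 * zram_pageflags above them. Assuming ZRAM_FLAG_SHIFT == 24 with ZRAM_LOCK
 * as the first flag bit (as in this era's zram_drv.h), a locked slot
 * holding a 100-byte object carries
 *
 *	value == BIT(ZRAM_LOCK) | 100
 *
 * so zram_get_obj_size() masks off the flags and returns 100.
 */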
#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}
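
/*
 * For illustration, assuming ZRAM_LOGICAL_BLOCK_SIZE == 4096 (i.e.
 * ZRAM_SECTOR_PER_LOGICAL_BLOCK == 8): on a 1 MiB device (2048 sectors),
 * a 4096-byte request at sector 8 passes, while a request at sector 9
 * (unaligned) or one ending past sector 2048 (out of bounds) is rejected.
 */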
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	*index  += (*offset + bvec->bv_len) / PAGE_SIZE;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}
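
/*
 * The loop above is the usual lock-free maximum update: re-read the
 * current maximum and retry atomic_long_cmpxchg() until either the stored
 * value is already >= pages or the exchange succeeds (old_max == cur_max).
 */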
static inline void zram_fill_page(void *ptr, unsigned long len,
					unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned int pos;
	unsigned long *page;
	unsigned long val;

	page = (unsigned long *)ptr;
	val = page[0];

	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}
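
/*
 * For illustration: an all-zero page, or one filled with a repeating
 * word-sized pattern, compares equal word by word above, so only that
 * single unsigned long is recorded in *element and no zsmalloc object is
 * allocated for the page (see the ZRAM_SAME handling below).
 */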
static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}
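
/*
 * Typical mem_limit usage from user space (illustrative; memparse()
 * accepts K/M/G suffixes):
 *
 *	echo 512M > /sys/block/zram0/mem_limit
 *	echo 0 > /sys/block/zram0/mem_limit	# disable the limit
 */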
static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

#ifdef CONFIG_ZRAM_WRITEBACK
static bool zram_wb_enabled(struct zram *zram)
{
	return zram->backing_dev;
}

static void reset_bdev(struct zram *zram)
{
	struct block_device *bdev;

	if (!zram_wb_enabled(zram))
		return;

	bdev = zram->bdev;
	if (zram->old_block_size)
		set_blocksize(bdev, zram->old_block_size);
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
	/* hope filp_close flushes all outstanding IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->old_block_size = 0;
	zram->bdev = NULL;

	zram->disk->queue->backing_dev_info->capabilities |=
		BDI_CAP_SYNCHRONOUS_IO;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}

static ssize_t backing_dev_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct file *file = zram->backing_dev;
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	if (!zram_wb_enabled(zram)) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}

static ssize_t backing_dev_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	struct address_space *mapping;
	unsigned int bitmap_sz, old_block_size = 0;
	unsigned long nr_pages, *bitmap = NULL;
	struct block_device *bdev = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strlcpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;

	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	mapping = backing_dev->f_mapping;
	inode = mapping->host;

	/* Only block devices are supported at the moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	bdev = bdgrab(I_BDEV(inode));
	err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
	if (err < 0)
		goto out;

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	old_block_size = block_size(bdev);
	err = set_blocksize(bdev, PAGE_SIZE);
	if (err)
		goto out;

	reset_bdev(zram);
	spin_lock_init(&zram->bitmap_lock);

	zram->old_block_size = old_block_size;
	zram->bdev = bdev;
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	/*
	 * With the writeback feature, zram does asynchronous IO, so it is no
	 * longer a synchronous device and the synchronous io flag must be
	 * removed. Otherwise, the upper layer (e.g., swap) could wait for IO
	 * completion rather than (submit and return), which would make the
	 * system sluggish. Furthermore, when the IO function returns (e.g.,
	 * swap_readpage), the upper layer expects the IO to be done and may
	 * free the page while the IO is in fact still in flight, eventually
	 * causing a use-after-free once the IO really completes.
	 */
	zram->disk->queue->backing_dev_info->capabilities &=
			~BDI_CAP_SYNCHRONOUS_IO;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	if (bitmap)
		kvfree(bitmap);

	if (bdev)
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}
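
/*
 * Typical writeback setup (illustrative device name; must happen while the
 * zram device is not initialized, as enforced above):
 *
 *	echo /dev/sda5 > /sys/block/zram0/backing_dev
 *	echo 1G > /sys/block/zram0/disksize
 *
 * incompressible pages can then be written to the backing block device
 * instead of being kept uncompressed in memory.
 */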
static unsigned long get_entry_bdev(struct zram *zram)
{
	unsigned long entry;

	spin_lock(&zram->bitmap_lock);
	/* skip bit 0, so a valid entry is never confused with handle == 0 */
	entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
	if (entry == zram->nr_pages) {
		spin_unlock(&zram->bitmap_lock);
		return 0;
	}

	set_bit(entry, zram->bitmap);
	spin_unlock(&zram->bitmap_lock);

	return entry;
}
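
/*
 * A bitmap index doubles as a page-sized slot number on the backing
 * device: read_from_bdev_async() and write_to_bdev() below place slot
 * entry at sector entry * (PAGE_SIZE >> 9). Because bit 0 is never handed
 * out, a return value of 0 unambiguously means "no free slot".
 */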
static void put_entry_bdev(struct zram *zram, unsigned long entry)
{
	int was_set;

	spin_lock(&zram->bitmap_lock);
	was_set = test_and_clear_bit(entry, zram->bitmap);
	spin_unlock(&zram->bitmap_lock);
	WARN_ON_ONCE(!was_set);
}

static void zram_page_end_io(struct bio *bio)
{
	struct page *page = bio_first_page_all(bio);

	page_endio(page, op_is_write(bio_op(bio)),
			blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}

/*
 * Returns 1 if the submission is successful.
 */
static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	bio_set_dev(bio, zram->bdev);
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
		bio_put(bio);
		return -EIO;
	}

	if (!parent) {
		bio->bi_opf = REQ_OP_READ;
		bio->bi_end_io = zram_page_end_io;
	} else {
		bio->bi_opf = parent->bi_opf;
		bio_chain(bio, parent);
	}

	submit_bio(bio);
	return 1;
}

struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct bio *bio;
};

#if PAGE_SIZE != 4096
static void zram_sync_read(struct work_struct *work)
{
	struct bio_vec bvec;
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct zram *zram = zw->zram;
	unsigned long entry = zw->entry;
	struct bio *bio = zw->bio;

	read_from_bdev_async(zram, &bvec, entry, bio);
}

/*
 * The block layer wants only one ->make_request_fn to be active at a time,
 * so chaining IO to the parent IO from the same context would deadlock.
 * To avoid that, the read is issued from a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	struct zram_work work;

	work.zram = zram;
	work.entry = entry;
	work.bio = bio;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return 1;
}
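
/*
 * The on-stack work item above (INIT_WORK_ONSTACK + flush_work +
 * destroy_work_on_stack) turns the asynchronous submission into a
 * synchronous read: the caller blocks until the worker, running outside
 * of ->make_request_fn context, has issued the chained bio.
 */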
#else
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	WARN_ON(1);
	return -EIO;
}
#endif

static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	if (sync)
		return read_from_bdev_sync(zram, bvec, entry, parent);
	else
		return read_from_bdev_async(zram, bvec, entry, parent);
}

static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
					u32 index, struct bio *parent,
					unsigned long *pentry)
{
	struct bio *bio;
	unsigned long entry;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;

	entry = get_entry_bdev(zram);
	if (!entry) {
		bio_put(bio);
		return -ENOSPC;
	}

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	bio_set_dev(bio, zram->bdev);
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
					bvec->bv_offset)) {
		bio_put(bio);
		put_entry_bdev(zram, entry);
		return -EIO;
	}

	if (!parent) {
		bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
		bio->bi_end_io = zram_page_end_io;
	} else {
		bio->bi_opf = parent->bi_opf;
		bio_chain(bio, parent);
	}

	submit_bio(bio);
	*pentry = entry;

	return 0;
}

static void zram_wb_clear(struct zram *zram, u32 index)
{
	unsigned long entry;

	zram_clear_flag(zram, index, ZRAM_WB);
	entry = zram_get_element(zram, index);
	zram_set_element(zram, index, 0);
	put_entry_bdev(zram, entry);
}

#else
static bool zram_wb_enabled(struct zram *zram) { return false; }
static inline void reset_bdev(struct zram *zram) {};
static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
					u32 index, struct bio *parent,
					unsigned long *pentry)
{
	return -EIO;
}

static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	return -EIO;
}
static void zram_wb_clear(struct zram *zram, u32 index) {}
#endif

#ifdef CONFIG_ZRAM_MEMORY_TRACKING

static struct dentry *zram_debugfs_root;

static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}

static void zram_accessed(struct zram *zram, u32 index)
{
	zram->table[index].ac_time = ktime_get_boottime();
}

static void zram_reset_access(struct zram *zram, u32 index)
{
	zram->table[index].ac_time = 0;
}

static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct timespec64 ts;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		ts = ktime_to_timespec64(zram->table[index].ac_time);
		copied = snprintf(kbuf + written, count,
			"%12zd %12lld.%06lu %c%c%c\n",
			index, (s64)ts.tv_sec,
			ts.tv_nsec / NSEC_PER_USEC,
			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.');

		if (count < copied) {
			zram_slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		zram_slot_unlock(zram, index);
		*ppos += 1;
	}

	up_read(&zram->init_lock);
	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}

static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};
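
/*
 * Example block_state output (values are illustrative):
 *
 *	         300    75.033841 .wh
 *	         301    63.806904 s..
 *
 * i.e. page index, last access time in seconds, and the same-filled /
 * written-back / huge flags in the order printed above.
 */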
static void zram_debugfs_register(struct zram *zram)
{
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
						zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
				zram, &proc_zram_block_state_op);
}

static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_accessed(struct zram *zram, u32 index) {};
static void zram_reset_access(struct zram *zram, u32 index) {};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif

/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char compressor[ARRAY_SIZE(zram->compressor)];
	size_t sz;

	strlcpy(compressor, buf, sizeof(compressor));
	/* ignore trailing newline */
	sz = strlen(compressor);
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	strcpy(zram->compressor, compressor);
	up_write(&zram->init_lock);
	return len;
}
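
/*
 * Typical usage (output is illustrative; available algorithms depend on
 * the kernel configuration):
 *
 *	cat /sys/block/zram0/comp_algorithm
 *	[lzo] lz4 deflate
 *	echo lz4 > /sys/block/zram0/comp_algorithm
 *
 * the algorithm must be chosen before disksize is set, as enforced above.
 */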
static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			pool_stats.pages_compacted,
			(u64)atomic64_read(&zram->stats.huge_pages));
	up_read(&zram->init_lock);

	return ret;
}
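
/*
 * mm_stat columns, in the order printed above: orig_data_size,
 * compr_data_size, mem_used_total, mem_limit, mem_used_max (all in bytes),
 * then same_pages, pages_compacted and huge_pages (page counts).
 */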
static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);
	return true;
}
/*
 * To protect concurrent access to the same index entry, the caller should
 * hold this table index entry's bit_spinlock while accessing the entry.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

	zram_reset_access(zram, index);

	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
		zram_clear_flag(zram, index, ZRAM_HUGE);
		atomic64_dec(&zram->stats.huge_pages);
	}

	if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
		zram_wb_clear(zram, index);
		atomic64_dec(&zram->stats.pages_stored);
		return;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		zram_set_element(zram, index, 0);
		atomic64_dec(&zram->stats.same_pages);
		atomic64_dec(&zram->stats.pages_stored);
		return;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
			&zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);

	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
}
static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
				struct bio *bio, bool partial_io)
{
	int ret;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;

	if (zram_wb_enabled(zram)) {
		zram_slot_lock(zram, index);
		if (zram_test_flag(zram, index, ZRAM_WB)) {
			struct bio_vec bvec;

			zram_slot_unlock(zram, index);

			bvec.bv_page = page;
			bvec.bv_len = PAGE_SIZE;
			bvec.bv_offset = 0;
			return read_from_bdev(zram, &bvec,
					zram_get_element(zram, index),
					bio, partial_io);
		}
		zram_slot_unlock(zram, index);
	}

	zram_slot_lock(zram, index);
	handle = zram_get_handle(zram, index);
	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
		unsigned long value;
		void *mem;

		value = handle ? zram_get_element(zram, index) : 0;
		mem = kmap_atomic(page);
		zram_fill_page(mem, PAGE_SIZE, value);
		kunmap_atomic(mem);
		zram_slot_unlock(zram, index);
		return 0;
	}

	size = zram_get_obj_size(zram, index);

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);

		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(zram->mem_pool, handle);
	zram_slot_unlock(zram, index);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
	}

	ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec)) {
		void *dst = kmap_atomic(bvec->bv_page);
		void *src = kmap_atomic(page);

		memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
		kunmap_atomic(src);
		kunmap_atomic(dst);
	}
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}
*zram
, struct bio_vec
*bvec
,
1081 u32 index
, struct bio
*bio
)
1084 unsigned long alloced_pages
;
1085 unsigned long handle
= 0;
1086 unsigned int comp_len
= 0;
1087 void *src
, *dst
, *mem
;
1088 struct zcomp_strm
*zstrm
;
1089 struct page
*page
= bvec
->bv_page
;
1090 unsigned long element
= 0;
1091 enum zram_pageflags flags
= 0;
1092 bool allow_wb
= true;
1094 mem
= kmap_atomic(page
);
1095 if (page_same_filled(mem
, &element
)) {
1097 /* Free memory associated with this sector now. */
1099 atomic64_inc(&zram
->stats
.same_pages
);
1105 zstrm
= zcomp_stream_get(zram
->comp
);
1106 src
= kmap_atomic(page
);
1107 ret
= zcomp_compress(zstrm
, src
, &comp_len
);
1110 if (unlikely(ret
)) {
1111 zcomp_stream_put(zram
->comp
);
1112 pr_err("Compression failed! err=%d\n", ret
);
1113 zs_free(zram
->mem_pool
, handle
);
1117 if (unlikely(comp_len
>= huge_class_size
)) {
1118 comp_len
= PAGE_SIZE
;
1119 if (zram_wb_enabled(zram
) && allow_wb
) {
1120 zcomp_stream_put(zram
->comp
);
1121 ret
= write_to_bdev(zram
, bvec
, index
, bio
, &element
);
1128 goto compress_again
;
1133 * handle allocation has 2 paths:
1134 * a) fast path is executed with preemption disabled (for
1135 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1136 * since we can't sleep;
1137 * b) slow path enables preemption and attempts to allocate
1138 * the page with __GFP_DIRECT_RECLAIM bit set. we have to
1139 * put per-cpu compression stream and, thus, to re-do
1140 * the compression once handle is allocated.
1142 * if we have a 'non-null' handle here then we are coming
1143 * from the slow path and handle has already been allocated.
1146 handle
= zs_malloc(zram
->mem_pool
, comp_len
,
1147 __GFP_KSWAPD_RECLAIM
|
1152 zcomp_stream_put(zram
->comp
);
1153 atomic64_inc(&zram
->stats
.writestall
);
1154 handle
= zs_malloc(zram
->mem_pool
, comp_len
,
1155 GFP_NOIO
| __GFP_HIGHMEM
|
1158 goto compress_again
;
1162 alloced_pages
= zs_get_total_pages(zram
->mem_pool
);
1163 update_used_max(zram
, alloced_pages
);
1165 if (zram
->limit_pages
&& alloced_pages
> zram
->limit_pages
) {
1166 zcomp_stream_put(zram
->comp
);
1167 zs_free(zram
->mem_pool
, handle
);
1171 dst
= zs_map_object(zram
->mem_pool
, handle
, ZS_MM_WO
);
1173 src
= zstrm
->buffer
;
1174 if (comp_len
== PAGE_SIZE
)
1175 src
= kmap_atomic(page
);
1176 memcpy(dst
, src
, comp_len
);
1177 if (comp_len
== PAGE_SIZE
)
1180 zcomp_stream_put(zram
->comp
);
1181 zs_unmap_object(zram
->mem_pool
, handle
);
1182 atomic64_add(comp_len
, &zram
->stats
.compr_data_size
);
1185 * Free memory associated with this sector
1186 * before overwriting unused sectors.
1188 zram_slot_lock(zram
, index
);
1189 zram_free_page(zram
, index
);
1191 if (comp_len
== PAGE_SIZE
) {
1192 zram_set_flag(zram
, index
, ZRAM_HUGE
);
1193 atomic64_inc(&zram
->stats
.huge_pages
);
1197 zram_set_flag(zram
, index
, flags
);
1198 zram_set_element(zram
, index
, element
);
1200 zram_set_handle(zram
, index
, handle
);
1201 zram_set_obj_size(zram
, index
, comp_len
);
1203 zram_slot_unlock(zram
, index
);
1206 atomic64_inc(&zram
->stats
.pages_stored
);
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page = NULL;
	void *src;
	struct bio_vec vec;

	vec = *bvec;
	if (is_partial_io(bvec)) {
		void *dst;
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;

		ret = __zram_bvec_read(zram, page, index, bio, true);
		if (ret)
			goto out;

		src = kmap_atomic(bvec->bv_page);
		dst = kmap_atomic(page);
		memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
		kunmap_atomic(dst);
		kunmap_atomic(src);

		vec.bv_page = page;
		vec.bv_len = PAGE_SIZE;
		vec.bv_offset = 0;
	}

	ret = __zram_bvec_write(zram, &vec, index, bio);
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}
/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;

	/*
	 * zram manages data in physical block size units. Because logical
	 * block size isn't identical to the physical block size on some
	 * architectures, we could get a discard request pointing to a
	 * specific offset within a certain physical block. Although we can
	 * handle this request by reading that physical block and
	 * decompressing and partially zeroing and re-compressing and then
	 * re-storing it, this isn't reasonable because our intent with a
	 * discard request is to save memory. So skipping this logical block
	 * is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}
/*
 * Returns an errno if there is a problem. Otherwise returns 0 or 1:
 * Returns 0 if the IO request was done synchronously.
 * Returns 1 if the IO request was successfully submitted.
 */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, unsigned int op, struct bio *bio)
{
	unsigned long start_time = jiffies;
	struct request_queue *q = zram->disk->queue;
	int ret;

	generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
			&zram->disk->part0);

	if (!op_is_write(op)) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset, bio);
		flush_dcache_page(bvec->bv_page);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset, bio);
	}

	generic_end_io_acct(q, op, &zram->disk->part0, start_time);

	zram_slot_lock(zram, index);
	zram_accessed(zram, index);
	zram_slot_unlock(zram, index);

	if (unlikely(ret < 0)) {
		if (!op_is_write(op))
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	default:
		break;
	}

	bio_for_each_segment(bvec, bio, iter) {
		struct bio_vec bv = bvec;
		unsigned int unwritten = bvec.bv_len;

		do {
			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
							unwritten);
			if (zram_bvec_rw(zram, &bv, index, offset,
					bio_op(bio), bio) < 0)
				goto out;

			bv.bv_offset += bv.bv_len;
			unwritten -= bv.bv_len;

			update_position(&index, &offset, &bv);
		} while (unwritten);
	}

	bio_endio(bio);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	return BLK_QC_T_NONE;

error:
	bio_io_error(bio);
	return BLK_QC_T_NONE;
}
static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
	atomic64_inc(&zram->stats.notify_free);
}

static int zram_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, unsigned int op)
{
	int offset, ret;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;

	if (PageTransHuge(page))
		return -ENOTSUPP;
	zram = bdev->bd_disk->private_data;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		ret = -EINVAL;
		goto out;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
out:
	/*
	 * If I/O fails, just return the error (i.e., non-zero) without
	 * calling page_endio. The upper functions of rw_page (e.g.,
	 * swap_readpage, __swap_writepage) will then resubmit the I/O as a
	 * bio request, and bio->bi_end_io handles the error (e.g.,
	 * SetPageError, set_page_dirty and extra work).
	 */
	if (unlikely(ret < 0))
		return ret;

	switch (ret) {
	case 0:
		page_endio(page, op_is_write(op), 0);
		break;
	case 1:
		ret = 0;
		break;
	default:
		WARN_ON(1);
	}
	return ret;
}
static void zram_reset_device(struct zram *zram)
{
	struct zcomp *comp;
	u64 disksize;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	comp = zram->comp;
	disksize = zram->disksize;
	zram->disksize = 0;

	set_capacity(zram->disk, 0);
	part_stat_set_all(&zram->disk->part0, 0);

	up_write(&zram->init_lock);
	/* I/O on all CPUs is done, so it is safe to free the metadata */
	zram_meta_free(zram, disksize);
	memset(&zram->stats, 0, sizeof(zram->stats));
	zcomp_destroy(comp);
	reset_bdev(zram);
}
static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	comp = zcomp_create(zram->compressor);
	if (IS_ERR(comp)) {
		pr_err("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);

	revalidate_disk(zram->disk);
	up_write(&zram->init_lock);

	return len;

out_free_meta:
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}
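
/*
 * Typical initialization from user space (illustrative; memparse()
 * accepts K/M/G suffixes):
 *
 *	echo 1G > /sys/block/zram0/disksize
 *	mkswap /dev/zram0 && swapon /dev/zram0
 */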
static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	/* Do not reset an active device or claimed device */
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	/* From now on, no one can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/* Make sure all the pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	revalidate_disk(zram->disk);
	bdput(bdev);

	mutex_lock(&bdev->bd_mutex);
	zram->claim = false;
	mutex_unlock(&bdev->bd_mutex);

	return len;
}
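
/*
 * Typical usage: stop using the device first, then reset it so that
 * disksize, comp_algorithm, etc. can be configured again:
 *
 *	swapoff /dev/zram0
 *	echo 1 > /sys/block/zram0/reset
 */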
static int zram_open(struct block_device *bdev, fmode_t mode)
{
	int ret = 0;
	struct zram *zram;

	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));

	zram = bdev->bd_disk->private_data;
	/* zram was claimed to reset so open request fails */
	if (zram->claim)
		ret = -EBUSY;

	return ret;
}

static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
#endif

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
	&dev_attr_debug_stat.attr,
	NULL,
};

static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};
/*
 * Allocate and initialize a new zram device. The function returns a
 * device_id >= 0 on success, and a negative value otherwise.
 */
static int zram_add(void)
{
	struct zram *zram;
	struct request_queue *queue;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);

	queue = blk_alloc_queue(GFP_KERNEL);
	if (!queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_idr;
	}

	blk_queue_make_request(queue, zram_make_request);

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_err("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = queue;
	zram->disk->queue->queuedata = zram;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);

	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZED sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
	blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);

	/*
	 * zram_bio_discard() will clear all logical blocks if logical block
	 * size is identical with physical block size(PAGE_SIZE). But if it is
	 * different, we will skip discarding some parts of logical blocks in
	 * the part of the request range which isn't aligned to physical block
	 * size. So we can't ensure that all discarded logical blocks are
	 * zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);

	zram->disk->queue->backing_dev_info->capabilities |=
			(BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
	add_disk(zram->disk);

	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);
	if (ret < 0) {
		pr_err("Error creating sysfs group for device %d\n",
				device_id);
		goto out_free_disk;
	}
	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	zram_debugfs_register(zram);
	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_free_disk:
	del_gendisk(zram->disk);
	put_disk(zram->disk);
out_free_queue:
	blk_cleanup_queue(queue);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}
static int zram_remove(struct zram *zram)
{
	struct block_device *bdev;

	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	zram_debugfs_unregister(zram);
	/*
	 * Remove sysfs first, so no one will perform a disksize
	 * store while we destroy the devices. This also helps during
	 * hot_remove -- zram_reset_device() is the last holder of
	 * ->init_lock, no later/concurrent disksize_store() or any
	 * other sysfs handlers are possible.
	 */
	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
			&zram_disk_attr_group);

	/* Make sure all the pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	bdput(bdev);

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	del_gendisk(zram->disk);
	blk_cleanup_queue(zram->disk->queue);
	put_disk(zram->disk);

	kfree(zram);
	return 0;
}
/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in
 * the sense that reading from this file does alter the state of your
 * system -- it creates a new un-initialized zram device and returns back
 * this device's device_id (or an error code if it fails to create a new
 * device).
 */
static ssize_t hot_add_show(struct class *class,
			struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static CLASS_ATTR_RO(hot_add);

static ssize_t hot_remove_store(struct class *class,
			struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);
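
/*
 * Typical usage (the printed device id is illustrative):
 *
 *	cat /sys/class/zram-control/hot_add	# prints e.g. 1, creates zram1
 *	echo 1 > /sys/class/zram-control/hot_remove
 */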
static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name		= "zram-control",
	.owner		= THIS_MODULE,
	.class_groups	= zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
	zram_remove(ptr);
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	zram_debugfs_destroy();
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}
static int __init zram_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_debugfs_create();
	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");