2 * linux/drivers/block/loop.c
4 * Written by Theodore Ts'o, 3/29/93
6 * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
7 * permitted under the GNU Public License.
9 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
10 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
12 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
13 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
15 * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
17 * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
19 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
21 * Loadable modules and other fixes by AK, 1998
23 * Make real block number available to downstream transfer functions, enables
24 * CBC (and relatives) mode encryption requiring unique IVs per data block.
25 * Reed H. Petty, rhp@draper.net
27 * Maximum number of loop devices now dynamic via max_loop module parameter.
28 * Russell Kroll <rkroll@exploits.org> 19990701
30 * Maximum number of loop devices when compiled-in now selectable by passing
31 * max_loop=<1-255> to the kernel on boot.
32 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
35 * - Advisory locking is ignored here.
36 * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
37 * - Should use the underlying filesystems/devices read function if possible
38 * to support read ahead (and for write)
41 * - The block number as IV passing to low level transfer functions is broken:
42 * it passes the underlying device's block number instead of the
43 * offset. This makes it change for a given block when the file is
44 * moved/restored/copied and also doesn't work over NFS.
45 * AV, Feb 12, 2000: we pass the logical block number now. It fixes the
46 * problem above. Encryption modules that used to rely on the old scheme
47 * should just call ->i_mapping->bmap() to calculate the physical block
51 #include <linux/module.h>
53 #include <linux/sched.h>
55 #include <linux/file.h>
56 #include <linux/stat.h>
57 #include <linux/errno.h>
58 #include <linux/major.h>
60 #include <linux/init.h>
61 #include <linux/devfs_fs_kernel.h>
63 #include <asm/uaccess.h>
65 #include <linux/loop.h>
67 #define MAJOR_NR LOOP_MAJOR
69 #define DEVICE_NAME "loop"
70 #define DEVICE_REQUEST do_lo_request
71 #define DEVICE_NR(device) (MINOR(device))
72 #define DEVICE_ON(device)
73 #define DEVICE_OFF(device)
74 #define DEVICE_NO_RANDOM
75 #define TIMEOUT_VALUE (6 * HZ)
76 #include <linux/blk.h>
78 #include <linux/malloc.h>
79 static int max_loop
= 8;
80 static struct loop_device
*loop_dev
;
81 static int *loop_sizes
;
82 static int *loop_blksizes
;
83 static devfs_handle_t devfs_handle
= NULL
; /* For the directory */
91 static int transfer_none(struct loop_device
*lo
, int cmd
, char *raw_buf
,
92 char *loop_buf
, int size
, int real_block
)
95 memcpy(loop_buf
, raw_buf
, size
);
97 memcpy(raw_buf
, loop_buf
, size
);
101 static int transfer_xor(struct loop_device
*lo
, int cmd
, char *raw_buf
,
102 char *loop_buf
, int size
, int real_block
)
104 char *in
, *out
, *key
;
114 key
= lo
->lo_encrypt_key
;
115 keysize
= lo
->lo_encrypt_key_size
;
116 for (i
=0; i
< size
; i
++)
117 *out
++ = *in
++ ^ key
[(i
& 511) % keysize
];
121 static int none_status(struct loop_device
*lo
, struct loop_info
*info
)
126 static int xor_status(struct loop_device
*lo
, struct loop_info
*info
)
128 if (info
->lo_encrypt_key_size
<= 0)
133 struct loop_func_table none_funcs
= {
134 number
: LO_CRYPT_NONE
,
135 transfer
: transfer_none
,
139 struct loop_func_table xor_funcs
= {
140 number
: LO_CRYPT_XOR
,
141 transfer
: transfer_xor
,
145 /* xfer_funcs[0] is special - its release function is never called */
146 struct loop_func_table
*xfer_funcs
[MAX_LO_CRYPT
] = {
151 #define MAX_DISK_SIZE 1024*1024*1024
153 static void figure_loop_size(struct loop_device
*lo
)
157 if (S_ISREG(lo
->lo_dentry
->d_inode
->i_mode
))
158 size
= (lo
->lo_dentry
->d_inode
->i_size
- lo
->lo_offset
) >> BLOCK_SIZE_BITS
;
160 kdev_t lodev
= lo
->lo_device
;
161 if (blk_size
[MAJOR(lodev
)])
162 size
= blk_size
[MAJOR(lodev
)][MINOR(lodev
)] -
163 (lo
->lo_offset
>> BLOCK_SIZE_BITS
);
165 size
= MAX_DISK_SIZE
;
168 loop_sizes
[lo
->lo_number
] = size
;
171 static int lo_send(struct loop_device
*lo
, char *data
, int len
, loff_t pos
,
174 struct file
*file
= lo
->lo_backing_file
; /* kudos to NFsckingS */
175 struct address_space
*mapping
= lo
->lo_dentry
->d_inode
->i_mapping
;
176 struct address_space_operations
*aops
= mapping
->a_ops
;
180 unsigned size
, offset
;
182 index
= pos
>> PAGE_CACHE_SHIFT
;
183 offset
= pos
& (PAGE_CACHE_SIZE
- 1);
185 int IV
= index
* (PAGE_CACHE_SIZE
/blksize
) + offset
/blksize
;
186 size
= PAGE_CACHE_SIZE
- offset
;
190 page
= grab_cache_page(mapping
, index
);
193 if (aops
->prepare_write(page
, offset
, offset
+size
))
195 kaddr
= (char*)page_address(page
);
196 if ((lo
->transfer
)(lo
, WRITE
, kaddr
+offset
, data
, size
, IV
))
198 if (aops
->commit_write(file
, page
, offset
, offset
+size
))
206 page_cache_release(page
);
211 printk(KERN_ERR
"loop: transfer error block %ld\n", index
);
212 ClearPageUptodate(page
);
216 page_cache_release(page
);
221 struct lo_read_data
{
222 struct loop_device
*lo
;
227 static int lo_read_actor(read_descriptor_t
* desc
, struct page
*page
, unsigned long offset
, unsigned long size
)
230 unsigned long count
= desc
->count
;
231 struct lo_read_data
*p
= (struct lo_read_data
*)desc
->buf
;
232 struct loop_device
*lo
= p
->lo
;
233 int IV
= page
->index
* (PAGE_CACHE_SIZE
/p
->blksize
) + offset
/p
->blksize
;
238 kaddr
= (char*)kmap(page
);
239 if ((lo
->transfer
)(lo
,READ
,kaddr
+offset
,p
->data
,size
,IV
)) {
241 printk(KERN_ERR
"loop: transfer error block %ld\n",page
->index
);
242 desc
->error
= -EINVAL
;
246 desc
->count
= count
- size
;
247 desc
->written
+= size
;
252 static int lo_receive(struct loop_device
*lo
, char *data
, int len
, loff_t pos
,
255 struct file
*file
= lo
->lo_backing_file
;
256 struct lo_read_data cookie
;
257 read_descriptor_t desc
;
261 cookie
.blksize
= blksize
;
264 desc
.buf
= (char*)&cookie
;
266 do_generic_file_read(file
, &pos
, &desc
, lo_read_actor
);
270 static void do_lo_request(request_queue_t
* q
)
272 int block
, offset
, len
, blksize
, size
;
274 struct loop_device
*lo
;
275 struct buffer_head
*bh
;
276 struct request
*current_request
;
281 current_request
=CURRENT
;
282 blkdev_dequeue_request(current_request
);
283 if (MINOR(current_request
->rq_dev
) >= max_loop
)
285 lo
= &loop_dev
[MINOR(current_request
->rq_dev
)];
286 if (!lo
->lo_dentry
|| !lo
->transfer
)
288 if (current_request
->cmd
== WRITE
) {
289 if (lo
->lo_flags
& LO_FLAGS_READ_ONLY
)
291 } else if (current_request
->cmd
!= READ
) {
292 printk(KERN_ERR
"unknown loop device command (%d)?!?", current_request
->cmd
);
296 dest_addr
= current_request
->buffer
;
297 len
= current_request
->current_nr_sectors
<< 9;
299 blksize
= BLOCK_SIZE
;
300 if (blksize_size
[MAJOR(lo
->lo_device
)]) {
301 blksize
= blksize_size
[MAJOR(lo
->lo_device
)][MINOR(lo
->lo_device
)];
303 blksize
= BLOCK_SIZE
;
306 if (lo
->lo_flags
& LO_FLAGS_DO_BMAP
)
310 block
= current_request
->sector
* (512/blksize
);
313 block
= current_request
->sector
/ (blksize
>> 9);
314 offset
= (current_request
->sector
% (blksize
>> 9)) << 9;
316 block
+= lo
->lo_offset
/ blksize
;
317 offset
+= lo
->lo_offset
% blksize
;
318 if (offset
>= blksize
) {
322 spin_unlock_irq(&io_request_lock
);
326 size
= blksize
- offset
;
330 bh
= getblk(lo
->lo_device
, block
, blksize
);
332 printk(KERN_ERR
"loop: device %s: getblk(-, %d, %d) returned NULL",
333 kdevname(lo
->lo_device
),
337 if (!buffer_uptodate(bh
) && ((current_request
->cmd
== READ
) ||
338 (offset
|| (len
< blksize
)))) {
339 ll_rw_block(READ
, 1, &bh
);
341 if (!buffer_uptodate(bh
)) {
347 if ((lo
->transfer
)(lo
, current_request
->cmd
, bh
->b_data
+ offset
,
348 dest_addr
, size
, block
)) {
349 printk(KERN_ERR
"loop: transfer error block %d\n", block
);
354 if (current_request
->cmd
== WRITE
) {
355 mark_buffer_uptodate(bh
, 1);
356 mark_buffer_dirty(bh
, 1);
367 pos
= ((loff_t
)current_request
->sector
<< 9) + lo
->lo_offset
;
368 spin_unlock_irq(&io_request_lock
);
369 if (current_request
->cmd
== WRITE
) {
370 if (lo_send(lo
, dest_addr
, len
, pos
, blksize
))
373 if (lo_receive(lo
, dest_addr
, len
, pos
, blksize
))
377 spin_lock_irq(&io_request_lock
);
378 current_request
->sector
+= current_request
->current_nr_sectors
;
379 current_request
->nr_sectors
-= current_request
->current_nr_sectors
;
380 list_add(¤t_request
->queue
, ¤t_request
->q
->queue_head
);
384 spin_lock_irq(&io_request_lock
);
386 list_add(¤t_request
->queue
, ¤t_request
->q
->queue_head
);
391 static int loop_set_fd(struct loop_device
*lo
, kdev_t dev
, unsigned int arg
)
409 inode
= file
->f_dentry
->d_inode
;
411 printk(KERN_ERR
"loop_set_fd: NULL inode?!?\n");
415 if (S_ISBLK(inode
->i_mode
)) {
416 /* dentry will be wired, so... */
417 error
= blkdev_get(inode
->i_bdev
, file
->f_mode
,
418 file
->f_flags
, BDEV_FILE
);
420 lo
->lo_device
= inode
->i_rdev
;
423 /* Backed by a block device - don't need to hold onto
425 lo
->lo_backing_file
= NULL
;
426 } else if (S_ISREG(inode
->i_mode
)) {
427 struct address_space_operations
*aops
;
428 /* Backed by a regular file - we need to hold onto a file
429 structure for this file. Friggin' NFS can't live without
430 it on write and for reading we use do_generic_file_read(),
431 so... We create a new file structure based on the one
432 passed to us via 'arg'. This is to avoid changing the file
433 structure that the caller is using */
435 lo
->lo_device
= inode
->i_dev
;
436 lo
->lo_flags
= LO_FLAGS_DO_BMAP
;
439 lo
->lo_backing_file
= get_empty_filp();
440 if (lo
->lo_backing_file
) {
441 lo
->lo_backing_file
->f_mode
= file
->f_mode
;
442 lo
->lo_backing_file
->f_pos
= file
->f_pos
;
443 lo
->lo_backing_file
->f_flags
= file
->f_flags
;
444 lo
->lo_backing_file
->f_owner
= file
->f_owner
;
445 lo
->lo_backing_file
->f_dentry
= file
->f_dentry
;
446 lo
->lo_backing_file
->f_op
= file
->f_op
;
447 lo
->lo_backing_file
->private_data
= file
->private_data
;
448 file_moveto(lo
->lo_backing_file
, file
);
450 error
= get_write_access(inode
);
452 put_filp(lo
->lo_backing_file
);
453 lo
->lo_backing_file
= NULL
;
456 aops
= inode
->i_mapping
->a_ops
;
458 * If we can't read - sorry. If we only can't write - well,
459 * it's going to be read-only.
463 else if (!aops
->prepare_write
|| !aops
->commit_write
)
464 lo
->lo_flags
|= LO_FLAGS_READ_ONLY
;
469 if (IS_RDONLY (inode
) || is_read_only(lo
->lo_device
))
470 lo
->lo_flags
|= LO_FLAGS_READ_ONLY
;
472 set_device_ro(dev
, (lo
->lo_flags
& LO_FLAGS_READ_ONLY
)!=0);
474 lo
->lo_dentry
= dget(file
->f_dentry
);
477 figure_loop_size(lo
);
487 static int loop_release_xfer(struct loop_device
*lo
)
490 if (lo
->lo_encrypt_type
) {
491 struct loop_func_table
*xfer
= xfer_funcs
[lo
->lo_encrypt_type
];
492 if (xfer
&& xfer
->release
)
493 err
= xfer
->release(lo
);
494 if (xfer
&& xfer
->unlock
)
496 lo
->lo_encrypt_type
= 0;
501 static int loop_init_xfer(struct loop_device
*lo
, int type
,struct loop_info
*i
)
505 struct loop_func_table
*xfer
= xfer_funcs
[type
];
507 err
= xfer
->init(lo
, i
);
509 lo
->lo_encrypt_type
= type
;
517 static int loop_clr_fd(struct loop_device
*lo
, kdev_t dev
)
519 struct dentry
*dentry
= lo
->lo_dentry
;
523 if (lo
->lo_refcnt
> 1) /* we needed one fd for the ioctl */
526 if (S_ISBLK(dentry
->d_inode
->i_mode
))
527 blkdev_put(dentry
->d_inode
->i_bdev
, BDEV_FILE
);
529 lo
->lo_dentry
= NULL
;
531 if (lo
->lo_backing_file
!= NULL
) {
532 fput(lo
->lo_backing_file
);
533 lo
->lo_backing_file
= NULL
;
538 loop_release_xfer(lo
);
542 lo
->lo_encrypt_type
= 0;
544 lo
->lo_encrypt_key_size
= 0;
545 memset(lo
->lo_encrypt_key
, 0, LO_KEY_SIZE
);
546 memset(lo
->lo_name
, 0, LO_NAME_SIZE
);
547 loop_sizes
[lo
->lo_number
] = 0;
548 invalidate_buffers(dev
);
553 static int loop_set_status(struct loop_device
*lo
, struct loop_info
*arg
)
555 struct loop_info info
;
559 if (lo
->lo_encrypt_key_size
&& lo
->lo_key_owner
!= current
->uid
&&
560 !capable(CAP_SYS_ADMIN
))
564 if (copy_from_user(&info
, arg
, sizeof (struct loop_info
)))
566 if ((unsigned int) info
.lo_encrypt_key_size
> LO_KEY_SIZE
)
568 type
= info
.lo_encrypt_type
;
569 if (type
>= MAX_LO_CRYPT
|| xfer_funcs
[type
] == NULL
)
571 err
= loop_release_xfer(lo
);
573 err
= loop_init_xfer(lo
, type
, &info
);
577 lo
->lo_offset
= info
.lo_offset
;
578 strncpy(lo
->lo_name
, info
.lo_name
, LO_NAME_SIZE
);
580 lo
->transfer
= xfer_funcs
[type
]->transfer
;
581 lo
->ioctl
= xfer_funcs
[type
]->ioctl
;
582 lo
->lo_encrypt_key_size
= info
.lo_encrypt_key_size
;
583 lo
->lo_init
[0] = info
.lo_init
[0];
584 lo
->lo_init
[1] = info
.lo_init
[1];
585 if (info
.lo_encrypt_key_size
) {
586 memcpy(lo
->lo_encrypt_key
, info
.lo_encrypt_key
,
587 info
.lo_encrypt_key_size
);
588 lo
->lo_key_owner
= current
->uid
;
590 figure_loop_size(lo
);
594 static int loop_get_status(struct loop_device
*lo
, struct loop_info
*arg
)
596 struct loop_info info
;
602 memset(&info
, 0, sizeof(info
));
603 info
.lo_number
= lo
->lo_number
;
604 info
.lo_device
= kdev_t_to_nr(lo
->lo_dentry
->d_inode
->i_dev
);
605 info
.lo_inode
= lo
->lo_dentry
->d_inode
->i_ino
;
606 info
.lo_rdevice
= kdev_t_to_nr(lo
->lo_device
);
607 info
.lo_offset
= lo
->lo_offset
;
608 info
.lo_flags
= lo
->lo_flags
;
609 strncpy(info
.lo_name
, lo
->lo_name
, LO_NAME_SIZE
);
610 info
.lo_encrypt_type
= lo
->lo_encrypt_type
;
611 if (lo
->lo_encrypt_key_size
&& capable(CAP_SYS_ADMIN
)) {
612 info
.lo_encrypt_key_size
= lo
->lo_encrypt_key_size
;
613 memcpy(info
.lo_encrypt_key
, lo
->lo_encrypt_key
,
614 lo
->lo_encrypt_key_size
);
616 return copy_to_user(arg
, &info
, sizeof(info
)) ? -EFAULT
: 0;
619 static int lo_ioctl(struct inode
* inode
, struct file
* file
,
620 unsigned int cmd
, unsigned long arg
)
622 struct loop_device
*lo
;
627 if (MAJOR(inode
->i_rdev
) != MAJOR_NR
) {
628 printk(KERN_WARNING
"lo_ioctl: pseudo-major != %d\n", MAJOR_NR
);
631 dev
= MINOR(inode
->i_rdev
);
637 return loop_set_fd(lo
, inode
->i_rdev
, arg
);
639 return loop_clr_fd(lo
, inode
->i_rdev
);
640 case LOOP_SET_STATUS
:
641 return loop_set_status(lo
, (struct loop_info
*) arg
);
642 case LOOP_GET_STATUS
:
643 return loop_get_status(lo
, (struct loop_info
*) arg
);
644 case BLKGETSIZE
: /* Return device size */
649 return put_user(loop_sizes
[lo
->lo_number
] << 1, (long *) arg
);
651 return lo
->ioctl
? lo
->ioctl(lo
, cmd
, arg
) : -EINVAL
;
656 static int lo_open(struct inode
*inode
, struct file
*file
)
658 struct loop_device
*lo
;
664 if (MAJOR(inode
->i_rdev
) != MAJOR_NR
) {
665 printk(KERN_WARNING
"lo_open: pseudo-major != %d\n", MAJOR_NR
);
668 dev
= MINOR(inode
->i_rdev
);
669 if (dev
>= max_loop
) {
674 type
= lo
->lo_encrypt_type
;
675 if (type
&& xfer_funcs
[type
] && xfer_funcs
[type
]->lock
)
676 xfer_funcs
[type
]->lock(lo
);
682 static int lo_release(struct inode
*inode
, struct file
*file
)
684 struct loop_device
*lo
;
689 if (MAJOR(inode
->i_rdev
) != MAJOR_NR
) {
690 printk(KERN_WARNING
"lo_release: pseudo-major != %d\n", MAJOR_NR
);
693 dev
= MINOR(inode
->i_rdev
);
697 if (lo
->lo_refcnt
<= 0)
698 printk(KERN_ERR
"lo_release: refcount(%d) <= 0\n", lo
->lo_refcnt
);
700 int type
= lo
->lo_encrypt_type
;
702 if (xfer_funcs
[type
] && xfer_funcs
[type
]->unlock
)
703 xfer_funcs
[type
]->unlock(lo
);
709 static struct block_device_operations lo_fops
= {
716 * And now the modules code and kernel interface.
719 #define loop_init init_module
720 MODULE_PARM(max_loop
, "i");
721 MODULE_PARM_DESC(max_loop
, "Maximum number of loop devices (1-255)");
724 int loop_register_transfer(struct loop_func_table
*funcs
)
726 if ((unsigned)funcs
->number
> MAX_LO_CRYPT
|| xfer_funcs
[funcs
->number
])
728 xfer_funcs
[funcs
->number
] = funcs
;
732 int loop_unregister_transfer(int number
)
734 struct loop_device
*lo
;
736 if ((unsigned)number
>= MAX_LO_CRYPT
)
738 for (lo
= &loop_dev
[0]; lo
< &loop_dev
[max_loop
]; lo
++) {
739 int type
= lo
->lo_encrypt_type
;
740 if (type
== number
) {
741 xfer_funcs
[type
]->release(lo
);
743 lo
->lo_encrypt_type
= 0;
746 xfer_funcs
[number
] = NULL
;
750 EXPORT_SYMBOL(loop_register_transfer
);
751 EXPORT_SYMBOL(loop_unregister_transfer
);
753 int __init
loop_init(void)
757 if (devfs_register_blkdev(MAJOR_NR
, "loop", &lo_fops
)) {
758 printk(KERN_WARNING
"Unable to get major number %d for loop device\n",
762 devfs_handle
= devfs_mk_dir (NULL
, "loop", 0, NULL
);
763 devfs_register_series (devfs_handle
, "%u", max_loop
, DEVFS_FL_DEFAULT
,
765 S_IFBLK
| S_IRUSR
| S_IWUSR
| S_IRGRP
, 0, 0,
768 if ((max_loop
< 1) || (max_loop
> 255)) {
769 printk (KERN_WARNING
"loop: invalid max_loop (must be between 1 and 255), using default (8)\n");
773 printk(KERN_INFO
"loop: registered device at major %d\n", MAJOR_NR
);
774 printk(KERN_INFO
"loop: enabling %d loop devices\n", max_loop
);
776 loop_dev
= kmalloc (max_loop
* sizeof(struct loop_device
), GFP_KERNEL
);
778 printk (KERN_ERR
"loop: Unable to create loop_dev\n");
782 loop_sizes
= kmalloc(max_loop
* sizeof(int), GFP_KERNEL
);
784 printk (KERN_ERR
"loop: Unable to create loop_sizes\n");
789 loop_blksizes
= kmalloc (max_loop
* sizeof(int), GFP_KERNEL
);
790 if (!loop_blksizes
) {
791 printk (KERN_ERR
"loop: Unable to create loop_blksizes\n");
797 blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR
), DEVICE_REQUEST
);
798 blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR
), 0);
799 for (i
=0; i
< max_loop
; i
++) {
800 memset(&loop_dev
[i
], 0, sizeof(struct loop_device
));
801 loop_dev
[i
].lo_number
= i
;
803 memset(loop_sizes
, 0, max_loop
* sizeof(int));
804 memset(loop_blksizes
, 0, max_loop
* sizeof(int));
805 blk_size
[MAJOR_NR
] = loop_sizes
;
806 blksize_size
[MAJOR_NR
] = loop_blksizes
;
807 for (i
=0; i
< max_loop
; i
++)
808 register_disk(NULL
, MKDEV(MAJOR_NR
,i
), 1, &lo_fops
, 0);
814 void cleanup_module(void)
816 devfs_unregister (devfs_handle
);
817 if (devfs_unregister_blkdev(MAJOR_NR
, "loop") != 0)
818 printk(KERN_WARNING
"loop: cannot unregister blkdev\n");
822 kfree (loop_blksizes
);
827 static int __init
max_loop_setup(char *str
)
829 max_loop
= simple_strtol(str
,NULL
,0);
833 __setup("max_loop=", max_loop_setup
);