/*
 *  linux/drivers/block/loop.c
 *
 *  Written by Theodore Ts'o, 3/29/93
 *
 * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
 * permitted under the GNU General Public License.
 *
 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
 *
 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
 *
 * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
 *
 * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
 *
 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
 *
 * Loadable modules and other fixes by AK, 1998
 *
 * Made the real block number available to downstream transfer functions,
 * enabling CBC (and related) mode encryption, which requires unique IVs
 * per data block.
 * Reed H. Petty, rhp@draper.net
 *
 * Maximum number of loop devices now dynamic via max_loop module parameter.
 * Russell Kroll <rkroll@exploits.org> 19990701
 *
 * Maximum number of loop devices when compiled-in now selectable by passing
 * max_loop=<1-255> to the kernel on boot.
 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
 *
 * Completely rewrote request handling to be make_request_fn style and
 * non-blocking, pushing work to a helper thread. Lots of fixes from
 * Al Viro too.
 * Jens Axboe <axboe@suse.de>, Nov 2000
 *
 * Support up to 256 loop devices
 * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
 *
 * Still To Fix:
 * - Advisory locking is ignored here.
 * - Should use its own CAP_* category instead of CAP_SYS_ADMIN
 *
 * WARNING/FIXME:
 * - Passing the block number as IV to low-level transfer functions is
 *   broken: it passes the underlying device's block number instead of the
 *   offset. This makes it change for a given block when the file is
 *   moved/restored/copied and also doesn't work over NFS.
 * AV, Feb 12, 2000: we pass the logical block number now. It fixes the
 * problem above. Encryption modules that used to rely on the old scheme
 * should just call ->i_mapping->bmap() to calculate the physical block
 * number.
 */

#include <linux/config.h>
#include <linux/module.h>

#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/wait.h>
#include <linux/blk.h>
#include <linux/blkpg.h>
#include <linux/init.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/smp_lock.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/loop.h>
#include <linux/suspend.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>		/* for invalidate_bdev() */

#include <asm/uaccess.h>

#define MAJOR_NR LOOP_MAJOR

static int max_loop = 8;
static struct loop_device *loop_dev;
static struct gendisk **disks;
static devfs_handle_t devfs_handle;	/* For the directory */

/*
 * Transfer functions
 */
static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
			 char *loop_buf, int size, sector_t real_block)
{
	if (raw_buf != loop_buf) {
		if (cmd == READ)
			memcpy(loop_buf, raw_buf, size);
		else
			memcpy(raw_buf, loop_buf, size);
	}

	return 0;
}

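/*
 * Simple XOR obfuscation: every byte is XORed with a byte of the
 * user-supplied key. The key index depends only on the byte offset
 * modulo 512, so the keystream pattern repeats every 512 bytes.
 */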
static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf,
			char *loop_buf, int size, sector_t real_block)
{
	char *in, *out, *key;
	int i, keysize;

	if (cmd == READ) {
		in = raw_buf;
		out = loop_buf;
	} else {
		in = loop_buf;
		out = raw_buf;
	}

	key = lo->lo_encrypt_key;
	keysize = lo->lo_encrypt_key_size;
	for (i = 0; i < size; i++)
		*out++ = *in++ ^ key[(i & 511) % keysize];
	return 0;
}

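/*
 * With no transformation the driver can remap the original bio directly
 * to the backing device instead of copying into bounce pages; signal
 * that with LO_FLAGS_BH_REMAP (see loop_get_buffer()).
 */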
static int none_status(struct loop_device *lo, struct loop_info *info)
{
	lo->lo_flags |= LO_FLAGS_BH_REMAP;
	return 0;
}

static int xor_status(struct loop_device *lo, struct loop_info *info)
{
	if (info->lo_encrypt_key_size <= 0)
		return -EINVAL;
	return 0;
}

struct loop_func_table none_funcs = {
	.number = LO_CRYPT_NONE,
	.transfer = transfer_none,
	.init = none_status,
};

struct loop_func_table xor_funcs = {
	.number = LO_CRYPT_XOR,
	.transfer = transfer_xor,
	.init = xor_status
};

/* xfer_funcs[0] is special - its release function is never called */
struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
	&none_funcs,
	&xor_funcs
};

static int figure_loop_size(struct loop_device *lo)
{
	loff_t size = lo->lo_backing_file->f_dentry->d_inode->i_mapping->host->i_size;
	sector_t x;

	/*
	 * Unfortunately, if we want to do I/O on the device,
	 * the number of 512-byte sectors has to fit into a sector_t.
	 */
	size = (size - lo->lo_offset) >> 9;
	x = (sector_t)size;
	if ((loff_t)x != size)
		return -EFBIG;

	set_capacity(disks[lo->lo_number], size);
	return 0;
}

static inline int lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf,
				 char *lbuf, int size, sector_t rblock)
{
	if (!lo->transfer)
		return 0;

	return lo->transfer(lo, cmd, rbuf, lbuf, size, rblock);
}

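/*
 * Write path for file-backed loop: walk the bvec through the backing
 * file's page cache one page at a time, using the address_space
 * prepare_write/commit_write pair and running the transfer (encryption)
 * function while the data is copied into each page.
 */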
static int
do_lo_send(struct loop_device *lo, struct bio_vec *bvec, int bsize, loff_t pos)
{
	struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
	struct address_space_operations *aops = mapping->a_ops;
	struct page *page;
	char *kaddr, *data;
	pgoff_t index;
	unsigned size, offset;
	int len;
	int ret = 0;

	down(&mapping->host->i_sem);
	index = pos >> PAGE_CACHE_SHIFT;
	offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
	data = kmap(bvec->bv_page) + bvec->bv_offset;
	len = bvec->bv_len;
	while (len > 0) {
		sector_t IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize;
		int transfer_result;

		size = PAGE_CACHE_SIZE - offset;
		if (size > len)
			size = len;

		page = grab_cache_page(mapping, index);
		if (!page)
			goto fail;
		if (aops->prepare_write(file, page, offset, offset+size))
			goto unlock;
		kaddr = kmap(page);
		transfer_result = lo_do_transfer(lo, WRITE, kaddr + offset, data, size, IV);
		if (transfer_result) {
			/*
			 * The transfer failed, but we still write the data to
			 * keep prepare/commit calls balanced.
			 */
			printk(KERN_ERR "loop: transfer error block %llu\n", (unsigned long long)index);
			memset(kaddr + offset, 0, size);
		}
		flush_dcache_page(page);
		kunmap(page);
		if (aops->commit_write(file, page, offset, offset+size))
			goto unlock;
		if (transfer_result)
			goto unlock;
		data += size;
		len -= size;
		offset = 0;
		index++;
		pos += size;
		unlock_page(page);
		page_cache_release(page);
	}
	up(&mapping->host->i_sem);
out:
	kunmap(bvec->bv_page);
	balance_dirty_pages(mapping);
	return ret;

unlock:
	unlock_page(page);
	page_cache_release(page);
fail:
	up(&mapping->host->i_sem);
	ret = -1;
	goto out;
}

static int
lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
{
	unsigned vecnr;
	int ret = 0;

	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];

		ret = do_lo_send(lo, bvec, bsize, pos);
		if (ret < 0)
			break;
		pos += bvec->bv_len;
	}
	return ret;
}

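/*
 * Read path for file-backed loop: ->sendfile() on the backing file walks
 * its page cache and hands each page to lo_read_actor(), which runs the
 * transfer (decryption) function while copying into the bio page.
 */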
struct lo_read_data {
	struct loop_device *lo;
	char *data;
	int bsize;
};

static int lo_read_actor(read_descriptor_t *desc, struct page *page,
			 unsigned long offset, unsigned long size)
{
	char *kaddr;
	unsigned long count = desc->count;
	struct lo_read_data *p = (struct lo_read_data *)desc->buf;
	struct loop_device *lo = p->lo;
	int IV = page->index * (PAGE_CACHE_SIZE/p->bsize) + offset/p->bsize;

	if (size > count)
		size = count;

	kaddr = kmap(page);
	if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) {
		size = 0;
		printk(KERN_ERR "loop: transfer error block %ld\n",
		       page->index);
		desc->error = -EINVAL;
	}
	kunmap(page);

	desc->count = count - size;
	desc->written += size;
	p->data += size;
	return size;
}

static int
do_lo_receive(struct loop_device *lo,
	      struct bio_vec *bvec, int bsize, loff_t pos)
{
	struct lo_read_data cookie;
	struct file *file;
	int retval;

	cookie.lo = lo;
	cookie.data = kmap(bvec->bv_page) + bvec->bv_offset;
	cookie.bsize = bsize;
	file = lo->lo_backing_file;
	retval = file->f_op->sendfile(file, &pos, bvec->bv_len,
				      lo_read_actor, &cookie);
	kunmap(bvec->bv_page);
	return (retval < 0) ? retval : 0;
}

static int
lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
{
	unsigned vecnr;
	int ret = 0;

	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];

		ret = do_lo_receive(lo, bvec, bsize, pos);
		if (ret < 0)
			break;
		pos += bvec->bv_len;
	}
	return ret;
}

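/*
 * Compute the IV for a request: the index of the lo_blocksize-sized
 * block that the sector falls in, relative to the start of the backing
 * store and taking lo_offset into account.
 */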
static inline unsigned long loop_get_iv(struct loop_device *lo,
					unsigned long sector)
{
	int bs = lo->lo_blocksize;
	unsigned long offset, IV;

	IV = sector / (bs >> 9) + lo->lo_offset / bs;
	offset = ((sector % (bs >> 9)) << 9) + lo->lo_offset % bs;
	if (offset >= bs)
		IV++;

	return IV;
}

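/*
 * Service a bio against the backing file, at the byte offset
 * corresponding to its start sector plus lo_offset.
 */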
static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
{
	loff_t pos;
	int ret;

	pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;

	do {
		if (bio_rw(bio) == WRITE)
			ret = lo_send(lo, bio, lo->lo_blocksize, pos);
		else
			ret = lo_receive(lo, bio, lo->lo_blocksize, pos);

	} while (++bio->bi_idx < bio->bi_vcnt);

	return ret;
}

static int loop_end_io_transfer(struct bio *, unsigned int, int);

static void loop_put_buffer(struct bio *bio)
{
	/*
	 * check bi_end_io, may just be a remapped bio
	 */
	if (bio && bio->bi_end_io == loop_end_io_transfer) {
		int i;

		for (i = 0; i < bio->bi_vcnt; i++)
			__free_page(bio->bi_io_vec[i].bv_page);

		bio_put(bio);
	}
}

/*
 * Add bio to back of pending list
 */
static void loop_add_bio(struct loop_device *lo, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&lo->lo_lock, flags);
	if (lo->lo_biotail) {
		lo->lo_biotail->bi_next = bio;
		lo->lo_biotail = bio;
	} else
		lo->lo_bio = lo->lo_biotail = bio;
	spin_unlock_irqrestore(&lo->lo_lock, flags);

	up(&lo->lo_bh_mutex);
}

/*
 * Grab first pending buffer
 */
static struct bio *loop_get_bio(struct loop_device *lo)
{
	struct bio *bio;

	spin_lock_irq(&lo->lo_lock);
	if ((bio = lo->lo_bio)) {
		if (bio == lo->lo_biotail)
			lo->lo_biotail = NULL;
		lo->lo_bio = bio->bi_next;
		bio->bi_next = NULL;
	}
	spin_unlock_irq(&lo->lo_lock);

	return bio;
}

/*
 * if this was a WRITE lo->transfer stuff has already been done. for READs,
 * queue it for the loop thread and let it do the transfer out of
 * bi_end_io context (we don't want to do decrypt of a page with irqs
 * disabled)
 */
static int loop_end_io_transfer(struct bio *bio, unsigned int bytes_done, int err)
{
	struct bio *rbh = bio->bi_private;
	struct loop_device *lo = rbh->bi_bdev->bd_disk->private_data;

	if (bio->bi_size)
		return 1;

	if (err || bio_rw(bio) == WRITE) {
		bio_endio(rbh, rbh->bi_size, err);
		if (atomic_dec_and_test(&lo->lo_pending))
			up(&lo->lo_bh_mutex);
		loop_put_buffer(bio);
	} else
		loop_add_bio(lo, bio);

	return 0;
}

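/*
 * Prepare the bio that will actually be submitted to the backing device:
 * either the original bio remapped in place (no transfer function), or a
 * freshly allocated copy whose pages will hold the transformed data.
 */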
static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh)
{
	struct bio *bio;

	/*
	 * for xfer_funcs that can operate on the same bh, do that
	 */
	if (lo->lo_flags & LO_FLAGS_BH_REMAP) {
		bio = rbh;
		goto out_bh;
	}

	bio = bio_copy(rbh, GFP_NOIO, rbh->bi_rw & WRITE);

	bio->bi_end_io = loop_end_io_transfer;
	bio->bi_private = rbh;

out_bh:
	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
	bio->bi_rw = rbh->bi_rw;
	bio->bi_bdev = lo->lo_device;

	return bio;
}

static int
bio_transfer(struct loop_device *lo, struct bio *to_bio,
	     struct bio *from_bio)
{
	unsigned long IV = loop_get_iv(lo, from_bio->bi_sector);
	struct bio_vec *from_bvec, *to_bvec;
	char *vto, *vfrom;
	int ret = 0, i;

	__bio_for_each_segment(from_bvec, from_bio, i, 0) {
		to_bvec = &to_bio->bi_io_vec[i];

		kmap(from_bvec->bv_page);
		kmap(to_bvec->bv_page);
		vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
		vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
		ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
				      from_bvec->bv_len, IV);
		kunmap(from_bvec->bv_page);
		kunmap(to_bvec->bv_page);
	}

	return ret;
}

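/*
 * make_request_fn entry point. File-backed devices queue the bio for
 * loop_thread; block-backed devices are remapped (encrypting writes
 * in-line here, decrypting reads later from the completion path) and
 * resubmitted with generic_make_request().
 */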
static int loop_make_request(request_queue_t *q, struct bio *old_bio)
{
	struct bio *new_bio = NULL;
	struct loop_device *lo = q->queuedata;
	unsigned long IV;
	int rw = bio_rw(old_bio);

	if (!lo)
		goto out;

	spin_lock_irq(&lo->lo_lock);
	if (lo->lo_state != Lo_bound)
		goto inactive;
	atomic_inc(&lo->lo_pending);
	spin_unlock_irq(&lo->lo_lock);

	if (rw == WRITE) {
		if (lo->lo_flags & LO_FLAGS_READ_ONLY)
			goto err;
	} else if (rw == READA) {
		rw = READ;
	} else if (rw != READ) {
		printk(KERN_ERR "loop: unknown command (%x)\n", rw);
		goto err;
	}

	blk_queue_bounce(q, &old_bio);

	/*
	 * file backed, queue for loop_thread to handle
	 */
	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
		loop_add_bio(lo, old_bio);
		return 0;
	}

	/*
	 * piggy old buffer on original, and submit for I/O
	 */
	new_bio = loop_get_buffer(lo, old_bio);
	IV = loop_get_iv(lo, old_bio->bi_sector);
	if (rw == WRITE) {
		if (bio_transfer(lo, new_bio, old_bio))
			goto err;
	}

	generic_make_request(new_bio);
	return 0;

err:
	if (atomic_dec_and_test(&lo->lo_pending))
		up(&lo->lo_bh_mutex);
	loop_put_buffer(new_bio);
out:
	bio_io_error(old_bio, old_bio->bi_size);
	return 0;
inactive:
	spin_unlock_irq(&lo->lo_lock);
	goto out;
}

static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
{
	int ret;

	/*
	 * For block backed loop, we know this is a READ
	 */
	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
		ret = do_bio_filebacked(lo, bio);
		bio_endio(bio, bio->bi_size, ret);
	} else {
		struct bio *rbh = bio->bi_private;

		ret = bio_transfer(lo, bio, rbh);

		bio_endio(rbh, rbh->bi_size, ret);
		loop_put_buffer(bio);
	}
}

/*
 * worker thread that handles reads/writes to file backed loop devices,
 * to avoid blocking in our make_request_fn. it also does loop decrypting
 * on reads for block backed loop, as that is too heavy to do from
 * b_end_io context where irqs may be disabled.
 */
static int loop_thread(void *data)
{
	struct loop_device *lo = data;
	struct bio *bio;

	daemonize();

	sprintf(current->comm, "loop%d", lo->lo_number);
	current->flags |= PF_IOTHREAD;	/* loop can be used in an encrypted device
					   hence, it mustn't be stopped at all
					   because it could be indirectly used
					   during suspension */

	spin_lock_irq(&current->sig->siglock);
	sigfillset(&current->blocked);
	flush_signals(current);
	spin_unlock_irq(&current->sig->siglock);

	set_user_nice(current, -20);

	lo->lo_state = Lo_bound;
	atomic_inc(&lo->lo_pending);

	/*
	 * up sem, we are running
	 */
	up(&lo->lo_sem);

	for (;;) {
		down_interruptible(&lo->lo_bh_mutex);
		/*
		 * could be upped because of tear-down, not because of
		 * pending work
		 */
		if (!atomic_read(&lo->lo_pending))
			break;

		bio = loop_get_bio(lo);
		if (!bio) {
			printk("loop: missing bio\n");
			continue;
		}
		loop_handle_bio(lo, bio);

		/*
		 * upped both for pending work and tear-down, lo_pending
		 * will hit zero then
		 */
		if (atomic_dec_and_test(&lo->lo_pending))
			break;
	}

	up(&lo->lo_sem);
	return 0;
}

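/*
 * LOOP_SET_FD: bind the loop device to the file behind the caller's fd.
 * Regular files go through the page-cache path (LO_FLAGS_DO_BMAP); block
 * devices are bound for direct bio remapping. The binding only becomes
 * live once the freshly started loop_thread ups lo_sem.
 */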
static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
		       struct block_device *bdev, unsigned int arg)
{
	struct file *file;
	struct inode *inode;
	struct block_device *lo_device = NULL;
	unsigned lo_blocksize;
	int lo_flags = 0;
	int error;

	MOD_INC_USE_COUNT;

	error = -EBUSY;
	if (lo->lo_state != Lo_unbound)
		goto out;

	error = -EBADF;
	file = fget(arg);
	if (!file)
		goto out;

	error = -EINVAL;
	inode = file->f_dentry->d_inode;

	if (!(file->f_mode & FMODE_WRITE))
		lo_flags |= LO_FLAGS_READ_ONLY;

	if (S_ISBLK(inode->i_mode)) {
		lo_device = inode->i_bdev;
		if (lo_device == bdev) {
			error = -EBUSY;
			goto out_putf; /* was "goto out", which leaked the fget() reference */
		}
		lo_blocksize = block_size(lo_device);
		if (bdev_read_only(lo_device))
			lo_flags |= LO_FLAGS_READ_ONLY;
	} else if (S_ISREG(inode->i_mode)) {
		struct address_space_operations *aops = inode->i_mapping->a_ops;
		/*
		 * If we can't read - sorry. If we only can't write - well,
		 * it's going to be read-only.
		 */
		if (!inode->i_fop->sendfile)
			goto out_putf;

		if (!aops->prepare_write || !aops->commit_write)
			lo_flags |= LO_FLAGS_READ_ONLY;

		lo_blocksize = inode->i_blksize;
		lo_flags |= LO_FLAGS_DO_BMAP;
		error = 0;
	} else
		goto out_putf;

	get_file(file);

	if (!(lo_file->f_mode & FMODE_WRITE))
		lo_flags |= LO_FLAGS_READ_ONLY;

	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);

	lo->lo_blocksize = lo_blocksize;
	lo->lo_device = lo_device;
	lo->lo_flags = lo_flags;
	lo->lo_backing_file = file;
	lo->transfer = NULL;
	lo->ioctl = NULL;
	if (figure_loop_size(lo)) {
		error = -EFBIG;
		fput(file);
		goto out_putf;
	}

	lo->old_gfp_mask = inode->i_mapping->gfp_mask;
	inode->i_mapping->gfp_mask = GFP_NOIO;

	set_blocksize(bdev, lo_blocksize);

	lo->lo_bio = lo->lo_biotail = NULL;

	/*
	 * set queue make_request_fn, and add limits based on lower level
	 * device
	 */
	blk_queue_make_request(&lo->lo_queue, loop_make_request);
	blk_queue_bounce_limit(&lo->lo_queue, BLK_BOUNCE_HIGH);
	lo->lo_queue.queuedata = lo;

	/*
	 * we remap to a block device, make sure we correctly stack limits
	 */
	if (S_ISBLK(inode->i_mode)) {
		request_queue_t *q = bdev_get_queue(lo_device);

		blk_queue_max_sectors(&lo->lo_queue, q->max_sectors);
		blk_queue_max_phys_segments(&lo->lo_queue, q->max_phys_segments);
		blk_queue_max_hw_segments(&lo->lo_queue, q->max_hw_segments);
		blk_queue_max_segment_size(&lo->lo_queue, q->max_segment_size);
		blk_queue_segment_boundary(&lo->lo_queue, q->seg_boundary_mask);
		blk_queue_merge_bvec(&lo->lo_queue, q->merge_bvec_fn);
	}

	kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
	down(&lo->lo_sem);

	fput(file);
	return 0;

out_putf:
	fput(file);
out:
	MOD_DEC_USE_COUNT;
	return error;
}

static int loop_release_xfer(struct loop_device *lo)
{
	int err = 0;

	if (lo->lo_encrypt_type) {
		struct loop_func_table *xfer = xfer_funcs[lo->lo_encrypt_type];

		if (xfer && xfer->release)
			err = xfer->release(lo);
		if (xfer && xfer->unlock)
			xfer->unlock(lo);
		lo->lo_encrypt_type = 0;
	}
	return err;
}

static int loop_init_xfer(struct loop_device *lo, int type, struct loop_info *i)
{
	int err = 0;

	if (type) {
		struct loop_func_table *xfer = xfer_funcs[type];

		if (xfer->init)
			err = xfer->init(lo, i);
		if (!err) {
			lo->lo_encrypt_type = type;
			if (xfer->lock)
				xfer->lock(lo);
		}
	}
	return err;
}

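/*
 * LOOP_CLR_FD: tear down a binding. Marks the device Lo_rundown, waits
 * for loop_thread to drain pending work and exit, then clears all state
 * and drops the backing file reference.
 */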
static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
{
	struct file *filp = lo->lo_backing_file;
	int gfp = lo->old_gfp_mask;

	if (lo->lo_state != Lo_bound)
		return -ENXIO;
	if (lo->lo_refcnt > 1)	/* we needed one fd for the ioctl */
		return -EBUSY;
	if (filp == NULL)
		return -EINVAL;

	spin_lock_irq(&lo->lo_lock);
	lo->lo_state = Lo_rundown;
	if (atomic_dec_and_test(&lo->lo_pending))
		up(&lo->lo_bh_mutex);
	spin_unlock_irq(&lo->lo_lock);

	down(&lo->lo_sem);

	lo->lo_backing_file = NULL;

	loop_release_xfer(lo);
	lo->transfer = NULL;
	lo->ioctl = NULL;
	lo->lo_device = NULL;
	lo->lo_encrypt_type = 0;
	lo->lo_offset = 0;
	lo->lo_encrypt_key_size = 0;
	lo->lo_flags = 0;
	lo->lo_queue.queuedata = NULL;
	memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
	memset(lo->lo_name, 0, LO_NAME_SIZE);
	invalidate_bdev(bdev, 0);
	set_capacity(disks[lo->lo_number], 0);
	filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp;
	lo->lo_state = Lo_unbound;
	fput(filp);
	MOD_DEC_USE_COUNT;
	return 0;
}

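/*
 * LOOP_SET_STATUS: (re)configure offset and transfer function from a
 * user-supplied struct loop_info; only the key owner or CAP_SYS_ADMIN
 * may change an existing encryption key.
 */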
static int loop_set_status(struct loop_device *lo, struct loop_info *arg)
{
	struct loop_info info;
	int err;
	unsigned int type;
	loff_t offset;

	if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (lo->lo_state != Lo_bound)
		return -ENXIO;
	if (copy_from_user(&info, arg, sizeof (struct loop_info)))
		return -EFAULT;
	if ((unsigned int) info.lo_encrypt_key_size > LO_KEY_SIZE)
		return -EINVAL;
	type = info.lo_encrypt_type;
	if (type >= MAX_LO_CRYPT || xfer_funcs[type] == NULL)
		return -EINVAL;
	if (type == LO_CRYPT_XOR && info.lo_encrypt_key_size == 0)
		return -EINVAL;

	err = loop_release_xfer(lo);
	if (!err)
		err = loop_init_xfer(lo, type, &info);

	offset = lo->lo_offset;
	if (offset != info.lo_offset) {
		lo->lo_offset = info.lo_offset;
		if (figure_loop_size(lo)) {
			err = -EFBIG;
			lo->lo_offset = offset;
		}
	}

	if (err)
		return err;

	strncpy(lo->lo_name, info.lo_name, LO_NAME_SIZE);

	lo->transfer = xfer_funcs[type]->transfer;
	lo->ioctl = xfer_funcs[type]->ioctl;
	lo->lo_encrypt_key_size = info.lo_encrypt_key_size;
	lo->lo_init[0] = info.lo_init[0];
	lo->lo_init[1] = info.lo_init[1];
	if (info.lo_encrypt_key_size) {
		memcpy(lo->lo_encrypt_key, info.lo_encrypt_key,
		       info.lo_encrypt_key_size);
		lo->lo_key_owner = current->uid;
	}

	return 0;
}

static int loop_get_status(struct loop_device *lo, struct loop_info *arg)
{
	struct file *file = lo->lo_backing_file;
	struct loop_info info;
	struct kstat stat;
	int error;

	if (lo->lo_state != Lo_bound)
		return -ENXIO;
	if (!arg)
		return -EINVAL;
	error = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
	if (error)
		return error;
	memset(&info, 0, sizeof(info));
	info.lo_number = lo->lo_number;
	info.lo_device = stat.dev;
	info.lo_inode = stat.ino;
	info.lo_rdevice = lo->lo_device ? stat.rdev : stat.dev;
	info.lo_offset = lo->lo_offset;
	info.lo_flags = lo->lo_flags;
	strncpy(info.lo_name, lo->lo_name, LO_NAME_SIZE);
	info.lo_encrypt_type = lo->lo_encrypt_type;
	if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
		info.lo_encrypt_key_size = lo->lo_encrypt_key_size;
		memcpy(info.lo_encrypt_key, lo->lo_encrypt_key,
		       lo->lo_encrypt_key_size);
	}
	return copy_to_user(arg, &info, sizeof(info)) ? -EFAULT : 0;
}

static int lo_ioctl(struct inode *inode, struct file *file,
		    unsigned int cmd, unsigned long arg)
{
	struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
	int err;

	down(&lo->lo_ctl_mutex);
	switch (cmd) {
	case LOOP_SET_FD:
		err = loop_set_fd(lo, file, inode->i_bdev, arg);
		break;
	case LOOP_CLR_FD:
		err = loop_clr_fd(lo, inode->i_bdev);
		break;
	case LOOP_SET_STATUS:
		err = loop_set_status(lo, (struct loop_info *) arg);
		break;
	case LOOP_GET_STATUS:
		err = loop_get_status(lo, (struct loop_info *) arg);
		break;
	default:
		err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
	}
	up(&lo->lo_ctl_mutex);
	return err;
}

static int lo_open(struct inode *inode, struct file *file)
{
	struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
	int type;

	down(&lo->lo_ctl_mutex);

	type = lo->lo_encrypt_type;
	if (type && xfer_funcs[type] && xfer_funcs[type]->lock)
		xfer_funcs[type]->lock(lo);
	lo->lo_refcnt++;
	up(&lo->lo_ctl_mutex);
	return 0;
}

static int lo_release(struct inode *inode, struct file *file)
{
	struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
	int type;

	down(&lo->lo_ctl_mutex);
	type = lo->lo_encrypt_type;
	--lo->lo_refcnt;
	if (xfer_funcs[type] && xfer_funcs[type]->unlock)
		xfer_funcs[type]->unlock(lo);

	up(&lo->lo_ctl_mutex);
	return 0;
}

static struct block_device_operations lo_fops = {
	.owner =	THIS_MODULE,
	.open =		lo_open,
	.release =	lo_release,
	.ioctl =	lo_ioctl,
};

/*
 * And now the modules code and kernel interface.
 */
MODULE_PARM(max_loop, "i");
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices (1-256)");
MODULE_LICENSE("GPL");

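/*
 * max_loop can be given as a module option ("modprobe loop max_loop=64")
 * or, when the driver is built in, on the kernel command line
 * ("max_loop=64", see max_loop_setup() below).
 */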
int loop_register_transfer(struct loop_func_table *funcs)
{
	/* was ">", which let number == MAX_LO_CRYPT index past the table */
	if ((unsigned)funcs->number >= MAX_LO_CRYPT || xfer_funcs[funcs->number])
		return -EINVAL;
	xfer_funcs[funcs->number] = funcs;
	return 0;
}

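/*
 * A transfer module registers itself at init time and unregisters on
 * exit. Hypothetical sketch (names invented for illustration, not part
 * of this file):
 *
 *	static struct loop_func_table example_funcs = {
 *		.number   = LO_CRYPT_IDEA,	// any free slot < MAX_LO_CRYPT
 *		.transfer = example_transfer,
 *		.init     = example_status,
 *		.release  = example_release,
 *	};
 *
 *	loop_register_transfer(&example_funcs);		// in module init
 *	loop_unregister_transfer(LO_CRYPT_IDEA);	// in module exit
 */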
int loop_unregister_transfer(int number)
{
	struct loop_device *lo;

	if ((unsigned)number >= MAX_LO_CRYPT)
		return -EINVAL;

	for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) {
		int type = lo->lo_encrypt_type;

		if (type == number) {
			xfer_funcs[type]->release(lo);
			lo->transfer = NULL;
			lo->lo_encrypt_type = 0;
		}
	}
	xfer_funcs[number] = NULL;
	return 0;
}

EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);

int __init loop_init(void)
{
	int i;

	if ((max_loop < 1) || (max_loop > 256)) {
		printk(KERN_WARNING "loop: invalid max_loop (must be between"
				    " 1 and 256), using default (8)\n");
		max_loop = 8;
	}

	if (register_blkdev(MAJOR_NR, "loop", &lo_fops)) {
		printk(KERN_WARNING "Unable to get major number %d for loop"
				    " device\n", MAJOR_NR);
		return -EIO;
	}

	devfs_handle = devfs_mk_dir(NULL, "loop", NULL);
	devfs_register_series(devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT,
			      MAJOR_NR, 0,
			      S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
			      &lo_fops, NULL);

	loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
	if (!loop_dev)
		return -ENOMEM;

	disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL);
	if (!disks)
		goto out_mem;

	for (i = 0; i < max_loop; i++) {
		disks[i] = alloc_disk(1);
		if (!disks[i])
			goto out_mem2;
	}

	for (i = 0; i < max_loop; i++) {
		struct loop_device *lo = &loop_dev[i];
		struct gendisk *disk = disks[i];

		memset(lo, 0, sizeof(*lo));
		init_MUTEX(&lo->lo_ctl_mutex);
		init_MUTEX_LOCKED(&lo->lo_sem);
		init_MUTEX_LOCKED(&lo->lo_bh_mutex);
		lo->lo_number = i;
		spin_lock_init(&lo->lo_lock);
		disk->major = LOOP_MAJOR;
		disk->first_minor = i;
		disk->fops = &lo_fops;
		sprintf(disk->disk_name, "loop%d", i);
		disk->private_data = lo;
		disk->queue = &lo->lo_queue;
		add_disk(disk);
	}

	printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
	return 0;

out_mem2:
	while (i--)
		put_disk(disks[i]);
	kfree(disks);
out_mem:
	kfree(loop_dev);
	printk(KERN_ERR "loop: ran out of memory\n");
	return -ENOMEM;
}

void loop_exit(void)
{
	int i;

	for (i = 0; i < max_loop; i++) {
		del_gendisk(disks[i]);
		put_disk(disks[i]);
	}
	devfs_unregister(devfs_handle);
	if (unregister_blkdev(MAJOR_NR, "loop"))
		printk(KERN_WARNING "loop: cannot unregister blkdev\n");

	kfree(disks);
	kfree(loop_dev);
}

module_init(loop_init);
module_exit(loop_exit);

#ifndef MODULE
static int __init max_loop_setup(char *str)
{
	max_loop = simple_strtol(str, NULL, 0);
	return 1;
}

__setup("max_loop=", max_loop_setup);
#endif