/*
 *  linux/fs/block_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/locks.h>
#include <linux/fcntl.h>
#include <linux/malloc.h>
#include <linux/kmod.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/smp_lock.h>

#include <asm/uaccess.h>

extern int *blk_size[];
extern int *blksize_size[];

#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
#define NBUF 64
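
/*
 * Write to a block special file through the buffer cache.  The write is
 * broken up into blocksize-sized chunks; a partial-block write first
 * reads the block in (with optional read-ahead) so unmodified bytes are
 * preserved.  With O_SYNC, dirty buffers are collected and written out
 * in batches of up to NBUF.
 */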
ssize_t block_write(struct file * filp, const char * buf,
		    size_t count, loff_t *ppos)
{
	struct inode * inode = filp->f_dentry->d_inode;
	ssize_t blocksize, blocksize_bits, i, buffercount, write_error;
	ssize_t block, blocks;
	loff_t offset;
	ssize_t chars;
	ssize_t written;
	struct buffer_head * bhlist[NBUF];
	size_t size;
	kdev_t dev = inode->i_rdev;
	struct buffer_head * bh, *bufferlist[NBUF];
	register char * p;

	if (is_read_only(dev))
		return -EPERM;

	written = write_error = buffercount = 0;
	blocksize = BLOCK_SIZE;
	if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
		blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];

	i = blocksize;
	blocksize_bits = 0;
	while (i != 1) {
		blocksize_bits++;
		i >>= 1;
	}

	block = *ppos >> blocksize_bits;
	offset = *ppos & (blocksize-1);

	if (blk_size[MAJOR(dev)])
		size = ((loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS) >> blocksize_bits;
	else
		size = INT_MAX;
	while (count > 0) {
		if (block >= size)
			return written ? written : -ENOSPC;
		chars = blocksize - offset;
		if (chars > count)
			chars = count;

#if 0
		/* get the buffer head */
		{
			struct buffer_head * (*fn)(kdev_t, int, int) = getblk;
			if (chars != blocksize)
				fn = bread;
			bh = fn(dev, block, blocksize);
			if (!bh)
				return written ? written : -EIO;
			if (!buffer_uptodate(bh))
				wait_on_buffer(bh);
		}
#else
		bh = getblk(dev, block, blocksize);
		if (!bh)
			return written ? written : -EIO;

		if (!buffer_uptodate(bh)) {
			if (chars == blocksize)
				wait_on_buffer(bh);
			else {
				bhlist[0] = bh;
				if (!filp->f_reada || !read_ahead[MAJOR(dev)]) {
					/* We do this to force the read of a single buffer */
					blocks = 1;
				} else {
					/* Read-ahead before write */
					blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2;
					if (block + blocks > size)
						blocks = size - block;
					if (blocks > NBUF)
						blocks = NBUF;
					if (!blocks)
						blocks = 1;
					for (i = 1; i < blocks; i++) {
						bhlist[i] = getblk(dev, block+i, blocksize);
						if (!bhlist[i]) {
							while (i >= 0)
								brelse(bhlist[i--]);
							return written ? written : -EIO;
						}
					}
				}
				ll_rw_block(READ, blocks, bhlist);
				for (i = 1; i < blocks; i++)
					brelse(bhlist[i]);
				wait_on_buffer(bh);
				if (!buffer_uptodate(bh)) {
					brelse(bh);
					return written ? written : -EIO;
				}
			}
		}
#endif
		block++;
		p = offset + bh->b_data;
		offset = 0;
		*ppos += chars;
		written += chars;
		count -= chars;
		copy_from_user(p, buf, chars);
		p += chars;
		buf += chars;
		mark_buffer_uptodate(bh, 1);
		mark_buffer_dirty(bh);
		if (filp->f_flags & O_SYNC)
			bufferlist[buffercount++] = bh;
		else
			brelse(bh);
		if (buffercount == NBUF) {
			ll_rw_block(WRITE, buffercount, bufferlist);
			for (i = 0; i < buffercount; i++) {
				wait_on_buffer(bufferlist[i]);
				if (!buffer_uptodate(bufferlist[i]))
					write_error = 1;
				brelse(bufferlist[i]);
			}
			buffercount = 0;
		}
		balance_dirty(dev);
		if (write_error)
			break;
	}
	if (buffercount) {
		ll_rw_block(WRITE, buffercount, bufferlist);
		for (i = 0; i < buffercount; i++) {
			wait_on_buffer(bufferlist[i]);
			if (!buffer_uptodate(bufferlist[i]))
				write_error = 1;
			brelse(bufferlist[i]);
		}
	}
	filp->f_reada = 1;
	if (write_error)
		return -EIO;
	return written;
}
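
/*
 * Read from a block special file.  Requests are sized against the
 * device's blocksize and total size from blksize_size[]/blk_size[];
 * buffers are batched through ll_rw_block() so read-ahead I/O can
 * overlap with copying completed blocks out to user space.
 */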
ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
{
	struct inode * inode = filp->f_dentry->d_inode;
	size_t block;
	loff_t offset;
	ssize_t blocksize;
	ssize_t blocksize_bits, i;
	size_t blocks, rblocks, left;
	int bhrequest, uptodate;
	struct buffer_head ** bhb, ** bhe;
	struct buffer_head * buflist[NBUF];
	struct buffer_head * bhreq[NBUF];
	unsigned int chars;
	loff_t size;
	kdev_t dev;
	ssize_t read;

	dev = inode->i_rdev;
	blocksize = BLOCK_SIZE;
	if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
		blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
	i = blocksize;
	blocksize_bits = 0;
	while (i != 1) {
		blocksize_bits++;
		i >>= 1;
	}

	offset = *ppos;
	if (blk_size[MAJOR(dev)])
		size = (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
	else
		size = (loff_t) INT_MAX << BLOCK_SIZE_BITS;

	if (offset > size)
		left = 0;
	/* size - offset might not fit into left, so check explicitly. */
	else if (size - offset > INT_MAX)
		left = INT_MAX;
	else
		left = size - offset;
	if (left > count)
		left = count;
	if (left <= 0)
		return 0;
	read = 0;
	block = offset >> blocksize_bits;
	offset &= blocksize-1;
	size >>= blocksize_bits;
	rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits;
	bhb = bhe = buflist;
	if (filp->f_reada) {
		if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9))
			blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9);
		if (rblocks > blocks)
			blocks = rblocks;
	}
	if (block + blocks > size) {
		blocks = size - block;
		if (blocks == 0)
			return 0;
	}

	/* We do this in a two stage process.  We first try to request
	   as many blocks as we can, then we wait for the first one to
	   complete, and then we try to wrap up as many as are actually
	   done.  This routine is rather generic, in that it can be used
	   in a filesystem by substituting the appropriate function in
	   for getblk.

	   This routine is optimized to make maximum use of the various
	   buffers and caches. */

	do {
		bhrequest = 0;
		uptodate = 1;
		while (blocks) {
			--blocks;
			*bhb = getblk(dev, block++, blocksize);
			if (*bhb && !buffer_uptodate(*bhb)) {
				uptodate = 0;
				bhreq[bhrequest++] = *bhb;
			}

			if (++bhb == &buflist[NBUF])
				bhb = buflist;

			/* If the block we have on hand is uptodate, go ahead
			   and complete processing. */
			if (uptodate)
				break;
			if (bhb == bhe)
				break;
		}

		/* Now request them all */
		if (bhrequest)
			ll_rw_block(READ, bhrequest, bhreq);

		do { /* Finish off all I/O that has actually completed */
			if (*bhe) {
				wait_on_buffer(*bhe);
				if (!buffer_uptodate(*bhe)) {	/* read error? */
					brelse(*bhe);
					if (++bhe == &buflist[NBUF])
						bhe = buflist;
					left = 0;
					break;
				}
			}
			if (left < blocksize - offset)
				chars = left;
			else
				chars = blocksize - offset;
			*ppos += chars;
			left -= chars;
			read += chars;
			if (*bhe) {
				copy_to_user(buf, offset+(*bhe)->b_data, chars);
				brelse(*bhe);
				buf += chars;
			} else {
				while (chars-- > 0)
					put_user(0, buf++);
			}
			offset = 0;
			if (++bhe == &buflist[NBUF])
				bhe = buflist;
		} while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe)));
		if (bhe == bhb && !blocks)
			break;
	} while (left > 0);

	/* Release the read-ahead blocks */
	while (bhe != bhb) {
		brelse(*bhe);
		if (++bhe == &buflist[NBUF])
			bhe = buflist;
	}
	if (!read)
		return -EIO;
	filp->f_reada = 1;
	return read;
}
/*
 * private llseek:
 * for a block special file file->f_dentry->d_inode->i_size is zero
 * so we compute the size by hand (just as in block_read/write above)
 */
static loff_t block_llseek(struct file *file, loff_t offset, int origin)
{
	long long retval;
	kdev_t dev;

	switch (origin) {
		case 2:
			dev = file->f_dentry->d_inode->i_rdev;
			if (blk_size[MAJOR(dev)])
				offset += (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
			/* else?  return -EINVAL? */
			break;
		case 1:
			offset += file->f_pos;
	}
	retval = -EINVAL;
	if (offset >= 0) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_reada = 0;
			file->f_version = ++event;
		}
		retval = offset;
	}
	return retval;
}
/*
 * Filp may be NULL when we are called by an msync of a vma
 * since the vma has no handle.
 */
static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
	return fsync_dev(dentry->d_inode->i_rdev);
}
/*
 * bdev cache handling - shamelessly stolen from inode.c
 * We use a smaller hashtable, though.
 */

#define HASH_BITS	6
#define HASH_SIZE	(1UL << HASH_BITS)
#define HASH_MASK	(HASH_SIZE-1)
static struct list_head bdev_hashtable[HASH_SIZE];
static spinlock_t bdev_lock = SPIN_LOCK_UNLOCKED;
static kmem_cache_t * bdev_cachep;

#define alloc_bdev() \
	((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL))
#define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev))

static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
	struct block_device * bdev = (struct block_device *) foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		memset(bdev, 0, sizeof(*bdev));
		sema_init(&bdev->bd_sem, 1);
	}
}

void __init bdev_init(void)
{
	int i;
	struct list_head *head = bdev_hashtable;

	i = HASH_SIZE;
	do {
		INIT_LIST_HEAD(head);
		head++;
		i--;
	} while (i);

	bdev_cachep = kmem_cache_create("bdev_cache",
					sizeof(struct block_device),
					0, SLAB_HWCACHE_ALIGN, init_once,
					NULL);
	if (!bdev_cachep)
		panic("Cannot create bdev_cache SLAB cache");
}
/*
 * Most likely a _very_ bad hash - but then it's hardly critical for a
 * small /dev and can be fixed when somebody needs a really large one.
 */
static inline unsigned long hash(dev_t dev)
{
	unsigned long tmp = dev;
	tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
	return tmp & HASH_MASK;
}

static struct block_device *bdfind(dev_t dev, struct list_head *head)
{
	struct list_head *p;
	struct block_device *bdev;
	for (p = head->next; p != head; p = p->next) {
		bdev = list_entry(p, struct block_device, bd_hash);
		if (bdev->bd_dev != dev)
			continue;
		atomic_inc(&bdev->bd_count);
		return bdev;
	}
	return NULL;
}
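
/*
 * bdget() returns the block_device for a given dev_t, taking a
 * reference on an existing entry or hashing in a freshly allocated
 * one.  Note the second bdfind() after bdev_lock was dropped for the
 * allocation: it catches the race where somebody else inserted the
 * same device in the meantime.
 */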
struct block_device *bdget(dev_t dev)
{
	struct list_head * head = bdev_hashtable + hash(dev);
	struct block_device *bdev, *new_bdev;
	spin_lock(&bdev_lock);
	bdev = bdfind(dev, head);
	spin_unlock(&bdev_lock);
	if (bdev)
		return bdev;
	new_bdev = alloc_bdev();
	if (!new_bdev)
		return NULL;
	atomic_set(&new_bdev->bd_count, 1);
	new_bdev->bd_dev = dev;
	new_bdev->bd_op = NULL;
	spin_lock(&bdev_lock);
	bdev = bdfind(dev, head);
	if (!bdev) {
		list_add(&new_bdev->bd_hash, head);
		spin_unlock(&bdev_lock);
		return new_bdev;
	}
	spin_unlock(&bdev_lock);
	destroy_bdev(new_bdev);
	return bdev;
}
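
/*
 * Drop a reference taken by bdget(); the last reference unhashes and
 * frees the block_device, which must no longer be held open by anyone.
 */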
void bdput(struct block_device *bdev)
{
	if (atomic_dec_and_test(&bdev->bd_count)) {
		spin_lock(&bdev_lock);
		if (atomic_read(&bdev->bd_openers))
			BUG();
		list_del(&bdev->bd_hash);
		spin_unlock(&bdev_lock);
		destroy_bdev(bdev);
	}
}
static struct {
	const char *name;
	struct block_device_operations *bdops;
} blkdevs[MAX_BLKDEV];

int get_blkdev_list(char * p)
{
	int i;
	int len;

	len = sprintf(p, "\nBlock devices:\n");
	for (i = 0; i < MAX_BLKDEV ; i++) {
		if (blkdevs[i].bdops) {
			len += sprintf(p+len, "%3d %s\n", i, blkdevs[i].name);
		}
	}
	return len;
}
/*
 * Return the function table of a device.
 * Load the driver if needed.
 */
const struct block_device_operations * get_blkfops(unsigned int major)
{
	const struct block_device_operations *ret = NULL;

	/* major 0 is used for non-device mounts */
	if (major && major < MAX_BLKDEV) {
#ifdef CONFIG_KMOD
		if (!blkdevs[major].bdops) {
			char name[20];
			sprintf(name, "block-major-%d", major);
			request_module(name);
		}
#endif
		ret = blkdevs[major].bdops;
	}
	return ret;
}
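
/*
 * Passing major == 0 requests dynamic major allocation: the highest
 * free slot is claimed and its number returned.  Re-registering an
 * already-taken major succeeds only with the same bdops.
 */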
int register_blkdev(unsigned int major, const char * name, struct block_device_operations *bdops)
{
	if (major == 0) {
		for (major = MAX_BLKDEV-1; major > 0; major--) {
			if (blkdevs[major].bdops == NULL) {
				blkdevs[major].name = name;
				blkdevs[major].bdops = bdops;
				return major;
			}
		}
		return -EBUSY;
	}
	if (major >= MAX_BLKDEV)
		return -EINVAL;
	if (blkdevs[major].bdops && blkdevs[major].bdops != bdops)
		return -EBUSY;
	blkdevs[major].name = name;
	blkdevs[major].bdops = bdops;
	return 0;
}
int unregister_blkdev(unsigned int major, const char * name)
{
	if (major >= MAX_BLKDEV)
		return -EINVAL;
	if (!blkdevs[major].bdops)
		return -EINVAL;
	if (strcmp(blkdevs[major].name, name))
		return -EINVAL;
	blkdevs[major].name = NULL;
	blkdevs[major].bdops = NULL;
	return 0;
}
/*
 * This routine checks whether removable media has been changed,
 * and invalidates all buffer-cache entries in that case.  This
 * is a relatively slow routine, so we have to try to minimize using
 * it.  Thus it is called only upon a 'mount' or 'open'.  This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */
int check_disk_change(kdev_t dev)
{
	int i;
	const struct block_device_operations * bdops = NULL;
	struct super_block * sb;

	i = MAJOR(dev);
	if (i < MAX_BLKDEV)
		bdops = blkdevs[i].bdops;
	if (bdops == NULL) {
		devfs_handle_t de;

		de = devfs_find_handle (NULL, NULL, i, MINOR (dev),
					DEVFS_SPECIAL_BLK, 0);
		if (de) bdops = devfs_get_ops (de);
	}
	if (bdops == NULL)
		return 0;
	if (bdops->check_media_change == NULL)
		return 0;
	if (!bdops->check_media_change(dev))
		return 0;

	printk(KERN_DEBUG "VFS: Disk change detected on device %s\n",
		bdevname(dev));

	sb = get_super(dev);
	if (sb && invalidate_inodes(sb))
		printk("VFS: busy inodes on changed media.\n");

	destroy_buffers(dev);

	if (bdops->revalidate)
		bdops->revalidate(dev);
	return 1;
}
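
/*
 * Issue an ioctl to a block driver from kernel space.  A throwaway
 * inode carries the device number, and set_fs(KERNEL_DS) lets the
 * driver's ioctl take its argument from a kernel address.
 */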
int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
	kdev_t rdev = to_kdev_t(bdev->bd_dev);
	struct inode inode_fake;
	int res;
	mm_segment_t old_fs = get_fs();

	if (!bdev->bd_op->ioctl)
		return -EINVAL;
	inode_fake.i_rdev = rdev;
	init_waitqueue_head(&inode_fake.i_wait);
	set_fs(KERNEL_DS);
	res = bdev->bd_op->ioctl(&inode_fake, NULL, cmd, arg);
	set_fs(old_fs);
	return res;
}
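
/*
 * Open a block device from inside the kernel (e.g. by the mount code),
 * with no real file or inode available.  bd_sem serializes this
 * against blkdev_open()/blkdev_put() below.
 */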
int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
{
	int ret = -ENODEV;
	kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
	down(&bdev->bd_sem);
	if (!bdev->bd_op)
		bdev->bd_op = get_blkfops(MAJOR(rdev));
	if (bdev->bd_op) {
		/*
		 * This crockload is due to bad choice of ->open() type.
		 * It will go away.
		 * For now, block device ->open() routine must _not_
		 * examine anything in 'inode' argument except ->i_rdev.
		 */
		struct file fake_file = {};
		struct dentry fake_dentry = {};
		struct inode *fake_inode = get_empty_inode();
		ret = -ENOMEM;
		if (fake_inode) {
			fake_file.f_mode = mode;
			fake_file.f_flags = flags;
			fake_file.f_dentry = &fake_dentry;
			fake_dentry.d_inode = fake_inode;
			fake_inode->i_rdev = rdev;
			ret = 0;
			if (bdev->bd_op->open)
				ret = bdev->bd_op->open(fake_inode, &fake_file);
			if (!ret)
				atomic_inc(&bdev->bd_openers);
			else if (!atomic_read(&bdev->bd_openers))
				bdev->bd_op = NULL;
			iput(fake_inode);
		}
	}
	up(&bdev->bd_sem);
	return ret;
}
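
/*
 * open() for block special files: bind the driver's operations to the
 * block_device on first open and count openers so blkdev_put() knows
 * when the device becomes idle.
 */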
int blkdev_open(struct inode * inode, struct file * filp)
{
	int ret = -ENXIO;
	struct block_device *bdev = inode->i_bdev;
	down(&bdev->bd_sem);
	lock_kernel();
	if (!bdev->bd_op)
		bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
	if (bdev->bd_op) {
		ret = 0;
		if (bdev->bd_op->open)
			ret = bdev->bd_op->open(inode, filp);
		if (!ret)
			atomic_inc(&bdev->bd_openers);
		else if (!atomic_read(&bdev->bd_openers))
			bdev->bd_op = NULL;
	}
	unlock_kernel();
	up(&bdev->bd_sem);
	return ret;
}
int blkdev_put(struct block_device *bdev, int kind)
{
	int ret = 0;
	kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
	down(&bdev->bd_sem);
	/* syncing will go here */
	lock_kernel();
	if (kind == BDEV_FILE || kind == BDEV_FS)
		fsync_dev(rdev);
	if (atomic_dec_and_test(&bdev->bd_openers)) {
		/* invalidating buffers will go here */
		invalidate_buffers(rdev);
	}
	if (bdev->bd_op->release) {
		struct inode * fake_inode = get_empty_inode();
		ret = -ENOMEM;
		if (fake_inode) {
			fake_inode->i_rdev = rdev;
			ret = bdev->bd_op->release(fake_inode, NULL);
			iput(fake_inode);
		}
	}
	if (!atomic_read(&bdev->bd_openers))
		bdev->bd_op = NULL;	/* we can't rely on driver being */
					/* kind to stay around. */
	unlock_kernel();
	up(&bdev->bd_sem);
	return ret;
}
static int blkdev_close(struct inode * inode, struct file * filp)
{
	return blkdev_put(inode->i_bdev, BDEV_FILE);
}

static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
			unsigned long arg)
{
	if (inode->i_bdev->bd_op->ioctl)
		return inode->i_bdev->bd_op->ioctl(inode, file, cmd, arg);
	return -EINVAL;
}
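
/*
 * Default file operations for block special files, installed when a
 * block device inode is set up.
 */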
struct file_operations def_blk_fops = {
	open:		blkdev_open,
	release:	blkdev_close,
	llseek:		block_llseek,
	read:		block_read,
	write:		block_write,
	fsync:		block_fsync,
	ioctl:		blkdev_ioctl,
};
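
/*
 * Format "name(major,minor)" for messages.  Note the static buffer:
 * the result is overwritten by the next call, so it is only safe for
 * immediate use, as in the printk in check_disk_change() above.
 */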
const char * bdevname(kdev_t dev)
{
	static char buffer[32];
	const char * name = blkdevs[MAJOR(dev)].name;

	if (!name)
		name = "unknown-block";

	sprintf(buffer, "%s(%d,%d)", name, MAJOR(dev), MINOR(dev));
	return buffer;
}