/*
 *  linux/fs/block_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/locks.h>
#include <linux/fcntl.h>
#include <linux/malloc.h>
#include <linux/kmod.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/smp_lock.h>

#include <asm/uaccess.h>

extern int *blk_size[];
extern int *blksize_size[];

#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
#define NBUF 64

ssize_t block_write(struct file * filp, const char * buf,
                    size_t count, loff_t *ppos)
{
        struct inode * inode = filp->f_dentry->d_inode;
        ssize_t blocksize, blocksize_bits, i, buffercount, write_error;
        ssize_t block, blocks;
        loff_t offset;
        ssize_t chars;
        ssize_t written = 0;
        struct buffer_head * bhlist[NBUF];
        size_t size;
        kdev_t dev;
        struct buffer_head * bh, *bufferlist[NBUF];
        register char * p;

        write_error = buffercount = 0;
        dev = inode->i_rdev;
        if (is_read_only(inode->i_rdev))
                return -EPERM;
        blocksize = BLOCK_SIZE;
        if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
                blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];

        /* blocksize is a power of two: derive blocksize_bits = log2(blocksize) */
        i = blocksize;
        blocksize_bits = 0;
        while (i != 1) {
                blocksize_bits++;
                i >>= 1;
        }

        block = *ppos >> blocksize_bits;
        offset = *ppos & (blocksize-1);

        if (blk_size[MAJOR(dev)])
                size = ((loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS) >> blocksize_bits;
        else
                size = INT_MAX;
        while (count > 0) {
                if (block >= size)
                        return written ? written : -ENOSPC;
                chars = blocksize - offset;
                if (chars > count)
                        chars = count;

#if 0
                /* get the buffer head */
                {
                        struct buffer_head * (*fn)(kdev_t, int, int) = getblk;
                        if (chars != blocksize)
                                fn = bread;
                        bh = fn(dev, block, blocksize);
                        if (!bh)
                                return written ? written : -EIO;
                        if (!buffer_uptodate(bh))
                                wait_on_buffer(bh);
                }
#else
                bh = getblk(dev, block, blocksize);
                if (!bh)
                        return written ? written : -EIO;

                /* A partial write needs the old contents: read the block in
                   (with read-ahead where it helps) before copying over it. */
                if (!buffer_uptodate(bh))
                {
                        if (chars == blocksize)
                                wait_on_buffer(bh);
                        else
                        {
                                bhlist[0] = bh;
                                if (!filp->f_reada || !read_ahead[MAJOR(dev)]) {
                                        /* We do this to force the read of a single buffer */
                                        blocks = 1;
                                } else {
                                        /* Read-ahead before write */
                                        blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2;
                                        if (block + blocks > size) blocks = size - block;
                                        if (blocks > NBUF) blocks = NBUF;
                                        if (!blocks) blocks = 1;
                                        for (i = 1; i < blocks; i++) {
                                                bhlist[i] = getblk(dev, block+i, blocksize);
                                                if (!bhlist[i]) {
                                                        while (i >= 0) brelse(bhlist[i--]);
                                                        return written ? written : -EIO;
                                                }
                                        }
                                }
                                ll_rw_block(READ, blocks, bhlist);
                                for (i = 1; i < blocks; i++) brelse(bhlist[i]);
                                wait_on_buffer(bh);
                                if (!buffer_uptodate(bh)) {
                                        brelse(bh);
                                        return written ? written : -EIO;
                                }
                        }
                }
#endif
                block++;
                p = offset + bh->b_data;
                offset = 0;
                *ppos += chars;
                written += chars;
                count -= chars;
                copy_from_user(p, buf, chars);
                p += chars;
                buf += chars;
                mark_buffer_uptodate(bh, 1);
                mark_buffer_dirty(bh, 0);
                if (filp->f_flags & O_SYNC)
                        bufferlist[buffercount++] = bh;
                else
                        brelse(bh);
                if (buffercount == NBUF) {
                        ll_rw_block(WRITE, buffercount, bufferlist);
                        for (i = 0; i < buffercount; i++) {
                                wait_on_buffer(bufferlist[i]);
                                if (!buffer_uptodate(bufferlist[i]))
                                        write_error = 1;
                                brelse(bufferlist[i]);
                        }
                        buffercount = 0;
                }
                balance_dirty(dev);
                if (write_error)
                        break;
        }
        if (buffercount) {
                ll_rw_block(WRITE, buffercount, bufferlist);
                for (i = 0; i < buffercount; i++) {
                        wait_on_buffer(bufferlist[i]);
                        if (!buffer_uptodate(bufferlist[i]))
                                write_error = 1;
                        brelse(bufferlist[i]);
                }
        }
        filp->f_reada = 1;
        if (write_error)
                return -EIO;
        return written;
}

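/*
 * Illustrative sketch (not in the original file): the buffer-cache write
 * pattern block_write() applies per block, reduced to one full-block,
 * synchronous write.  "dev", "block", "blocksize" and "buf" stand in for
 * values the caller would supply; error handling is elided.
 */
#if 0
        struct buffer_head *bh = getblk(dev, block, blocksize);
        copy_from_user(bh->b_data, buf, blocksize);     /* whole block: no prior read needed */
        mark_buffer_uptodate(bh, 1);
        mark_buffer_dirty(bh, 0);
        ll_rw_block(WRITE, 1, &bh);                     /* O_SYNC-style immediate writeback */
        wait_on_buffer(bh);
        brelse(bh);
#endif
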
ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
{
        struct inode * inode = filp->f_dentry->d_inode;
        size_t block;
        loff_t offset;
        ssize_t blocksize;
        ssize_t blocksize_bits, i;
        size_t blocks, rblocks, left;
        int bhrequest, uptodate;
        struct buffer_head ** bhb, ** bhe;
        struct buffer_head * buflist[NBUF];
        struct buffer_head * bhreq[NBUF];
        unsigned int chars;
        loff_t size;
        kdev_t dev;
        ssize_t read;

        dev = inode->i_rdev;
        blocksize = BLOCK_SIZE;
        if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
                blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
        i = blocksize;
        blocksize_bits = 0;
        while (i != 1) {
                blocksize_bits++;
                i >>= 1;
        }

        offset = *ppos;
        if (blk_size[MAJOR(dev)])
                size = (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
        else
                size = (loff_t) INT_MAX << BLOCK_SIZE_BITS;

        if (offset > size)
                left = 0;
        /* size - offset might not fit into left, so check explicitly. */
        else if (size - offset > INT_MAX)
                left = INT_MAX;
        else
                left = size - offset;
        if (left > count)
                left = count;
        if (left <= 0)
                return 0;
        read = 0;
        block = offset >> blocksize_bits;
        offset &= blocksize-1;
        size >>= blocksize_bits;
        rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits;
        bhb = bhe = buflist;
        if (filp->f_reada) {
                if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9))
                        blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9);
                if (rblocks > blocks)
                        blocks = rblocks;
        }

        if (block + blocks > size) {
                blocks = size - block;
                if (blocks == 0)
                        return 0;
        }

        /* We do this in a two stage process.  We first try to request
           as many blocks as we can, then we wait for the first one to
           complete, and then we try to wrap up as many as are actually
           done.  This routine is rather generic, in that it can be used
           in a filesystem by substituting the appropriate function in
           for getblk.

           This routine is optimized to make maximum use of the various
           buffers and caches. */

        do {
                bhrequest = 0;
                uptodate = 1;
                while (blocks) {
                        --blocks;
                        *bhb = getblk(dev, block++, blocksize);
                        if (*bhb && !buffer_uptodate(*bhb)) {
                                uptodate = 0;
                                bhreq[bhrequest++] = *bhb;
                        }

                        if (++bhb == &buflist[NBUF])
                                bhb = buflist;

                        /* If the block we have on hand is uptodate, go ahead
                           and complete processing. */
                        if (uptodate)
                                break;
                        if (bhb == bhe)
                                break;
                }

                /* Now request them all */
                if (bhrequest) {
                        ll_rw_block(READ, bhrequest, bhreq);
                }

                do { /* Finish off all I/O that has actually completed */
                        if (*bhe) {
                                wait_on_buffer(*bhe);
                                if (!buffer_uptodate(*bhe)) {   /* read error? */
                                        brelse(*bhe);
                                        if (++bhe == &buflist[NBUF])
                                                bhe = buflist;
                                        left = 0;
                                        break;
                                }
                        }
                        if (left < blocksize - offset)
                                chars = left;
                        else
                                chars = blocksize - offset;
                        *ppos += chars;
                        left -= chars;
                        read += chars;
                        if (*bhe) {
                                copy_to_user(buf, offset + (*bhe)->b_data, chars);
                                brelse(*bhe);
                                buf += chars;
                        } else {
                                /* getblk() failed for this block: hand back zeroes */
                                while (chars-- > 0)
                                        put_user(0, buf++);
                        }
                        offset = 0;
                        if (++bhe == &buflist[NBUF])
                                bhe = buflist;
                } while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe)));
                if (bhe == bhb && !blocks)
                        break;
        } while (left > 0);

        /* Release the read-ahead blocks */
        while (bhe != bhb) {
                brelse(*bhe);
                if (++bhe == &buflist[NBUF])
                        bhe = buflist;
        }
        if (!read)
                return -EIO;
        filp->f_reada = 1;
        return read;
}

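/*
 * Worked example (illustrative, not in the original file): a 3000-byte
 * read at *ppos == 500 with a 1024-byte blocksize touches four buffers,
 * only the first of which is consumed from a non-zero offset.
 */
#if 0
        blocksize_bits = 10;                            /* blocksize == 1024 */
        block  = 500 >> blocksize_bits;                 /* == 0 */
        offset = 500 & (1024 - 1);                      /* == 500 */
        rblocks = (3000 + 500 + 1024 - 1) >> 10;        /* == 4 */
#endif
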
/*
 *      Filp may be NULL when we are called by an msync of a vma
 *      since the vma has no handle.
 */

static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
        return fsync_dev(dentry->d_inode->i_rdev);
}

/*
 * bdev cache handling - shamelessly stolen from inode.c
 * We use smaller hashtable, though.
 */

#define HASH_BITS       6
#define HASH_SIZE       (1UL << HASH_BITS)
#define HASH_MASK       (HASH_SIZE-1)
static struct list_head bdev_hashtable[HASH_SIZE];
static spinlock_t bdev_lock = SPIN_LOCK_UNLOCKED;
static kmem_cache_t * bdev_cachep;

#define alloc_bdev() \
        ((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL))
#define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev))

static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
        struct block_device * bdev = (struct block_device *) foo;

        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
            SLAB_CTOR_CONSTRUCTOR)
        {
                memset(bdev, 0, sizeof(*bdev));
                sema_init(&bdev->bd_sem, 1);
        }
}

void bdev_init(void)
{
        int i;
        struct list_head *head = bdev_hashtable;

        i = HASH_SIZE;
        do {
                INIT_LIST_HEAD(head);
                head++;
                i--;
        } while (i);

        bdev_cachep = kmem_cache_create("bdev_cache",
                                        sizeof(struct block_device),
                                        0, SLAB_HWCACHE_ALIGN, init_once,
                                        NULL);
        if (!bdev_cachep)
                panic("cannot create bdev slab cache");
}

/*
 * Most likely _very_ bad one - but then it's hardly critical for small
 * /dev and can be fixed when somebody will need really large one.
 */
static inline unsigned long hash(dev_t dev)
{
        unsigned long tmp = dev;
        tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
        return tmp & HASH_MASK;
}

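/*
 * Worked example (illustrative, not in the original file): with
 * HASH_BITS == 6 the folding above spreads both major and minor bits
 * of the old 8+8 kdev_t encoding into the bucket index.
 */
#if 0
        unsigned long tmp = 0x0341;             /* major 3, minor 0x41 */
        tmp = tmp + (tmp >> 6) + (tmp >> 12);   /* 0x341 + 0xd + 0x0 == 0x34e */
        /* tmp & HASH_MASK == 0x0e -> bucket 14 */
#endif
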
static struct block_device *bdfind(dev_t dev, struct list_head *head)
{
        struct list_head *p;
        struct block_device *bdev;
        for (p = head->next; p != head; p = p->next) {
                bdev = list_entry(p, struct block_device, bd_hash);
                if (bdev->bd_dev != dev)
                        continue;
                atomic_inc(&bdev->bd_count);
                return bdev;
        }
        return NULL;
}

struct block_device *bdget(dev_t dev)
{
        struct list_head * head = bdev_hashtable + hash(dev);
        struct block_device *bdev, *new_bdev;
        spin_lock(&bdev_lock);
        bdev = bdfind(dev, head);
        spin_unlock(&bdev_lock);
        if (bdev)
                return bdev;
        new_bdev = alloc_bdev();
        if (!new_bdev)
                return NULL;
        atomic_set(&new_bdev->bd_count, 1);
        new_bdev->bd_dev = dev;
        new_bdev->bd_op = NULL;
        spin_lock(&bdev_lock);
        /* recheck under the lock: someone may have inserted it meanwhile */
        bdev = bdfind(dev, head);
        if (!bdev) {
                list_add(&new_bdev->bd_hash, head);
                spin_unlock(&bdev_lock);
                return new_bdev;
        }
        spin_unlock(&bdev_lock);
        destroy_bdev(new_bdev);
        return bdev;
}

void bdput(struct block_device *bdev)
{
        if (atomic_dec_and_test(&bdev->bd_count)) {
                spin_lock(&bdev_lock);
                if (atomic_read(&bdev->bd_openers))
                        BUG();
                list_del(&bdev->bd_hash);
                spin_unlock(&bdev_lock);
                destroy_bdev(bdev);
        }
}

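/*
 * Usage sketch (illustrative, not in the original file): bdget() returns
 * a counted reference, so every successful call must be balanced by
 * bdput().  kdev_t_to_nr() converts the old kdev_t into the dev_t key
 * the hash uses.
 */
#if 0
        struct block_device *bdev = bdget(kdev_t_to_nr(inode->i_rdev));
        if (bdev) {
                /* ... use bdev->bd_op, serialize on bdev->bd_sem ... */
                bdput(bdev);
        }
#endif
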
static struct {
        const char *name;
        struct block_device_operations *bdops;
} blkdevs[MAX_BLKDEV] = {
        { NULL, NULL },
};

int get_blkdev_list(char * p)
{
        int i;
        int len;

        len = sprintf(p, "\nBlock devices:\n");
        for (i = 0; i < MAX_BLKDEV; i++) {
                if (blkdevs[i].bdops) {
                        len += sprintf(p+len, "%3d %s\n", i, blkdevs[i].name);
                }
        }
        return len;
}

/*
        Return the function table of a device.
        Load the driver if needed.
*/
const struct block_device_operations * get_blkfops(unsigned int major)
{
        const struct block_device_operations *ret = NULL;

        /* major 0 is used for non-device mounts */
        if (major && major < MAX_BLKDEV) {
#ifdef CONFIG_KMOD
                if (!blkdevs[major].bdops) {
                        char name[20];
                        sprintf(name, "block-major-%d", major);
                        request_module(name);
                }
#endif
                ret = blkdevs[major].bdops;
        }
        return ret;
}

int register_blkdev(unsigned int major, const char * name, struct block_device_operations *bdops)
{
        if (major == 0) {
                /* major 0 requests dynamic allocation: scan down for a free slot */
                for (major = MAX_BLKDEV-1; major > 0; major--) {
                        if (blkdevs[major].bdops == NULL) {
                                blkdevs[major].name = name;
                                blkdevs[major].bdops = bdops;
                                return major;
                        }
                }
                return -EBUSY;
        }
        if (major >= MAX_BLKDEV)
                return -EINVAL;
        if (blkdevs[major].bdops && blkdevs[major].bdops != bdops)
                return -EBUSY;
        blkdevs[major].name = name;
        blkdevs[major].bdops = bdops;
        return 0;
}

int unregister_blkdev(unsigned int major, const char * name)
{
        if (major >= MAX_BLKDEV)
                return -EINVAL;
        if (!blkdevs[major].bdops)
                return -EINVAL;
        if (strcmp(blkdevs[major].name, name))
                return -EINVAL;
        blkdevs[major].name = NULL;
        blkdevs[major].bdops = NULL;
        return 0;
}

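/*
 * Usage sketch (illustrative, not in the original file): a hypothetical
 * driver registering its block_device_operations.  Passing major == 0
 * requests dynamic allocation; "mydev" and the mydev_* methods are
 * made-up names.
 */
#if 0
        static struct block_device_operations mydev_fops = {
                open:                   mydev_open,
                release:                mydev_release,
                check_media_change:     mydev_check_media_change,
                revalidate:             mydev_revalidate,
        };

        int major = register_blkdev(0, "mydev", &mydev_fops);
        if (major < 0)
                return major;           /* -EBUSY: no free major */
        /* ... and on unload: */
        unregister_blkdev(major, "mydev");
#endif
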
/*
 * This routine checks whether a removable media has been changed,
 * and invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */
int check_disk_change(kdev_t dev)
{
        int i;
        const struct block_device_operations * bdops = NULL;
        struct super_block * sb;

        i = MAJOR(dev);
        if (i < MAX_BLKDEV)
                bdops = blkdevs[i].bdops;
        if (bdops == NULL) {
                devfs_handle_t de;

                de = devfs_find_handle (NULL, NULL, i, MINOR (dev),
                                        DEVFS_SPECIAL_BLK, 0);
                if (de) bdops = devfs_get_ops (de);
        }
        if (bdops == NULL)
                return 0;
        if (bdops->check_media_change == NULL)
                return 0;
        if (!bdops->check_media_change(dev))
                return 0;

        printk(KERN_DEBUG "VFS: Disk change detected on device %s\n",
                bdevname(dev));

        sb = get_super(dev);
        if (sb && invalidate_inodes(sb))
                printk("VFS: busy inodes on changed media.\n");

        destroy_buffers(dev);

        if (bdops->revalidate)
                bdops->revalidate(dev);
        return 1;
}

int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
        kdev_t rdev = to_kdev_t(bdev->bd_dev);
        struct inode inode_fake;
        int res;
        mm_segment_t old_fs = get_fs();

        if (!bdev->bd_op->ioctl)
                return -EINVAL;
        inode_fake.i_rdev = rdev;
        init_waitqueue_head(&inode_fake.i_wait);
        set_fs(KERNEL_DS);
        res = bdev->bd_op->ioctl(&inode_fake, NULL, cmd, arg);
        set_fs(old_fs);
        return res;
}

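/*
 * Usage sketch (illustrative, not in the original file): the
 * set_fs(KERNEL_DS) switch above is what lets a caller pass a
 * kernel-space buffer where the driver expects a user pointer,
 * e.g. when asking the driver for the device size in sectors.
 */
#if 0
        long nr_sects;
        int err = ioctl_by_bdev(bdev, BLKGETSIZE, (unsigned long) &nr_sects);
#endif
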
int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
{
        int ret = -ENODEV;
        kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
        down(&bdev->bd_sem);
        if (!bdev->bd_op)
                bdev->bd_op = get_blkfops(MAJOR(rdev));
        if (bdev->bd_op) {
                /*
                 * This crockload is due to bad choice of ->open() type.
                 * It will go away.
                 */
                struct file fake_file = {};
                struct dentry fake_dentry = {};
                struct inode *fake_inode = get_empty_inode();
                ret = -ENOMEM;
                if (fake_inode) {
                        fake_file.f_mode = mode;
                        fake_file.f_flags = flags;
                        fake_file.f_dentry = &fake_dentry;
                        fake_dentry.d_inode = fake_inode;
                        fake_inode->i_rdev = rdev;
                        ret = 0;
                        if (bdev->bd_op->open)
                                ret = bdev->bd_op->open(fake_inode, &fake_file);
                        if (!ret)
                                atomic_inc(&bdev->bd_openers);
                        else if (!atomic_read(&bdev->bd_openers))
                                bdev->bd_op = NULL;
                        iput(fake_inode);
                }
        }
        up(&bdev->bd_sem);
        return ret;
}

int blkdev_open(struct inode * inode, struct file * filp)
{
        int ret = -ENODEV;
        struct block_device *bdev = inode->i_bdev;
        down(&bdev->bd_sem);
        lock_kernel();
        if (!bdev->bd_op)
                bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
        if (bdev->bd_op) {
                ret = 0;
                if (bdev->bd_op->open)
                        ret = bdev->bd_op->open(inode, filp);
                if (!ret)
                        atomic_inc(&bdev->bd_openers);
                else if (!atomic_read(&bdev->bd_openers))
                        bdev->bd_op = NULL;
        }
        unlock_kernel();
        up(&bdev->bd_sem);
        return ret;
}

int blkdev_put(struct block_device *bdev, int kind)
{
        int ret = 0;
        kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
        down(&bdev->bd_sem);
        /* syncing will go here */
        lock_kernel();
        if (kind == BDEV_FILE || kind == BDEV_FS)
                fsync_dev(rdev);
        if (atomic_dec_and_test(&bdev->bd_openers)) {
                /* invalidating buffers will go here */
                invalidate_buffers(rdev);
        }
        if (bdev->bd_op->release) {
                struct inode * fake_inode = get_empty_inode();
                ret = -ENOMEM;
                if (fake_inode) {
                        fake_inode->i_rdev = rdev;
                        ret = bdev->bd_op->release(fake_inode, NULL);
                        iput(fake_inode);
                }
        }
        if (!atomic_read(&bdev->bd_openers))
                bdev->bd_op = NULL;     /* we can't rely on driver being */
                                        /* kind to stay around. */
        unlock_kernel();
        up(&bdev->bd_sem);
        return ret;
}

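/*
 * Usage sketch (illustrative, not in the original file): in-kernel
 * open/close of a block device, as a filesystem might do around a
 * mount.  The kind argument (BDEV_FS here) must match between
 * blkdev_get() and blkdev_put(); bdput() drops the bdget() reference.
 */
#if 0
        struct block_device *bdev = bdget(kdev_t_to_nr(dev));
        if (bdev && blkdev_get(bdev, FMODE_READ, 0, BDEV_FS) == 0) {
                /* ... buffer-cache I/O against the device ... */
                blkdev_put(bdev, BDEV_FS);
        }
        if (bdev)
                bdput(bdev);
#endif
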
static int blkdev_close(struct inode * inode, struct file * filp)
{
        return blkdev_put(inode->i_bdev, BDEV_FILE);
}

static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
                        unsigned long arg)
{
        if (inode->i_bdev->bd_op->ioctl)
                return inode->i_bdev->bd_op->ioctl(inode, file, cmd, arg);
        return -EINVAL;
}

struct file_operations def_blk_fops = {
        open:           blkdev_open,
        release:        blkdev_close,
        read:           block_read,
        write:          block_write,
        fsync:          block_fsync,
        ioctl:          blkdev_ioctl,
};

const char * bdevname(kdev_t dev)
{
        static char buffer[32];
        const char * name = blkdevs[MAJOR(dev)].name;

        if (!name)
                name = "unknown-block";

        sprintf(buffer, "%s(%d,%d)", name, MAJOR(dev), MINOR(dev));
        return buffer;
}