4 * Copyright (C) 1991, 1992 Linus Torvalds
7 #include <linux/config.h>
8 #include <linux/init.h>
10 #include <linux/locks.h>
11 #include <linux/fcntl.h>
12 #include <linux/malloc.h>
13 #include <linux/kmod.h>
14 #include <linux/devfs_fs_kernel.h>
15 #include <linux/smp_lock.h>
17 #include <asm/uaccess.h>
19 extern int *blk_size[];
20 extern int *blksize_size[];
22 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
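/*
 * block_write(): the default write path for block-special files. It maps the
 * file position onto device blocks, copies user data into buffer-cache
 * buffers with copy_from_user() and marks them dirty; for O_SYNC writes the
 * dirty buffers are collected in bufferlist[] and flushed NBUF at a time
 * with ll_rw_block(WRITE, ...).
 */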
25 ssize_t block_write(struct file * filp, const char * buf,
26 size_t count, loff_t *ppos)
28 struct inode * inode = filp->f_dentry->d_inode;
29 ssize_t blocksize, blocksize_bits, i, buffercount, write_error;
30 ssize_t block, blocks;
34 struct buffer_head * bhlist[NBUF];
37 struct buffer_head * bh, *bufferlist[NBUF];
40 write_error = buffercount = 0;
42 if ( is_read_only( inode->i_rdev))
44 blocksize = BLOCK_SIZE;
45 if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
46 blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
55 block = *ppos >> blocksize_bits;
56 offset = *ppos & (blocksize-1);
58 if (blk_size[MAJOR(dev)])
59 size = ((loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS) >> blocksize_bits;
64 return written ? written : -ENOSPC;
65 chars = blocksize - offset;
70 /* get the buffer head */
72 struct buffer_head * (*fn)(kdev_t, int, int) = getblk;
73 if (chars != blocksize)
75 bh = fn(dev, block, blocksize);
77 return written ? written : -EIO;
78 if (!buffer_uptodate(bh))
82 bh = getblk(dev, block, blocksize);
84 return written ? written : -EIO;
86 if (!buffer_uptodate(bh))
88 if (chars == blocksize)
93 if (!filp->f_reada || !read_ahead[MAJOR(dev)]) {
94 /* We do this to force the read of a single buffer */
97 /* Read-ahead before write */
98 blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2;
99 if (block + blocks > size) blocks = size - block;
100 if (blocks > NBUF) blocks=NBUF;
101 if (!blocks) blocks = 1;
102 for(i=1; i<blocks; i++)
104 bhlist[i] = getblk (dev, block+i, blocksize);
107 while(i >= 0) brelse(bhlist[i--]);
108 return written ? written : -EIO;
112 ll_rw_block(READ, blocks, bhlist);
113 for(i=1; i<blocks; i++) brelse(bhlist[i]);
115 if (!buffer_uptodate(bh)) {
117 return written ? written : -EIO;
123 p = offset + bh->b_data;
128 copy_from_user(p,buf,chars);
131 mark_buffer_uptodate(bh, 1);
132 mark_buffer_dirty(bh, 0);
133 if (filp->f_flags & O_SYNC)
134 bufferlist[buffercount++] = bh;
137 if (buffercount == NBUF){
138 ll_rw_block(WRITE, buffercount, bufferlist);
139 for(i=0; i<buffercount; i++){
140 wait_on_buffer(bufferlist[i]);
141 if (!buffer_uptodate(bufferlist[i]))
143 brelse(bufferlist[i]);
152 ll_rw_block(WRITE, buffercount, bufferlist);
153 for(i=0; i<buffercount; i++){
154 wait_on_buffer(bufferlist[i]);
155 if (!buffer_uptodate(bufferlist[i]))
157 brelse(bufferlist[i]);
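/*
 * block_read(): the default read path for block-special files. It works in
 * two stages (see the comment around line 230): buffer heads for the wanted
 * blocks plus read-ahead are obtained with getblk() and submitted via
 * ll_rw_block(READ, ...), then completed buffers are copied to user space
 * with copy_to_user(). bhb and bhe are the producer and consumer pointers
 * into the buflist[] ring of NBUF entries.
 */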
166 ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
168 struct inode * inode = filp->f_dentry->d_inode;
172 ssize_t blocksize_bits, i;
173 size_t blocks, rblocks, left;
174 int bhrequest, uptodate;
175 struct buffer_head ** bhb, ** bhe;
176 struct buffer_head * buflist[NBUF];
177 struct buffer_head * bhreq[NBUF];
184 blocksize = BLOCK_SIZE;
185 if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
186 blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
195 if (blk_size[MAJOR(dev)])
196 size = (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
198 size = (loff_t) INT_MAX << BLOCK_SIZE_BITS;
202 /* size - offset might not fit into left, so check explicitly. */
203 else if (size - offset > INT_MAX)
206 left = size - offset;
212 block = offset >> blocksize_bits;
213 offset &= blocksize-1;
214 size >>= blocksize_bits;
215 rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits;
218 if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9))
219 blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9);
220 if (rblocks > blocks)
224 if (block + blocks > size) {
225 blocks = size - block;
230 /* We do this in a two stage process. We first try to request
231 as many blocks as we can, then we wait for the first one to
232 complete, and then we try to wrap up as many as are actually
233 done. This routine is rather generic, in that it can be used
234 in a filesystem by substituting the appropriate function in
235 for getblk.
237 This routine is optimized to make maximum use of the various
238 buffers and caches. */
245 *bhb = getblk(dev, block++, blocksize);
246 if (*bhb && !buffer_uptodate(*bhb)) {
248 bhreq[bhrequest++] = *bhb;
251 if (++bhb == &buflist[NBUF])
254 /* If the block we have on hand is uptodate, go ahead
255 and complete processing. */
262 /* Now request them all */
264 ll_rw_block(READ, bhrequest, bhreq);
267 do { /* Finish off all I/O that has actually completed */
269 wait_on_buffer(*bhe);
270 if (!buffer_uptodate(*bhe)) { /* read error? */
272 if (++bhe == &buflist[NBUF])
278 if (left < blocksize - offset)
281 chars = blocksize - offset;
286 copy_to_user(buf,offset+(*bhe)->b_data,chars);
294 if (++bhe == &buflist[NBUF])
296 } while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe)));
297 if (bhe == bhb && !blocks)
301 /* Release the read-ahead blocks */
304 if (++bhe == &buflist[NBUF])
314 * Filp may be NULL when we are called by an msync of a vma
315 * since the vma has no handle.
318 static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
320 return fsync_dev(dentry->d_inode->i_rdev);
324 * bdev cache handling - shamelessly stolen from inode.c
325 * We use smaller hashtable, though.
329 #define HASH_SIZE (1UL << HASH_BITS)
330 #define HASH_MASK (HASH_SIZE-1)
331 static struct list_head bdev_hashtable[HASH_SIZE];
332 static spinlock_t bdev_lock = SPIN_LOCK_UNLOCKED;
333 static kmem_cache_t * bdev_cachep;
335 #define alloc_bdev() \
336 ((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL))
337 #define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev))
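/*
 * init_once() is the slab constructor for the bdev cache: it zeroes a
 * freshly constructed block_device and initialises its bd_sem semaphore.
 */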
339 static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
341 struct block_device * bdev = (struct block_device *) foo;
343 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
344 SLAB_CTOR_CONSTRUCTOR)
346 memset(bdev, 0, sizeof(*bdev));
347 sema_init(&bdev->bd_sem, 1);
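/*
 * bdev_init() initialises every list head in bdev_hashtable[] and creates
 * the "bdev_cache" slab cache, panicking if the cache cannot be created.
 */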
351 void __init bdev_init(void)
354 struct list_head *head = bdev_hashtable;
358 INIT_LIST_HEAD(head);
363 bdev_cachep = kmem_cache_create("bdev_cache",
364 sizeof(struct block_device),
365 0, SLAB_HWCACHE_ALIGN, init_once,
368 panic("Cannot create bdev_cache SLAB cache");
372 * Most likely _very_ bad one - but then it's hardly critical for small
373 * /dev and can be fixed when somebody will need really large one.
375 static inline unsigned long hash(dev_t dev)
377 unsigned long tmp = dev;
378 tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
379 return tmp & HASH_MASK;
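/*
 * hash() folds the dev_t onto itself (shifted by HASH_BITS and 2*HASH_BITS)
 * and masks the sum with HASH_MASK, yielding the bdev_hashtable[] index.
 */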
382 static struct block_device *bdfind(dev_t dev, struct list_head *head)
385 struct block_device *bdev;
386 for (p=head->next; p!=head; p=p->next) {
387 bdev = list_entry(p, struct block_device, bd_hash);
388 if (bdev->bd_dev != dev)
390 atomic_inc(&bdev->bd_count);
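/*
 * bdget() looks dev up in the hash under bdev_lock; if it is not found, a
 * new block_device is allocated and the hash is re-checked before insertion,
 * so a racing bdget() cannot add the same device twice - the loser destroys
 * its copy with destroy_bdev().
 */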
396 struct block_device *bdget(dev_t dev)
398 struct list_head * head = bdev_hashtable + hash(dev);
399 struct block_device *bdev, *new_bdev;
400 spin_lock(&bdev_lock);
401 bdev = bdfind(dev, head);
402 spin_unlock(&bdev_lock);
405 new_bdev = alloc_bdev();
408 atomic_set(&new_bdev->bd_count,1);
409 new_bdev->bd_dev = dev;
410 new_bdev->bd_op = NULL;
411 spin_lock(&bdev_lock);
412 bdev = bdfind(dev, head);
414 list_add(&new_bdev->bd_hash, head);
415 spin_unlock(&bdev_lock);
418 spin_unlock(&bdev_lock);
419 destroy_bdev(new_bdev);
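/*
 * bdput() drops a reference; when the last one goes away the entry must no
 * longer be open (bd_openers is checked) and it is unhashed under bdev_lock.
 */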
423 void bdput(struct block_device *bdev)
425 if (atomic_dec_and_test(&bdev->bd_count)) {
426 spin_lock(&bdev_lock);
427 if (atomic_read(&bdev->bd_openers))
429 list_del(&bdev->bd_hash);
430 spin_unlock(&bdev_lock);
437 struct block_device_operations *bdops;
438 } blkdevs[MAX_BLKDEV] = {
442 int get_blkdev_list(char * p)
447 len = sprintf(p, "\nBlock devices:\n");
448 for (i = 0; i < MAX_BLKDEV; i++) {
449 if (blkdevs[i].bdops) {
450 len += sprintf(p+len, "%3d %s\n", i, blkdevs[i].name);
457 Return the function table of a device.
458 Load the driver if needed.
460 const struct block_device_operations * get_blkfops(unsigned int major)
462 const struct block_device_operations *ret = NULL;
464 /* major 0 is used for non-device mounts */
465 if (major && major < MAX_BLKDEV) {
467 if (!blkdevs[major].bdops) {
469 sprintf(name, "block-major-%d", major);
470 request_module(name);
473 ret = blkdevs[major].bdops;
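/*
 * register_blkdev(): with major == 0 a free major number is allocated by
 * scanning blkdevs[] from the top; otherwise the requested slot is claimed
 * provided it is unused or already owned by the same bdops.
 */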
478 int register_blkdev(unsigned int major, const char * name, struct block_device_operations *bdops)
481 for (major = MAX_BLKDEV-1; major > 0; major--) {
482 if (blkdevs[major].bdops == NULL) {
483 blkdevs[major].name = name;
484 blkdevs[major].bdops = bdops;
490 if (major >= MAX_BLKDEV)
492 if (blkdevs[major].bdops && blkdevs[major].bdops != bdops)
494 blkdevs[major].name = name;
495 blkdevs[major].bdops = bdops;
499 int unregister_blkdev(unsigned int major, const char * name)
501 if (major >= MAX_BLKDEV)
503 if (!blkdevs[major].bdops)
505 if (strcmp(blkdevs[major].name, name))
507 blkdevs[major].name = NULL;
508 blkdevs[major].bdops = NULL;
513 * This routine checks whether a removable media has been changed,
514 * and invalidates all buffer-cache-entries in that case. This
515 * is a relatively slow routine, so we have to try to minimize using
516 * it. Thus it is called only upon a 'mount' or 'open'. This
517 * is the best way of combining speed and utility, I think.
518 * People changing diskettes in the middle of an operation deserve
521 int check_disk_change(kdev_t dev)
524 const struct block_device_operations * bdops = NULL;
525 struct super_block * sb;
529 bdops = blkdevs[i].bdops;
533 de = devfs_find_handle (NULL, NULL, i, MINOR (dev),
534 DEVFS_SPECIAL_BLK, 0);
535 if (de) bdops = devfs_get_ops (de);
539 if (bdops->check_media_change == NULL)
541 if (!bdops->check_media_change(dev))
544 printk(KERN_DEBUG "VFS: Disk change detected on device %s\n",
548 if (sb && invalidate_inodes(sb))
549 printk("VFS: busy inodes on changed media.\n");
551 destroy_buffers(dev);
553 if (bdops->revalidate)
554 bdops->revalidate(dev);
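/*
 * ioctl_by_bdev() lets the kernel invoke a driver's ioctl method directly:
 * a throwaway inode is built on the stack and the caller's fs segment is
 * saved (old_fs) so that the ioctl can be issued with kernel-space arguments.
 */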
558 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
560 kdev_t rdev = to_kdev_t(bdev->bd_dev);
561 struct inode inode_fake;
563 mm_segment_t old_fs = get_fs();
565 if (!bdev->bd_op->ioctl)
567 inode_fake.i_rdev=rdev;
568 init_waitqueue_head(&inode_fake.i_wait);
570 res = bdev->bd_op->ioctl(&inode_fake, NULL, cmd, arg);
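/*
 * blkdev_get() opens a block device from kernel context, without a real
 * struct file. The driver's open() method still expects file/dentry/inode
 * arguments, so fake objects carrying only the device number are passed in.
 */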
575 int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
578 kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
581 bdev->bd_op = get_blkfops(MAJOR(rdev));
584 * This crockload is due to bad choice of ->open() type.
586 * For now, block device ->open() routine must _not_
587 * examine anything in 'inode' argument except ->i_rdev.
589 struct file fake_file = {};
590 struct dentry fake_dentry = {};
591 struct inode *fake_inode = get_empty_inode();
594 fake_file.f_mode = mode;
595 fake_file.f_flags = flags;
596 fake_file.f_dentry = &fake_dentry;
597 fake_dentry.d_inode = fake_inode;
598 fake_inode->i_rdev = rdev;
600 if (bdev->bd_op->open)
601 ret = bdev->bd_op->open(fake_inode, &fake_file);
603 atomic_inc(&bdev->bd_openers);
604 else if (!atomic_read(&bdev->bd_openers))
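/*
 * blkdev_open() is the open routine used for block-special inodes: it
 * resolves bd_op through get_blkfops(), forwards the open to the driver and
 * counts successful opens in bd_openers.
 */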
613 int blkdev_open(struct inode * inode, struct file * filp)
616 struct block_device *bdev = inode->i_bdev;
620 bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
623 if (bdev->bd_op->open)
624 ret = bdev->bd_op->open(inode,filp);
626 atomic_inc(&bdev->bd_openers);
627 else if (!atomic_read(&bdev->bd_openers))
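/*
 * blkdev_put() undoes blkdev_get()/blkdev_open(): BDEV_FILE and BDEV_FS
 * holders get the device synced, the buffer cache is invalidated when the
 * last opener goes away, and the driver's release() method is handed a fake
 * inode, mirroring the open in blkdev_get().
 */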
635 int blkdev_put(struct block_device *bdev, int kind)
638 kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
640 /* syncing will go here */
642 if (kind == BDEV_FILE || kind == BDEV_FS)
644 if (atomic_dec_and_test(&bdev->bd_openers)) {
645 /* invalidating buffers will go here */
646 invalidate_buffers(rdev);
648 if (bdev->bd_op->release) {
649 struct inode * fake_inode = get_empty_inode();
652 fake_inode->i_rdev = rdev;
653 ret = bdev->bd_op->release(fake_inode, NULL);
657 if (!atomic_read(&bdev->bd_openers))
658 bdev->bd_op = NULL; /* we can't rely on driver being */
659 /* kind to stay around. */
665 static int blkdev_close(struct inode * inode, struct file * filp)
667 return blkdev_put(inode->i_bdev, BDEV_FILE);
670 static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
673 if (inode->i_bdev->bd_op->ioctl)
674 return inode->i_bdev->bd_op->ioctl(inode, file, cmd, arg);
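/*
 * def_blk_fops is the default file_operations table installed for
 * block-special inodes; blkdev_open and blkdev_close above serve as its
 * open and release methods.
 */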
678 struct file_operations def_blk_fops = {
680 release: blkdev_close,
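/*
 * bdevname() formats "name(major,minor)" into a static buffer, so the
 * returned string is overwritten by the next call and is not re-entrant.
 */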
687 const char * bdevname(kdev_t dev)
689 static char buffer[32];
690 const char * name = blkdevs[MAJOR(dev)].name;
693 name = "unknown-block";
695 sprintf(buffer, "%s(%d,%d)", name, MAJOR(dev), MINOR(dev));