4 * Copyright (C) 1991, 1992 Linus Torvalds
7 #include <linux/config.h>
8 #include <linux/init.h>
10 #include <linux/locks.h>
11 #include <linux/fcntl.h>
12 #include <linux/malloc.h>
13 #include <linux/kmod.h>
14 #include <linux/devfs_fs_kernel.h>
15 #include <linux/smp_lock.h>
17 #include <asm/uaccess.h>
/*
 * Per-major device tables.  Usage below shows both are indexed as
 * table[MAJOR(dev)][MINOR(dev)]: blk_size holds device sizes
 * (converted to bytes via << BLOCK_SIZE_BITS below) and blksize_size
 * holds the per-device soft block size.
 *
 * NOTE(review): the stray decimal prefixes ("19", "20", ...) and the
 * mid-declaration line breaks throughout this file look like stale
 * line numbers left behind by a lossy extraction -- the text is not
 * valid C as it stands; restore from the pristine source.
 */
19 extern int *blk_size
[];
20 extern int *blksize_size
[];
/* How many 512-byte buffers fit in one page (PAGE_SIZE is arch-defined). */
22 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
/*
 * block_write() -- raw write to a block device through the buffer cache.
 *
 * Visible logic: choose the block size (BLOCK_SIZE, or the per-device
 * blksize_size[MAJOR][MINOR] override), translate *ppos into a
 * (block, offset) pair, copy user data into buffer heads with
 * copy_from_user(), mark them uptodate/dirty, and -- for O_SYNC files --
 * batch them into bufferlist[] which is flushed NBUF at a time with
 * ll_rw_block(WRITE, ...).  A partial-block write reads the block first
 * (optionally with read-ahead into bhlist[]) before modifying it.
 * Error paths return the bytes written so far, or -EIO/-ENOSPC if
 * nothing was written.
 *
 * NOTE(review): this block is a fragment -- braces and several lines
 * are missing (the error return after is_read_only(), the declarations
 * of `written', `offset', `size', `chars', `p', loop headers, etc.),
 * and stale line numbers are embedded in the text.  Do not edit logic
 * here without recovering the complete original.
 */
25 ssize_t
block_write(struct file
* filp
, const char * buf
,
26 size_t count
, loff_t
*ppos
)
28 struct inode
* inode
= filp
->f_dentry
->d_inode
;
29 ssize_t blocksize
, blocksize_bits
, i
, buffercount
, write_error
;
30 ssize_t block
, blocks
;
34 struct buffer_head
* bhlist
[NBUF
];
36 kdev_t dev
= inode
->i_rdev
;
37 struct buffer_head
* bh
, *bufferlist
[NBUF
];
/* Refuse writes to read-only devices (error return line missing here). */
40 if (is_read_only(dev
))
43 written
= write_error
= buffercount
= 0;
/* Default block size, overridden by the per-device table if set. */
44 blocksize
= BLOCK_SIZE
;
45 if (blksize_size
[MAJOR(dev
)] && blksize_size
[MAJOR(dev
)][MINOR(dev
)])
46 blocksize
= blksize_size
[MAJOR(dev
)][MINOR(dev
)];
/* Split the file position into whole blocks and an intra-block offset. */
55 block
= *ppos
>> blocksize_bits
;
56 offset
= *ppos
& (blocksize
-1);
/* Device size, converted from blk_size[] units to this block size. */
58 if (blk_size
[MAJOR(dev
)])
59 size
= ((loff_t
) blk_size
[MAJOR(dev
)][MINOR(dev
)] << BLOCK_SIZE_BITS
) >> blocksize_bits
;
64 return written
? written
: -ENOSPC
;
65 chars
= blocksize
- offset
;
70 /* get the buffer head */
72 struct buffer_head
* (*fn
)(kdev_t
, int, int) = getblk
;
/* Partial-block write: the existing block contents must be read first. */
73 if (chars
!= blocksize
)
75 bh
= fn(dev
, block
, blocksize
);
77 return written
? written
: -EIO
;
78 if (!buffer_uptodate(bh
))
82 bh
= getblk(dev
, block
, blocksize
);
84 return written
? written
: -EIO
;
86 if (!buffer_uptodate(bh
))
88 if (chars
== blocksize
)
93 if (!filp
->f_reada
|| !read_ahead
[MAJOR(dev
)]) {
94 /* We do this to force the read of a single buffer */
97 /* Read-ahead before write */
/* Read-ahead window: half the driver's read_ahead, in blocks, clamped
 * to the device end and to the NBUF-sized bhlist[]. */
98 blocks
= read_ahead
[MAJOR(dev
)] / (blocksize
>> 9) / 2;
99 if (block
+ blocks
> size
) blocks
= size
- block
;
100 if (blocks
> NBUF
) blocks
=NBUF
;
101 if (!blocks
) blocks
= 1;
102 for(i
=1; i
<blocks
; i
++)
104 bhlist
[i
] = getblk (dev
, block
+i
, blocksize
);
/* getblk failure path: release everything grabbed so far. */
107 while(i
>= 0) brelse(bhlist
[i
--]);
108 return written
? written
: -EIO
;
112 ll_rw_block(READ
, blocks
, bhlist
);
113 for(i
=1; i
<blocks
; i
++) brelse(bhlist
[i
]);
115 if (!buffer_uptodate(bh
)) {
117 return written
? written
: -EIO
;
123 p
= offset
+ bh
->b_data
;
128 copy_from_user(p
,buf
,chars
);
131 mark_buffer_uptodate(bh
, 1);
132 mark_buffer_dirty(bh
);
133 if (filp
->f_flags
& O_SYNC
)
134 bufferlist
[buffercount
++] = bh
;
/* Batch is full: write it out and wait for completion (O_SYNC path). */
137 if (buffercount
== NBUF
){
138 ll_rw_block(WRITE
, buffercount
, bufferlist
);
139 for(i
=0; i
<buffercount
; i
++){
140 wait_on_buffer(bufferlist
[i
]);
141 if (!buffer_uptodate(bufferlist
[i
]))
143 brelse(bufferlist
[i
]);
/* Final flush of any remaining batched buffers before returning. */
152 ll_rw_block(WRITE
, buffercount
, bufferlist
);
153 for(i
=0; i
<buffercount
; i
++){
154 wait_on_buffer(bufferlist
[i
]);
155 if (!buffer_uptodate(bufferlist
[i
]))
157 brelse(bufferlist
[i
]);
/*
 * block_read() -- raw read from a block device through the buffer cache.
 *
 * Visible logic: compute the block size and device size (falling back
 * to INT_MAX blocks when blk_size[] has no entry), clamp the request,
 * then run a two-stage pipeline over the circular buflist[]: bhb is the
 * "request" cursor (getblk + collect not-uptodate buffers into bhreq[]
 * and submit with ll_rw_block(READ, ...)), bhe is the "extract" cursor
 * (wait_on_buffer, then copy_to_user the relevant bytes).  `rblocks'
 * remembers the really-needed count before read-ahead inflation so the
 * extra read-ahead buffers can be released at the end.
 *
 * NOTE(review): fragment -- braces, the declarations of `blocksize',
 * `dev', `size', `offset', `block', `chars', the main loop headers and
 * several statements are missing, and stale line numbers are embedded.
 * Recover the complete original before changing logic.
 */
166 ssize_t
block_read(struct file
* filp
, char * buf
, size_t count
, loff_t
*ppos
)
168 struct inode
* inode
= filp
->f_dentry
->d_inode
;
172 ssize_t blocksize_bits
, i
;
173 size_t blocks
, rblocks
, left
;
174 int bhrequest
, uptodate
;
175 struct buffer_head
** bhb
, ** bhe
;
176 struct buffer_head
* buflist
[NBUF
];
177 struct buffer_head
* bhreq
[NBUF
];
/* Block size: BLOCK_SIZE unless the per-device table overrides it. */
184 blocksize
= BLOCK_SIZE
;
185 if (blksize_size
[MAJOR(dev
)] && blksize_size
[MAJOR(dev
)][MINOR(dev
)])
186 blocksize
= blksize_size
[MAJOR(dev
)][MINOR(dev
)];
/* Device size in bytes; unknown devices are treated as "huge". */
195 if (blk_size
[MAJOR(dev
)])
196 size
= (loff_t
) blk_size
[MAJOR(dev
)][MINOR(dev
)] << BLOCK_SIZE_BITS
;
198 size
= (loff_t
) INT_MAX
<< BLOCK_SIZE_BITS
;
202 /* size - offset might not fit into left, so check explicitly. */
203 else if (size
- offset
> INT_MAX
)
206 left
= size
- offset
;
/* Translate byte offset into (block, intra-block offset); size becomes
 * a block count from here on. */
212 block
= offset
>> blocksize_bits
;
213 offset
&= blocksize
-1;
214 size
>>= blocksize_bits
;
215 rblocks
= blocks
= (left
+ offset
+ blocksize
- 1) >> blocksize_bits
;
/* Inflate the request up to the driver's read-ahead window. */
218 if (blocks
< read_ahead
[MAJOR(dev
)] / (blocksize
>> 9))
219 blocks
= read_ahead
[MAJOR(dev
)] / (blocksize
>> 9);
220 if (rblocks
> blocks
)
224 if (block
+ blocks
> size
) {
225 blocks
= size
- block
;
230 /* We do this in a two stage process. We first try to request
231 as many blocks as we can, then we wait for the first one to
232 complete, and then we try to wrap up as many as are actually
233 done. This routine is rather generic, in that it can be used
234 in a filesystem by substituting the appropriate function in
237 This routine is optimized to make maximum use of the various
238 buffers and caches. */
/* Stage 1: grab buffers and queue the ones that need real I/O. */
245 *bhb
= getblk(dev
, block
++, blocksize
);
246 if (*bhb
&& !buffer_uptodate(*bhb
)) {
248 bhreq
[bhrequest
++] = *bhb
;
251 if (++bhb
== &buflist
[NBUF
])
254 /* If the block we have on hand is uptodate, go ahead
255 and complete processing. */
262 /* Now request them all */
264 ll_rw_block(READ
, bhrequest
, bhreq
);
267 do { /* Finish off all I/O that has actually completed */
269 wait_on_buffer(*bhe
);
270 if (!buffer_uptodate(*bhe
)) { /* read error? */
272 if (++bhe
== &buflist
[NBUF
])
/* Stage 2: copy out of each completed buffer; chars is clamped to
 * what remains of the request within this block. */
278 if (left
< blocksize
- offset
)
281 chars
= blocksize
- offset
;
286 copy_to_user(buf
,offset
+(*bhe
)->b_data
,chars
);
294 if (++bhe
== &buflist
[NBUF
])
296 } while (left
> 0 && bhe
!= bhb
&& (!*bhe
|| !buffer_locked(*bhe
)));
297 if (bhe
== bhb
&& !blocks
)
301 /* Release the read-ahead blocks */
304 if (++bhe
== &buflist
[NBUF
])
315 * for a block special file file->f_dentry->d_inode->i_size is zero
316 * so we compute the size by hand (just as in block_read/write above)
/*
 * block_llseek() -- lseek for block device files.  For SEEK_END-style
 * origins the device size is recomputed from blk_size[] (i_size is
 * zero for block specials, per the comment above); SEEK_CUR adds the
 * current f_pos.  A changed position bumps f_version with the global
 * `event' counter.  NOTE(review): fragment -- the origin switch,
 * braces and the return are missing from this extraction.
 */
318 static loff_t
block_llseek(struct file
*file
, loff_t offset
, int origin
)
325 dev
= file
->f_dentry
->d_inode
->i_rdev
;
326 if (blk_size
[MAJOR(dev
)])
327 offset
+= (loff_t
) blk_size
[MAJOR(dev
)][MINOR(dev
)] << BLOCK_SIZE_BITS
;
328 /* else? return -EINVAL? */
331 offset
+= file
->f_pos
;
335 if (offset
!= file
->f_pos
) {
336 file
->f_pos
= offset
;
338 file
->f_version
= ++event
;
347 * Filp may be NULL when we are called by an msync of a vma
348 * since the vma has no handle.
/*
 * block_fsync() -- fsync for a block device: just flush everything for
 * the whole device via fsync_dev().  The filp and datasync arguments
 * are unused here (filp may legitimately be NULL, per the note above).
 */
351 static int block_fsync(struct file
*filp
, struct dentry
*dentry
, int datasync
)
353 return fsync_dev(dentry
->d_inode
->i_rdev
);
357 * bdev cache handling - shamelessly stolen from inode.c
358 * We use smaller hashtable, though.
/*
 * Hash table of struct block_device objects, protected by bdev_lock,
 * backed by the bdev_cachep slab cache.  HASH_BITS is defined outside
 * this fragment.  alloc_bdev()/destroy_bdev() are thin slab wrappers.
 */
362 #define HASH_SIZE (1UL << HASH_BITS)
363 #define HASH_MASK (HASH_SIZE-1)
364 static struct list_head bdev_hashtable
[HASH_SIZE
];
365 static spinlock_t bdev_lock
= SPIN_LOCK_UNLOCKED
;
366 static kmem_cache_t
* bdev_cachep
;
368 #define alloc_bdev() \
369 ((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL))
370 #define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev))
/*
 * init_once() -- slab constructor for bdev_cachep objects: on first
 * construction (SLAB_CTOR_CONSTRUCTOR set, SLAB_CTOR_VERIFY clear)
 * zero the object and initialise its bd_sem semaphore to 1.
 * NOTE(review): fragment -- braces are missing from this extraction.
 */
372 static void init_once(void * foo
, kmem_cache_t
* cachep
, unsigned long flags
)
374 struct block_device
* bdev
= (struct block_device
*) foo
;
376 if ((flags
& (SLAB_CTOR_VERIFY
|SLAB_CTOR_CONSTRUCTOR
)) ==
377 SLAB_CTOR_CONSTRUCTOR
)
379 memset(bdev
, 0, sizeof(*bdev
));
380 sema_init(&bdev
->bd_sem
, 1);
/*
 * bdev_init() -- boot-time setup: initialise every bucket of
 * bdev_hashtable (the loop header around INIT_LIST_HEAD is missing
 * from this fragment) and create the bdev slab cache, panicking on
 * failure since the kernel cannot run without it.
 */
384 void __init
bdev_init(void)
387 struct list_head
*head
= bdev_hashtable
;
391 INIT_LIST_HEAD(head
);
396 bdev_cachep
= kmem_cache_create("bdev_cache",
397 sizeof(struct block_device
),
398 0, SLAB_HWCACHE_ALIGN
, init_once
,
401 panic("Cannot create bdev_cache SLAB cache");
405 * Most likely _very_ bad one - but then it's hardly critical for small
406 * /dev and can be fixed when somebody will need really large one.
/*
 * hash() -- fold a dev_t into a bdev_hashtable bucket index by summing
 * the value with itself shifted down by HASH_BITS and 2*HASH_BITS,
 * then masking to HASH_MASK.  Deliberately simple (see note above).
 */
408 static inline unsigned long hash(dev_t dev
)
410 unsigned long tmp
= dev
;
411 tmp
= tmp
+ (tmp
>> HASH_BITS
) + (tmp
>> HASH_BITS
*2);
412 return tmp
& HASH_MASK
;
/*
 * bdfind() -- walk one hash chain looking for the block_device with
 * bd_dev == dev; on a hit, take a reference (atomic_inc of bd_count)
 * before returning it.  Caller must hold bdev_lock (see bdget/bdput).
 * NOTE(review): fragment -- the declaration of `p', the `continue'/
 * return statements and braces are missing from this extraction.
 */
415 static struct block_device
*bdfind(dev_t dev
, struct list_head
*head
)
418 struct block_device
*bdev
;
419 for (p
=head
->next
; p
!=head
; p
=p
->next
) {
420 bdev
= list_entry(p
, struct block_device
, bd_hash
);
421 if (bdev
->bd_dev
!= dev
)
423 atomic_inc(&bdev
->bd_count
);
/*
 * bdget() -- look up (or create) the block_device for dev.
 * Classic optimistic-allocation pattern: search under bdev_lock; on a
 * miss, drop the lock, allocate and initialise a candidate (refcount 1,
 * bd_op NULL), retake the lock and search again.  If still absent, the
 * candidate is linked into the hash chain; if another CPU raced us in,
 * the candidate is destroyed and the found object (already referenced
 * by bdfind) is used instead.
 * NOTE(review): fragment -- the return statements, NULL-allocation
 * check and braces are missing from this extraction.
 */
429 struct block_device
*bdget(dev_t dev
)
431 struct list_head
* head
= bdev_hashtable
+ hash(dev
);
432 struct block_device
*bdev
, *new_bdev
;
433 spin_lock(&bdev_lock
);
434 bdev
= bdfind(dev
, head
);
435 spin_unlock(&bdev_lock
);
438 new_bdev
= alloc_bdev();
441 atomic_set(&new_bdev
->bd_count
,1);
442 new_bdev
->bd_dev
= dev
;
443 new_bdev
->bd_op
= NULL
;
444 spin_lock(&bdev_lock
);
445 bdev
= bdfind(dev
, head
);
447 list_add(&new_bdev
->bd_hash
, head
);
448 spin_unlock(&bdev_lock
);
/* Lost the race: somebody else inserted this dev; discard ours. */
451 spin_unlock(&bdev_lock
);
452 destroy_bdev(new_bdev
);
/*
 * bdput() -- drop a reference to a block_device.  When the last
 * reference goes (atomic_dec_and_test), unhash it under bdev_lock --
 * unless it still has openers, which would indicate a refcounting bug
 * (the handling of that branch is missing from this fragment, as is
 * the final destroy_bdev call).
 */
456 void bdput(struct block_device
*bdev
)
458 if (atomic_dec_and_test(&bdev
->bd_count
)) {
459 spin_lock(&bdev_lock
);
460 if (atomic_read(&bdev
->bd_openers
))
462 list_del(&bdev
->bd_hash
);
463 spin_unlock(&bdev_lock
);
/*
 * Tail of the major-number registry: blkdevs[major] maps a block major
 * to its operations table (and, per the sprintf of .name below, a
 * driver name).  NOTE(review): the struct's opening lines (the tag and
 * the `name' member) are missing from this extraction.
 */
470 struct block_device_operations
*bdops
;
471 } blkdevs
[MAX_BLKDEV
];
/*
 * get_blkdev_list() -- format the registered block majors into `p'
 * (one "%3d name" line per occupied blkdevs[] slot, after a
 * "Block devices:" header) and return the number of bytes written.
 * Used for /proc-style listings.  NOTE(review): fragment -- the
 * declarations of `i'/`len', braces and the return are missing.
 */
473 int get_blkdev_list(char * p
)
478 len
= sprintf(p
, "\nBlock devices:\n");
479 for (i
= 0; i
< MAX_BLKDEV
; i
++) {
480 if (blkdevs
[i
].bdops
) {
481 len
+= sprintf(p
+len
, "%3d %s\n", i
, blkdevs
[i
].name
);
488 Return the function table of a device.
489 Load the driver if needed.
/*
 * get_blkfops() -- return blkdevs[major].bdops for a valid major,
 * first attempting to demand-load the driver with
 * request_module("block-major-%d") if the slot is empty.
 * NOTE(review): fragment -- the `name' buffer declaration, braces and
 * the return of `ret' are missing from this extraction.
 */
491 const struct block_device_operations
* get_blkfops(unsigned int major
)
493 const struct block_device_operations
*ret
= NULL
;
495 /* major 0 is used for non-device mounts */
496 if (major
&& major
< MAX_BLKDEV
) {
498 if (!blkdevs
[major
].bdops
) {
500 sprintf(name
, "block-major-%d", major
);
501 request_module(name
);
504 ret
= blkdevs
[major
].bdops
;
/*
 * register_blkdev() -- claim a block major for `bdops'.
 * major == 0 means "allocate dynamically": scan downward from
 * MAX_BLKDEV-1 for a free slot (the if(major==0) wrapper around that
 * loop is missing from this fragment).  A specific major is validated
 * against MAX_BLKDEV and rejected if already taken by a different
 * bdops; otherwise the slot's name/bdops are (re)assigned.
 * NOTE(review): fragment -- braces and the error/success returns
 * (-EBUSY/-EINVAL/0 in this era's convention) are missing.
 */
509 int register_blkdev(unsigned int major
, const char * name
, struct block_device_operations
*bdops
)
512 for (major
= MAX_BLKDEV
-1; major
> 0; major
--) {
513 if (blkdevs
[major
].bdops
== NULL
) {
514 blkdevs
[major
].name
= name
;
515 blkdevs
[major
].bdops
= bdops
;
521 if (major
>= MAX_BLKDEV
)
523 if (blkdevs
[major
].bdops
&& blkdevs
[major
].bdops
!= bdops
)
525 blkdevs
[major
].name
= name
;
526 blkdevs
[major
].bdops
= bdops
;
/*
 * unregister_blkdev() -- release a block major.  Validates that the
 * major is in range, registered, and registered under the same name
 * (strcmp), then clears the slot.  NOTE(review): fragment -- braces
 * and the error/success return statements are missing.
 */
530 int unregister_blkdev(unsigned int major
, const char * name
)
532 if (major
>= MAX_BLKDEV
)
534 if (!blkdevs
[major
].bdops
)
536 if (strcmp(blkdevs
[major
].name
, name
))
538 blkdevs
[major
].name
= NULL
;
539 blkdevs
[major
].bdops
= NULL
;
544 * This routine checks whether a removable media has been changed,
545 * and invalidates all buffer-cache-entries in that case. This
546 * is a relatively slow routine, so we have to try to minimize using
547 * it. Thus it is called only upon a 'mount' or 'open'. This
548 * is the best way of combining speed and utility, I think.
549 * People changing diskettes in the middle of an operation deserve
/*
 * check_disk_change() -- media-change detection for removable devices.
 * Resolves the device's operations table (from blkdevs[], falling back
 * to a devfs lookup via devfs_find_handle/devfs_get_ops), asks the
 * driver via ->check_media_change(dev), and on a change: prints a
 * debug message, invalidates inodes of any mounted superblock,
 * destroys the device's buffers, and lets the driver ->revalidate().
 * NOTE(review): fragment -- the declarations of `i'/`de', the CONFIG
 * conditionals, braces and return statements are missing.
 */
552 int check_disk_change(kdev_t dev
)
555 const struct block_device_operations
* bdops
= NULL
;
556 struct super_block
* sb
;
560 bdops
= blkdevs
[i
].bdops
;
564 de
= devfs_find_handle (NULL
, NULL
, i
, MINOR (dev
),
565 DEVFS_SPECIAL_BLK
, 0);
566 if (de
) bdops
= devfs_get_ops (de
);
570 if (bdops
->check_media_change
== NULL
)
572 if (!bdops
->check_media_change(dev
))
575 printk(KERN_DEBUG
"VFS: Disk change detected on device %s\n",
579 if (sb
&& invalidate_inodes(sb
))
580 printk("VFS: busy inodes on changed media.\n");
582 destroy_buffers(dev
);
584 if (bdops
->revalidate
)
585 bdops
->revalidate(dev
);
/*
 * ioctl_by_bdev() -- invoke a driver ioctl from kernel context.
 * Builds a minimal fake inode carrying only i_rdev (plus an
 * initialised i_wait queue) and calls bdev->bd_op->ioctl with a NULL
 * file pointer.  old_fs is saved, which implies a set_fs(KERNEL_DS)
 * around the call so kernel pointers pass the driver's user-copy
 * checks -- the set_fs lines themselves are missing from this
 * fragment, along with the `res' declaration and returns.
 */
589 int ioctl_by_bdev(struct block_device
*bdev
, unsigned cmd
, unsigned long arg
)
591 kdev_t rdev
= to_kdev_t(bdev
->bd_dev
);
592 struct inode inode_fake
;
594 mm_segment_t old_fs
= get_fs();
596 if (!bdev
->bd_op
->ioctl
)
598 inode_fake
.i_rdev
=rdev
;
599 init_waitqueue_head(&inode_fake
.i_wait
);
601 res
= bdev
->bd_op
->ioctl(&inode_fake
, NULL
, cmd
, arg
);
/*
 * blkdev_get() -- open a block device without a real struct file
 * (mount-time / kernel-internal path).  Resolves bd_op via
 * get_blkfops(), then fabricates a fake file/dentry/inode trio (the
 * driver's ->open() may only look at inode->i_rdev, per the comment
 * below) and calls ->open().  On success bd_openers is bumped; on a
 * failed first open the visible else-branch suggests bd_op is dropped
 * again.  NOTE(review): fragment -- the `ret' declaration, locking,
 * the iput/cleanup of fake_inode, braces and returns are missing.
 */
606 int blkdev_get(struct block_device
*bdev
, mode_t mode
, unsigned flags
, int kind
)
609 kdev_t rdev
= to_kdev_t(bdev
->bd_dev
); /* this should become bdev */
612 bdev
->bd_op
= get_blkfops(MAJOR(rdev
));
615 * This crockload is due to bad choice of ->open() type.
617 * For now, block device ->open() routine must _not_
618 * examine anything in 'inode' argument except ->i_rdev.
620 struct file fake_file
= {};
621 struct dentry fake_dentry
= {};
622 struct inode
*fake_inode
= get_empty_inode();
625 fake_file
.f_mode
= mode
;
626 fake_file
.f_flags
= flags
;
627 fake_file
.f_dentry
= &fake_dentry
;
628 fake_dentry
.d_inode
= fake_inode
;
629 fake_inode
->i_rdev
= rdev
;
631 if (bdev
->bd_op
->open
)
632 ret
= bdev
->bd_op
->open(fake_inode
, &fake_file
);
634 atomic_inc(&bdev
->bd_openers
);
635 else if (!atomic_read(&bdev
->bd_openers
))
/*
 * blkdev_open() -- the file_operations ->open for block device nodes.
 * Resolves bd_op from the major number if needed, calls the driver's
 * ->open(inode, filp), and on success increments bd_openers; the
 * visible else-branch suggests bd_op is cleared again when the first
 * open fails.  NOTE(review): fragment -- the `ret' declaration,
 * locking, braces and return statements are missing.
 */
644 int blkdev_open(struct inode
* inode
, struct file
* filp
)
647 struct block_device
*bdev
= inode
->i_bdev
;
651 bdev
->bd_op
= get_blkfops(MAJOR(inode
->i_rdev
));
654 if (bdev
->bd_op
->open
)
655 ret
= bdev
->bd_op
->open(inode
,filp
);
657 atomic_inc(&bdev
->bd_openers
);
658 else if (!atomic_read(&bdev
->bd_openers
))
/*
 * blkdev_put() -- release one opener of a block device.  For
 * BDEV_FILE/BDEV_FS kinds a sync presumably happens first (that line
 * is missing; only the comment placeholders survive).  When the last
 * opener goes (atomic_dec_and_test of bd_openers), buffers are
 * invalidated, the driver's ->release() is called with a fake inode
 * carrying only i_rdev, and bd_op is dropped so the driver module may
 * unload.  NOTE(review): fragment -- the `ret' declaration, locking,
 * fake_inode cleanup, braces and returns are missing.
 */
666 int blkdev_put(struct block_device
*bdev
, int kind
)
669 kdev_t rdev
= to_kdev_t(bdev
->bd_dev
); /* this should become bdev */
671 /* syncing will go here */
673 if (kind
== BDEV_FILE
|| kind
== BDEV_FS
)
675 if (atomic_dec_and_test(&bdev
->bd_openers
)) {
676 /* invalidating buffers will go here */
677 invalidate_buffers(rdev
);
679 if (bdev
->bd_op
->release
) {
680 struct inode
* fake_inode
= get_empty_inode();
683 fake_inode
->i_rdev
= rdev
;
684 ret
= bdev
->bd_op
->release(fake_inode
, NULL
);
688 if (!atomic_read(&bdev
->bd_openers
))
689 bdev
->bd_op
= NULL
; /* we can't rely on driver being */
690 /* kind to stay around. */
/*
 * blkdev_close() -- the file_operations ->release hook; delegates to
 * blkdev_put() with kind BDEV_FILE.
 */
696 static int blkdev_close(struct inode
* inode
, struct file
* filp
)
698 return blkdev_put(inode
->i_bdev
, BDEV_FILE
);
/*
 * blkdev_ioctl() -- the file_operations ->ioctl hook: forward to the
 * driver's bd_op->ioctl if one exists.  NOTE(review): fragment -- the
 * `arg' parameter, braces and the fallback error return (-EINVAL in
 * this era) are missing from this extraction.
 */
701 static int blkdev_ioctl(struct inode
*inode
, struct file
*file
, unsigned cmd
,
704 if (inode
->i_bdev
->bd_op
->ioctl
)
705 return inode
->i_bdev
->bd_op
->ioctl(inode
, file
, cmd
, arg
);
/*
 * Default file_operations for block device nodes, using GNU-style
 * designated initializers (`field:').  Only release and llseek survive
 * in this fragment; the other members (open/read/write/ioctl/fsync)
 * are missing from the extraction.
 */
709 struct file_operations def_blk_fops
= {
711 release
: blkdev_close
,
712 llseek
: block_llseek
,
/*
 * bdevname() -- format a kdev_t as "name(major,minor)" into a static
 * buffer (NOT re-entrant or thread-safe: every call overwrites the
 * same 32-byte buffer).  Unregistered majors print as
 * "unknown-block(...)".  NOTE(review): fragment -- the NULL-name test
 * guarding the fallback, braces and the return are missing.
 */
719 const char * bdevname(kdev_t dev
)
721 static char buffer
[32];
722 const char * name
= blkdevs
[MAJOR(dev
)].name
;
725 name
= "unknown-block";
727 sprintf(buffer
, "%s(%d,%d)", name
, MAJOR(dev
), MINOR(dev
));