/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
#include <linux/config.h>
#include <linux/locks.h>
#include <linux/fcntl.h>
#include <linux/malloc.h>
#include <linux/kmod.h>
#include <linux/devfs_fs_kernel.h>

#include <asm/uaccess.h>
extern int *blk_size[];
extern int *blksize_size[];

#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
ssize_t block_write(struct file * filp, const char * buf,
                    size_t count, loff_t *ppos)
{
        struct inode * inode = filp->f_dentry->d_inode;
        ssize_t blocksize, blocksize_bits, i, buffercount, write_error;
        ssize_t block, blocks;
        loff_t offset;
        ssize_t chars;
        ssize_t written;
        struct buffer_head * bhlist[NBUF];
        size_t size;
        kdev_t dev;
        struct buffer_head * bh, *bufferlist[NBUF];
        register char * p;

        write_error = buffercount = 0;
        dev = inode->i_rdev;
        if ( is_read_only( inode->i_rdev ))
                return -EPERM;
        blocksize = BLOCK_SIZE;
        if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
                blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
        i = blocksize;
        blocksize_bits = 0;
        while (i != 1) {
                blocksize_bits++;
                i >>= 1;
        }

        block = *ppos >> blocksize_bits;
        offset = *ppos & (blocksize-1);

        if (blk_size[MAJOR(dev)])
                size = ((loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS) >> blocksize_bits;
        else
                size = INT_MAX;
        written = 0;
        while (count > 0) {
                if (block >= size)
                        return written ? written : -ENOSPC;
                chars = blocksize - offset;
                if (chars > count)
                        chars = count;
#if 0
                /* get the buffer head */
                {
                        struct buffer_head * (*fn)(kdev_t, int, int) = getblk;
                        if (chars != blocksize)
                                fn = bread;
                        bh = fn(dev, block, blocksize);
                        if (!bh)
                                return written ? written : -EIO;
                        if (!buffer_uptodate(bh))
                                wait_on_buffer(bh);
                }
#else
                bh = getblk(dev, block, blocksize);
                if (!bh)
                        return written ? written : -EIO;

                if (!buffer_uptodate(bh))
                {
                        if (chars == blocksize)
                                wait_on_buffer(bh);
                        else {
                                bhlist[0] = bh;
                                if (!filp->f_reada || !read_ahead[MAJOR(dev)]) {
                                        /* We do this to force the read of a single buffer */
                                        blocks = 1;
                                } else {
                                        /* Read-ahead before write */
                                        blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2;
                                        if (block + blocks > size) blocks = size - block;
                                        if (blocks > NBUF) blocks = NBUF;
                                        if (!blocks) blocks = 1;
                                        for (i = 1; i < blocks; i++) {
                                                bhlist[i] = getblk(dev, block+i, blocksize);
                                                if (!bhlist[i]) {
                                                        while (i >= 0) brelse(bhlist[i--]);
                                                        return written ? written : -EIO;
                                                }
                                        }
                                }
                                ll_rw_block(READ, blocks, bhlist);
                                for (i = 1; i < blocks; i++) brelse(bhlist[i]);
                                wait_on_buffer(bh);
                                if (!buffer_uptodate(bh)) {
                                        brelse(bh);
                                        return written ? written : -EIO;
                                }
                        }
                }
#endif
                block++;
                p = offset + bh->b_data;
                offset = 0;
                *ppos += chars;
                written += chars;
                count -= chars;
                copy_from_user(p, buf, chars);
                buf += chars;
                mark_buffer_uptodate(bh, 1);
                mark_buffer_dirty(bh, 0);
                if (filp->f_flags & O_SYNC)
                        bufferlist[buffercount++] = bh;
                else
                        brelse(bh);
                if (buffercount == NBUF) {
                        ll_rw_block(WRITE, buffercount, bufferlist);
                        for (i = 0; i < buffercount; i++) {
                                wait_on_buffer(bufferlist[i]);
                                if (!buffer_uptodate(bufferlist[i]))
                                        write_error = 1;
                                brelse(bufferlist[i]);
                        }
                        buffercount = 0;
                }
                if (write_error)
                        break;
        }
        if (buffercount) {
                ll_rw_block(WRITE, buffercount, bufferlist);
                for (i = 0; i < buffercount; i++) {
                        wait_on_buffer(bufferlist[i]);
                        if (!buffer_uptodate(bufferlist[i]))
                                write_error = 1;
                        brelse(bufferlist[i]);
                }
        }
        filp->f_reada = 1;
        if (write_error)
                return -EIO;
        return written;
}
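/*
 * A note on the O_SYNC path above: dirty buffers are collected in
 * bufferlist[] and flushed NBUF at a time; one ll_rw_block(WRITE, ...)
 * submits the batch, each buffer is then waited on, and a buffer that is
 * still not uptodate afterwards turns the write into -EIO.  A minimal
 * sketch of the same submit-then-wait pattern (the names 'batch' and 'n'
 * are illustrative only):
 *
 *      struct buffer_head *batch[NBUF];
 *      int n = 0;
 *      ...
 *      batch[n++] = bh;
 *      if (n == NBUF) {
 *              ll_rw_block(WRITE, n, batch);
 *              while (n--) {
 *                      wait_on_buffer(batch[n]);
 *                      if (!buffer_uptodate(batch[n]))
 *                              write_error = 1;
 *                      brelse(batch[n]);
 *              }
 *              n = 0;
 *      }
 */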
ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
{
        struct inode * inode = filp->f_dentry->d_inode;
        size_t block;
        loff_t offset;
        ssize_t blocksize;
        ssize_t blocksize_bits, i;
        size_t blocks, rblocks, left;
        int bhrequest, uptodate;
        struct buffer_head ** bhb, ** bhe;
        struct buffer_head * buflist[NBUF];
        struct buffer_head * bhreq[NBUF];
        unsigned int chars;
        loff_t size;
        kdev_t dev;
        ssize_t read;

        dev = inode->i_rdev;
        blocksize = BLOCK_SIZE;
        if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
                blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
        i = blocksize;
        blocksize_bits = 0;
        while (i != 1) {
                blocksize_bits++;
                i >>= 1;
        }

        offset = *ppos;
        if (blk_size[MAJOR(dev)])
                size = (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
        else
                size = (loff_t) INT_MAX << BLOCK_SIZE_BITS;

        if (offset > size)
                left = 0;
        /* size - offset might not fit into left, so check explicitly. */
        else if (size - offset > INT_MAX)
                left = INT_MAX;
        else
                left = size - offset;

        if (left > count)
                left = count;
        if (left <= 0)
                return 0;
        read = 0;
        block = offset >> blocksize_bits;
        offset &= blocksize-1;
        size >>= blocksize_bits;
        rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits;
        bhb = bhe = buflist;
        if (filp->f_reada) {
                if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9))
                        blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9);
                if (rblocks > blocks)
                        blocks = rblocks;
        }
        if (block + blocks > size) {
                blocks = size - block;
                if (blocks == 0)
                        return 0;
        }
        /* We do this in a two stage process.  We first try to request
           as many blocks as we can, then we wait for the first one to
           complete, and then we try to wrap up as many as are actually
           done.  This routine is rather generic, in that it can be used
           in a filesystem by substituting the appropriate function in
           for getblk().

           This routine is optimized to make maximum use of the various
           buffers and caches. */
        do {
                bhrequest = 0;
                uptodate = 1;
                while (blocks) {
                        --blocks;
                        *bhb = getblk(dev, block++, blocksize);
                        if (*bhb && !buffer_uptodate(*bhb)) {
                                uptodate = 0;
                                bhreq[bhrequest++] = *bhb;
                        }

                        if (++bhb == &buflist[NBUF])
                                bhb = buflist;

                        /* If the block we have on hand is uptodate, go ahead
                           and complete processing. */
                        if (uptodate)
                                break;
                        if (bhb == bhe)
                                break;
                }

                /* Now request them all */
                if (bhrequest)
                        ll_rw_block(READ, bhrequest, bhreq);
                do { /* Finish off all I/O that has actually completed */
                        if (*bhe) {
                                wait_on_buffer(*bhe);
                                if (!buffer_uptodate(*bhe)) {   /* read error? */
                                        brelse(*bhe);
                                        if (++bhe == &buflist[NBUF])
                                                bhe = buflist;
                                        left = 0;
                                        break;
                                }
                        }
                        if (left < blocksize - offset)
                                chars = left;
                        else
                                chars = blocksize - offset;
                        *ppos += chars;
                        left -= chars;
                        read += chars;
                        if (*bhe) {
                                copy_to_user(buf,offset+(*bhe)->b_data,chars);
                                brelse(*bhe);
                                buf += chars;
                        } else {
                                while (chars-- > 0)
                                        put_user(0, buf++);
                        }
                        offset = 0;
                        if (++bhe == &buflist[NBUF])
                                bhe = buflist;
                } while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe)));
                if (bhe == bhb && !blocks)
                        break;
        } while (left > 0);
        /* Release the read-ahead blocks */
        while (bhe != bhb) {
                brelse(*bhe);
                if (++bhe == &buflist[NBUF])
                        bhe = buflist;
        }

        filp->f_reada = 1;
        if (!read)
                return -EIO;
        return read;
}
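/*
 * A note on the loop structure above: buflist[] is used as a ring.  bhb is
 * the producer pointer (buffers being brought in with getblk()), bhe the
 * consumer pointer (buffers whose contents are copied out to user space),
 * and both advance with the same wrap idiom:
 *
 *      if (++bhe == &buflist[NBUF])
 *              bhe = buflist;
 *
 * New requests are queued with ll_rw_block(READ, ...) while earlier buffers
 * complete, so disk I/O overlaps with copy_to_user().
 */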
/*
 *      Filp may be NULL when we are called by an msync of a vma
 *      since the vma has no handle.
 */

static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
        return fsync_dev(dentry->d_inode->i_rdev);
}
/*
 * bdev cache handling - shamelessly stolen from inode.c
 * We use smaller hashtable, though.
 */

#define HASH_SIZE (1UL << HASH_BITS)
#define HASH_MASK (HASH_SIZE-1)
static struct list_head bdev_hashtable[HASH_SIZE];
static spinlock_t bdev_lock = SPIN_LOCK_UNLOCKED;
static kmem_cache_t * bdev_cachep;

#define alloc_bdev() \
        ((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL))
#define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev))
static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
        struct block_device * bdev = (struct block_device *) foo;

        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
            SLAB_CTOR_CONSTRUCTOR)
        {
                memset(bdev, 0, sizeof(*bdev));
                sema_init(&bdev->bd_sem, 1);
        }
}
void __init bdev_init(void)
{
        int i;
        struct list_head *head = bdev_hashtable;

        i = HASH_SIZE;
        do {
                INIT_LIST_HEAD(head);
                head++;
                i--;
        } while (i);

        bdev_cachep = kmem_cache_create("bdev_cache",
                                        sizeof(struct block_device),
                                        0, SLAB_HWCACHE_ALIGN, init_once,
                                        NULL);
        if (!bdev_cachep)
                panic("cannot create bdev slab cache");
}
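/*
 * A note on the slab setup above: init_once() runs as the constructor, i.e.
 * once when an object is first placed on a slab page rather than on every
 * allocation, so each pointer handed out by alloc_bdev() already carries a
 * zeroed block_device with bd_sem initialised to 1.  bdget() below only has
 * to fill in the per-device fields (bd_count, bd_dev, bd_op).
 */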
/*
 * Most likely _very_ bad one - but then it's hardly critical for small
 * /dev and can be fixed when somebody will need really large one.
 */
static inline unsigned long hash(dev_t dev)
{
        unsigned long tmp = dev;
        tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
        return tmp & HASH_MASK;
}
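/*
 * A note on hash(): the two shifted terms fold the high bits of the dev_t
 * down onto its low HASH_BITS bits, so both the major and the minor number
 * influence the result, and the final mask keeps the bucket index in
 * [0, HASH_SIZE):
 *
 *      bucket = (dev + (dev >> HASH_BITS) + (dev >> HASH_BITS*2)) & HASH_MASK;
 *
 * bdev_hashtable[bucket] is the list head that bdget() passes to bdfind().
 */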
static struct block_device *bdfind(dev_t dev, struct list_head *head)
{
        struct list_head *p;
        struct block_device *bdev;
        for (p=head->next; p!=head; p=p->next) {
                bdev = list_entry(p, struct block_device, bd_hash);
                if (bdev->bd_dev != dev)
                        continue;
                atomic_inc(&bdev->bd_count);
                return bdev;
        }
        return NULL;
}
struct block_device *bdget(dev_t dev)
{
        struct list_head * head = bdev_hashtable + hash(dev);
        struct block_device *bdev, *new_bdev;
        spin_lock(&bdev_lock);
        bdev = bdfind(dev, head);
        spin_unlock(&bdev_lock);
        if (bdev)
                return bdev;
        new_bdev = alloc_bdev();
        if (!new_bdev)
                return NULL;
        atomic_set(&new_bdev->bd_count,1);
        new_bdev->bd_dev = dev;
        new_bdev->bd_op = NULL;
        spin_lock(&bdev_lock);
        bdev = bdfind(dev, head);
        if (!bdev) {
                list_add(&new_bdev->bd_hash, head);
                spin_unlock(&bdev_lock);
                return new_bdev;
        }
        spin_unlock(&bdev_lock);
        destroy_bdev(new_bdev);
        return bdev;
}
void bdput(struct block_device *bdev)
{
        if (atomic_dec_and_test(&bdev->bd_count)) {
                spin_lock(&bdev_lock);
                if (atomic_read(&bdev->bd_openers))
                        BUG();
                list_del(&bdev->bd_hash);
                spin_unlock(&bdev_lock);
                destroy_bdev(bdev);
        }
}
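/*
 * Usage sketch (illustrative only): bdget() and bdput() form a
 * reference-counted pair, so a typical in-kernel caller looks like
 *
 *      struct block_device *bdev = bdget(kdev_t_to_nr(dev));
 *      if (bdev) {
 *              ...
 *              bdput(bdev);
 *      }
 *
 * The bdput() that drops the last reference unhashes the device and returns
 * it to the slab cache, so the pointer must not be used after that call.
 */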
static struct {
        const char *name;
        struct block_device_operations *bdops;
} blkdevs[MAX_BLKDEV] = {
        { NULL, NULL },
};
int get_blkdev_list(char * p)
{
        int i;
        int len;

        len = sprintf(p, "\nBlock devices:\n");
        for (i = 0; i < MAX_BLKDEV ; i++) {
                if (blkdevs[i].bdops) {
                        len += sprintf(p+len, "%3d %s\n", i, blkdevs[i].name);
                }
        }
        return len;
}
/*
        Return the function table of a device.
        Load the driver if needed.
*/
const struct block_device_operations * get_blkfops(unsigned int major)
{
        const struct block_device_operations *ret = NULL;

        /* major 0 is used for non-device mounts */
        if (major && major < MAX_BLKDEV) {
                if (!blkdevs[major].bdops) {
                        char name[20];

                        sprintf(name, "block-major-%d", major);
                        request_module(name);
                }
                ret = blkdevs[major].bdops;
        }
        return ret;
}
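/*
 * A note on the kmod hook above: when no driver is registered for a major
 * number, request_module("block-major-<N>") asks kmod to load one, so a
 * modular driver only needs a matching alias (for example a line such as
 * "alias block-major-<N> mydriver" in the module configuration) to be
 * pulled in on first use.  "mydriver" here is illustrative, not a real
 * module name.
 */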
int register_blkdev(unsigned int major, const char * name, struct block_device_operations *bdops)
{
        if (major == 0) {
                for (major = MAX_BLKDEV-1; major > 0; major--) {
                        if (blkdevs[major].bdops == NULL) {
                                blkdevs[major].name = name;
                                blkdevs[major].bdops = bdops;
                                return major;
                        }
                }
                return -EBUSY;
        }
        if (major >= MAX_BLKDEV)
                return -EINVAL;
        if (blkdevs[major].bdops && blkdevs[major].bdops != bdops)
                return -EBUSY;
        blkdevs[major].name = name;
        blkdevs[major].bdops = bdops;
        return 0;
}
int unregister_blkdev(unsigned int major, const char * name)
{
        if (major >= MAX_BLKDEV)
                return -EINVAL;
        if (!blkdevs[major].bdops)
                return -EINVAL;
        if (strcmp(blkdevs[major].name, name))
                return -EINVAL;
        blkdevs[major].name = NULL;
        blkdevs[major].bdops = NULL;
        return 0;
}
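/*
 * Registration sketch (MYDEV_MAJOR, "mydev" and the mydev_* names are
 * illustrative only): a driver fills in a block_device_operations table and
 * claims its major at init time, releasing it again on unload:
 *
 *      static struct block_device_operations mydev_fops = {
 *              open:                   mydev_open,
 *              release:                mydev_release,
 *              ioctl:                  mydev_ioctl,
 *              check_media_change:     mydev_media_change,
 *              revalidate:             mydev_revalidate,
 *      };
 *
 *      if (register_blkdev(MYDEV_MAJOR, "mydev", &mydev_fops) < 0)
 *              return -EIO;
 *      ...
 *      unregister_blkdev(MYDEV_MAJOR, "mydev");
 *
 * Passing major == 0 requests a dynamic major: the downward scan in
 * register_blkdev() returns the first free number as a positive value.
 */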
/*
 * This routine checks whether a removable media has been changed,
 * and invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */
int check_disk_change(kdev_t dev)
{
        int i;
        const struct block_device_operations * bdops = NULL;
        struct super_block * sb;

        i = MAJOR(dev);
        if (i < MAX_BLKDEV)
                bdops = blkdevs[i].bdops;
        if (bdops == NULL) {
                devfs_handle_t de;

                de = devfs_find_handle (NULL, NULL, 0, i, MINOR (dev),
                                        DEVFS_SPECIAL_BLK, 0);
                if (de) bdops = devfs_get_ops (de);
        }
        if (bdops == NULL)
                return 0;
        if (bdops->check_media_change == NULL)
                return 0;
        if (!bdops->check_media_change(dev))
                return 0;

        printk(KERN_DEBUG "VFS: Disk change detected on device %s\n",
                bdevname(dev));

        sb = get_super(dev);
        if (sb && invalidate_inodes(sb))
                printk("VFS: busy inodes on changed media.\n");

        destroy_buffers(dev);

        if (bdops->revalidate)
                bdops->revalidate(dev);
        return 1;
}
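/*
 * Usage sketch (mydev_open is illustrative): drivers for removable media
 * typically call this from their open() method so that stale buffers and
 * inodes are dropped before the new medium is used:
 *
 *      static int mydev_open(struct inode *inode, struct file *filp)
 *      {
 *              check_disk_change(inode->i_rdev);
 *              ...
 *      }
 *
 * The return value is 1 if a media change was handled, 0 otherwise.
 */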
int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
        kdev_t rdev = to_kdev_t(bdev->bd_dev);
        struct inode inode_fake;
        int res;
        mm_segment_t old_fs = get_fs();

        if (!bdev->bd_op->ioctl)
                return -EINVAL;
        inode_fake.i_rdev = rdev;
        init_waitqueue_head(&inode_fake.i_wait);
        set_fs(KERNEL_DS);
        res = bdev->bd_op->ioctl(&inode_fake, NULL, cmd, arg);
        set_fs(old_fs);
        return res;
}
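/*
 * Usage sketch (the surrounding code is illustrative): because the segment
 * limit is lifted with set_fs(KERNEL_DS), the driver's copy_to_user()/
 * copy_from_user() calls accept kernel pointers, so in-kernel code can pass
 * an ordinary kernel buffer as 'arg', e.g.
 *
 *      struct hd_geometry geo;
 *      if (ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)&geo) == 0)
 *              printk("heads: %d\n", geo.heads);
 */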
int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
{
        int ret = -ENODEV;
        kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
        down(&bdev->bd_sem);
        if (!bdev->bd_op)
                bdev->bd_op = get_blkfops(MAJOR(rdev));
        if (bdev->bd_op) {
                /*
                 * This crockload is due to bad choice of ->open() type.
                 * It will go away.
                 */
                struct file fake_file = {};
                struct dentry fake_dentry = {};
                struct inode *fake_inode = get_empty_inode();
                ret = -ENOMEM;
                if (fake_inode) {
                        fake_file.f_mode = mode;
                        fake_file.f_flags = flags;
                        fake_file.f_dentry = &fake_dentry;
                        fake_dentry.d_inode = fake_inode;
                        fake_inode->i_rdev = rdev;
                        ret = 0;
                        if (bdev->bd_op->open)
                                ret = bdev->bd_op->open(fake_inode, &fake_file);
                        if (!ret)
                                atomic_inc(&bdev->bd_openers);
                        else if (!atomic_read(&bdev->bd_openers))
                                bdev->bd_op = NULL;
                        iput(fake_inode);
                }
        }
        up(&bdev->bd_sem);
        return ret;
}
int blkdev_open(struct inode * inode, struct file * filp)
{
        int ret = -ENODEV;
        struct block_device *bdev = inode->i_bdev;
        down(&bdev->bd_sem);
        if (!bdev->bd_op)
                bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
        if (bdev->bd_op) {
                ret = 0;
                if (bdev->bd_op->open)
                        ret = bdev->bd_op->open(inode,filp);
                if (!ret)
                        atomic_inc(&bdev->bd_openers);
                else if (!atomic_read(&bdev->bd_openers))
                        bdev->bd_op = NULL;
        }
        up(&bdev->bd_sem);
        return ret;
}
int blkdev_put(struct block_device *bdev, int kind)
{
        int ret = 0;
        kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */

        down(&bdev->bd_sem);
        /* syncing will go here */
        if (kind == BDEV_FILE || kind == BDEV_FS)
                fsync_dev(rdev);
        if (atomic_dec_and_test(&bdev->bd_openers)) {
                /* invalidating buffers will go here */
                invalidate_buffers(rdev);
        }
        if (bdev->bd_op->release) {
                struct inode * fake_inode = get_empty_inode();
                ret = -ENOMEM;
                if (fake_inode) {
                        fake_inode->i_rdev = rdev;
                        ret = bdev->bd_op->release(fake_inode, NULL);
                        iput(fake_inode);
                }
        }
        if (!atomic_read(&bdev->bd_openers))
                bdev->bd_op = NULL;     /* we can't rely on driver being */
                                        /* kind to stay around. */
        up(&bdev->bd_sem);
        return ret;
}
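/*
 * A note on pairing: blkdev_get() and blkdev_put() must be balanced and
 * called with the same 'kind' (BDEV_FILE for opens through blkdev_open()/
 * blkdev_close(), BDEV_FS for filesystem mounts).  On the put side the
 * device is synced first for those two kinds, the buffer cache is
 * invalidated when the last opener goes away, and ->bd_op is dropped so
 * that a driver module can be unloaded safely afterwards.
 */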
static int blkdev_close(struct inode * inode, struct file * filp)
{
        return blkdev_put(inode->i_bdev, BDEV_FILE);
}
static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
                        unsigned long arg)
{
        if (inode->i_bdev->bd_op->ioctl)
                return inode->i_bdev->bd_op->ioctl(inode, file, cmd, arg);
        return -EINVAL;
}
struct file_operations def_blk_fops = {
        release:        blkdev_close,
};
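/*
 * A note on def_blk_fops: besides release, the full table wires open, read,
 * write, fsync and ioctl to blkdev_open(), block_read(), block_write(),
 * block_fsync() and blkdev_ioctl() above.  init_special_inode() installs
 * this table on every block special inode, which is how the VFS reaches
 * these routines in the first place.
 */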
const char * bdevname(kdev_t dev)
{
        static char buffer[32];
        const char * name = blkdevs[MAJOR(dev)].name;

        if (!name)
                name = "unknown-block";

        sprintf(buffer, "%s(%d,%d)", name, MAJOR(dev), MINOR(dev));
        return buffer;
}
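/*
 * bdevname() formats into a single static buffer, so the result is only
 * valid until the next call and is not safe for concurrent callers.  The
 * output is "<name>(<major>,<minor>)", e.g. "ide0(3,1)" for minor 1 on
 * major 3 if the IDE driver registered that major under the name "ide0";
 * unregistered majors print as "unknown-block(<major>,<minor>)".
 */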