4 * Copyright (C) 1991, 1992 Linus Torvalds
7 #include <linux/config.h>
9 #include <linux/locks.h>
10 #include <linux/fcntl.h>
11 #include <linux/malloc.h>
12 #include <linux/kmod.h>
13 #include <linux/devfs_fs_kernel.h>
14 #include <linux/smp_lock.h>
16 #include <asm/uaccess.h>
18 extern int *blk_size
[];
19 extern int *blksize_size
[];
21 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
24 ssize_t
block_write(struct file
* filp
, const char * buf
,
25 size_t count
, loff_t
*ppos
)
27 struct inode
* inode
= filp
->f_dentry
->d_inode
;
28 ssize_t blocksize
, blocksize_bits
, i
, buffercount
, write_error
;
29 ssize_t block
, blocks
;
33 struct buffer_head
* bhlist
[NBUF
];
36 struct buffer_head
* bh
, *bufferlist
[NBUF
];
39 write_error
= buffercount
= 0;
41 if ( is_read_only( inode
->i_rdev
))
43 blocksize
= BLOCK_SIZE
;
44 if (blksize_size
[MAJOR(dev
)] && blksize_size
[MAJOR(dev
)][MINOR(dev
)])
45 blocksize
= blksize_size
[MAJOR(dev
)][MINOR(dev
)];
54 block
= *ppos
>> blocksize_bits
;
55 offset
= *ppos
& (blocksize
-1);
57 if (blk_size
[MAJOR(dev
)])
58 size
= ((loff_t
) blk_size
[MAJOR(dev
)][MINOR(dev
)] << BLOCK_SIZE_BITS
) >> blocksize_bits
;
63 return written
? written
: -ENOSPC
;
64 chars
= blocksize
- offset
;
69 /* get the buffer head */
71 struct buffer_head
* (*fn
)(kdev_t
, int, int) = getblk
;
72 if (chars
!= blocksize
)
74 bh
= fn(dev
, block
, blocksize
);
76 return written
? written
: -EIO
;
77 if (!buffer_uptodate(bh
))
81 bh
= getblk(dev
, block
, blocksize
);
83 return written
? written
: -EIO
;
85 if (!buffer_uptodate(bh
))
87 if (chars
== blocksize
)
92 if (!filp
->f_reada
|| !read_ahead
[MAJOR(dev
)]) {
93 /* We do this to force the read of a single buffer */
96 /* Read-ahead before write */
97 blocks
= read_ahead
[MAJOR(dev
)] / (blocksize
>> 9) / 2;
98 if (block
+ blocks
> size
) blocks
= size
- block
;
99 if (blocks
> NBUF
) blocks
=NBUF
;
100 if (!blocks
) blocks
= 1;
101 for(i
=1; i
<blocks
; i
++)
103 bhlist
[i
] = getblk (dev
, block
+i
, blocksize
);
106 while(i
>= 0) brelse(bhlist
[i
--]);
107 return written
? written
: -EIO
;
111 ll_rw_block(READ
, blocks
, bhlist
);
112 for(i
=1; i
<blocks
; i
++) brelse(bhlist
[i
]);
114 if (!buffer_uptodate(bh
)) {
116 return written
? written
: -EIO
;
122 p
= offset
+ bh
->b_data
;
127 copy_from_user(p
,buf
,chars
);
130 mark_buffer_uptodate(bh
, 1);
131 mark_buffer_dirty(bh
, 0);
132 if (filp
->f_flags
& O_SYNC
)
133 bufferlist
[buffercount
++] = bh
;
136 if (buffercount
== NBUF
){
137 ll_rw_block(WRITE
, buffercount
, bufferlist
);
138 for(i
=0; i
<buffercount
; i
++){
139 wait_on_buffer(bufferlist
[i
]);
140 if (!buffer_uptodate(bufferlist
[i
]))
142 brelse(bufferlist
[i
]);
151 ll_rw_block(WRITE
, buffercount
, bufferlist
);
152 for(i
=0; i
<buffercount
; i
++){
153 wait_on_buffer(bufferlist
[i
]);
154 if (!buffer_uptodate(bufferlist
[i
]))
156 brelse(bufferlist
[i
]);
165 ssize_t
block_read(struct file
* filp
, char * buf
, size_t count
, loff_t
*ppos
)
167 struct inode
* inode
= filp
->f_dentry
->d_inode
;
171 ssize_t blocksize_bits
, i
;
172 size_t blocks
, rblocks
, left
;
173 int bhrequest
, uptodate
;
174 struct buffer_head
** bhb
, ** bhe
;
175 struct buffer_head
* buflist
[NBUF
];
176 struct buffer_head
* bhreq
[NBUF
];
183 blocksize
= BLOCK_SIZE
;
184 if (blksize_size
[MAJOR(dev
)] && blksize_size
[MAJOR(dev
)][MINOR(dev
)])
185 blocksize
= blksize_size
[MAJOR(dev
)][MINOR(dev
)];
194 if (blk_size
[MAJOR(dev
)])
195 size
= (loff_t
) blk_size
[MAJOR(dev
)][MINOR(dev
)] << BLOCK_SIZE_BITS
;
197 size
= (loff_t
) INT_MAX
<< BLOCK_SIZE_BITS
;
201 /* size - offset might not fit into left, so check explicitly. */
202 else if (size
- offset
> INT_MAX
)
205 left
= size
- offset
;
211 block
= offset
>> blocksize_bits
;
212 offset
&= blocksize
-1;
213 size
>>= blocksize_bits
;
214 rblocks
= blocks
= (left
+ offset
+ blocksize
- 1) >> blocksize_bits
;
217 if (blocks
< read_ahead
[MAJOR(dev
)] / (blocksize
>> 9))
218 blocks
= read_ahead
[MAJOR(dev
)] / (blocksize
>> 9);
219 if (rblocks
> blocks
)
223 if (block
+ blocks
> size
) {
224 blocks
= size
- block
;
229 /* We do this in a two stage process. We first try to request
230 as many blocks as we can, then we wait for the first one to
231 complete, and then we try to wrap up as many as are actually
232 done. This routine is rather generic, in that it can be used
233 in a filesystem by substituting the appropriate function in
236 This routine is optimized to make maximum use of the various
237 buffers and caches. */
244 *bhb
= getblk(dev
, block
++, blocksize
);
245 if (*bhb
&& !buffer_uptodate(*bhb
)) {
247 bhreq
[bhrequest
++] = *bhb
;
250 if (++bhb
== &buflist
[NBUF
])
253 /* If the block we have on hand is uptodate, go ahead
254 and complete processing. */
261 /* Now request them all */
263 ll_rw_block(READ
, bhrequest
, bhreq
);
266 do { /* Finish off all I/O that has actually completed */
268 wait_on_buffer(*bhe
);
269 if (!buffer_uptodate(*bhe
)) { /* read error? */
271 if (++bhe
== &buflist
[NBUF
])
277 if (left
< blocksize
- offset
)
280 chars
= blocksize
- offset
;
285 copy_to_user(buf
,offset
+(*bhe
)->b_data
,chars
);
293 if (++bhe
== &buflist
[NBUF
])
295 } while (left
> 0 && bhe
!= bhb
&& (!*bhe
|| !buffer_locked(*bhe
)));
296 if (bhe
== bhb
&& !blocks
)
300 /* Release the read-ahead blocks */
303 if (++bhe
== &buflist
[NBUF
])
313 * Filp may be NULL when we are called by an msync of a vma
314 * since the vma has no handle.
317 static int block_fsync(struct file
*filp
, struct dentry
*dentry
, int datasync
)
319 return fsync_dev(dentry
->d_inode
->i_rdev
);
323 * bdev cache handling - shamelessly stolen from inode.c
324 * We use smaller hashtable, though.
328 #define HASH_SIZE (1UL << HASH_BITS)
329 #define HASH_MASK (HASH_SIZE-1)
330 static struct list_head bdev_hashtable
[HASH_SIZE
];
331 static spinlock_t bdev_lock
= SPIN_LOCK_UNLOCKED
;
332 static kmem_cache_t
* bdev_cachep
;
334 #define alloc_bdev() \
335 ((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL))
336 #define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev))
338 static void init_once(void * foo
, kmem_cache_t
* cachep
, unsigned long flags
)
340 struct block_device
* bdev
= (struct block_device
*) foo
;
342 if ((flags
& (SLAB_CTOR_VERIFY
|SLAB_CTOR_CONSTRUCTOR
)) ==
343 SLAB_CTOR_CONSTRUCTOR
)
345 memset(bdev
, 0, sizeof(*bdev
));
346 sema_init(&bdev
->bd_sem
, 1);
353 struct list_head
*head
= bdev_hashtable
;
357 INIT_LIST_HEAD(head
);
362 bdev_cachep
= kmem_cache_create("bdev_cache",
363 sizeof(struct block_device
),
364 0, SLAB_HWCACHE_ALIGN
, init_once
,
367 panic("cannot create bdev slab cache");
371 * Most likely _very_ bad one - but then it's hardly critical for small
372 * /dev and can be fixed when somebody will need really large one.
374 static inline unsigned long hash(dev_t dev
)
376 unsigned long tmp
= dev
;
377 tmp
= tmp
+ (tmp
>> HASH_BITS
) + (tmp
>> HASH_BITS
*2);
378 return tmp
& HASH_MASK
;
381 static struct block_device
*bdfind(dev_t dev
, struct list_head
*head
)
384 struct block_device
*bdev
;
385 for (p
=head
->next
; p
!=head
; p
=p
->next
) {
386 bdev
= list_entry(p
, struct block_device
, bd_hash
);
387 if (bdev
->bd_dev
!= dev
)
389 atomic_inc(&bdev
->bd_count
);
395 struct block_device
*bdget(dev_t dev
)
397 struct list_head
* head
= bdev_hashtable
+ hash(dev
);
398 struct block_device
*bdev
, *new_bdev
;
399 spin_lock(&bdev_lock
);
400 bdev
= bdfind(dev
, head
);
401 spin_unlock(&bdev_lock
);
404 new_bdev
= alloc_bdev();
407 atomic_set(&new_bdev
->bd_count
,1);
408 new_bdev
->bd_dev
= dev
;
409 new_bdev
->bd_op
= NULL
;
410 spin_lock(&bdev_lock
);
411 bdev
= bdfind(dev
, head
);
413 list_add(&new_bdev
->bd_hash
, head
);
414 spin_unlock(&bdev_lock
);
417 spin_unlock(&bdev_lock
);
418 destroy_bdev(new_bdev
);
422 void bdput(struct block_device
*bdev
)
424 if (atomic_dec_and_test(&bdev
->bd_count
)) {
425 spin_lock(&bdev_lock
);
426 if (atomic_read(&bdev
->bd_openers
))
428 list_del(&bdev
->bd_hash
);
429 spin_unlock(&bdev_lock
);
436 struct block_device_operations
*bdops
;
437 } blkdevs
[MAX_BLKDEV
] = {
441 int get_blkdev_list(char * p
)
446 len
= sprintf(p
, "\nBlock devices:\n");
447 for (i
= 0; i
< MAX_BLKDEV
; i
++) {
448 if (blkdevs
[i
].bdops
) {
449 len
+= sprintf(p
+len
, "%3d %s\n", i
, blkdevs
[i
].name
);
456 Return the function table of a device.
457 Load the driver if needed.
459 const struct block_device_operations
* get_blkfops(unsigned int major
)
461 const struct block_device_operations
*ret
= NULL
;
463 /* major 0 is used for non-device mounts */
464 if (major
&& major
< MAX_BLKDEV
) {
466 if (!blkdevs
[major
].bdops
) {
468 sprintf(name
, "block-major-%d", major
);
469 request_module(name
);
472 ret
= blkdevs
[major
].bdops
;
477 int register_blkdev(unsigned int major
, const char * name
, struct block_device_operations
*bdops
)
480 for (major
= MAX_BLKDEV
-1; major
> 0; major
--) {
481 if (blkdevs
[major
].bdops
== NULL
) {
482 blkdevs
[major
].name
= name
;
483 blkdevs
[major
].bdops
= bdops
;
489 if (major
>= MAX_BLKDEV
)
491 if (blkdevs
[major
].bdops
&& blkdevs
[major
].bdops
!= bdops
)
493 blkdevs
[major
].name
= name
;
494 blkdevs
[major
].bdops
= bdops
;
498 int unregister_blkdev(unsigned int major
, const char * name
)
500 if (major
>= MAX_BLKDEV
)
502 if (!blkdevs
[major
].bdops
)
504 if (strcmp(blkdevs
[major
].name
, name
))
506 blkdevs
[major
].name
= NULL
;
507 blkdevs
[major
].bdops
= NULL
;
512 * This routine checks whether a removable media has been changed,
513 * and invalidates all buffer-cache-entries in that case. This
514 * is a relatively slow routine, so we have to try to minimize using
515 * it. Thus it is called only upon a 'mount' or 'open'. This
516 * is the best way of combining speed and utility, I think.
517 * People changing diskettes in the middle of an operation deserve
520 int check_disk_change(kdev_t dev
)
523 const struct block_device_operations
* bdops
= NULL
;
524 struct super_block
* sb
;
528 bdops
= blkdevs
[i
].bdops
;
532 de
= devfs_find_handle (NULL
, NULL
, i
, MINOR (dev
),
533 DEVFS_SPECIAL_BLK
, 0);
534 if (de
) bdops
= devfs_get_ops (de
);
538 if (bdops
->check_media_change
== NULL
)
540 if (!bdops
->check_media_change(dev
))
543 printk(KERN_DEBUG
"VFS: Disk change detected on device %s\n",
547 if (sb
&& invalidate_inodes(sb
))
548 printk("VFS: busy inodes on changed media.\n");
550 destroy_buffers(dev
);
552 if (bdops
->revalidate
)
553 bdops
->revalidate(dev
);
557 int ioctl_by_bdev(struct block_device
*bdev
, unsigned cmd
, unsigned long arg
)
559 kdev_t rdev
= to_kdev_t(bdev
->bd_dev
);
560 struct inode inode_fake
;
562 mm_segment_t old_fs
= get_fs();
564 if (!bdev
->bd_op
->ioctl
)
566 inode_fake
.i_rdev
=rdev
;
567 init_waitqueue_head(&inode_fake
.i_wait
);
569 res
= bdev
->bd_op
->ioctl(&inode_fake
, NULL
, cmd
, arg
);
574 int blkdev_get(struct block_device
*bdev
, mode_t mode
, unsigned flags
, int kind
)
577 kdev_t rdev
= to_kdev_t(bdev
->bd_dev
); /* this should become bdev */
580 bdev
->bd_op
= get_blkfops(MAJOR(rdev
));
583 * This crockload is due to bad choice of ->open() type.
586 struct file fake_file
= {};
587 struct dentry fake_dentry
= {};
588 struct inode
*fake_inode
= get_empty_inode();
591 fake_file
.f_mode
= mode
;
592 fake_file
.f_flags
= flags
;
593 fake_file
.f_dentry
= &fake_dentry
;
594 fake_dentry
.d_inode
= fake_inode
;
595 fake_inode
->i_rdev
= rdev
;
597 if (bdev
->bd_op
->open
)
598 ret
= bdev
->bd_op
->open(fake_inode
, &fake_file
);
600 atomic_inc(&bdev
->bd_openers
);
601 else if (!atomic_read(&bdev
->bd_openers
))
610 int blkdev_open(struct inode
* inode
, struct file
* filp
)
613 struct block_device
*bdev
= inode
->i_bdev
;
617 bdev
->bd_op
= get_blkfops(MAJOR(inode
->i_rdev
));
620 if (bdev
->bd_op
->open
)
621 ret
= bdev
->bd_op
->open(inode
,filp
);
623 atomic_inc(&bdev
->bd_openers
);
624 else if (!atomic_read(&bdev
->bd_openers
))
632 int blkdev_put(struct block_device
*bdev
, int kind
)
635 kdev_t rdev
= to_kdev_t(bdev
->bd_dev
); /* this should become bdev */
637 /* syncing will go here */
639 if (kind
== BDEV_FILE
|| kind
== BDEV_FS
)
641 if (atomic_dec_and_test(&bdev
->bd_openers
)) {
642 /* invalidating buffers will go here */
643 invalidate_buffers(rdev
);
645 if (bdev
->bd_op
->release
) {
646 struct inode
* fake_inode
= get_empty_inode();
649 fake_inode
->i_rdev
= rdev
;
650 ret
= bdev
->bd_op
->release(fake_inode
, NULL
);
654 if (!atomic_read(&bdev
->bd_openers
))
655 bdev
->bd_op
= NULL
; /* we can't rely on driver being */
656 /* kind to stay around. */
662 static int blkdev_close(struct inode
* inode
, struct file
* filp
)
664 return blkdev_put(inode
->i_bdev
, BDEV_FILE
);
667 static int blkdev_ioctl(struct inode
*inode
, struct file
*file
, unsigned cmd
,
670 if (inode
->i_bdev
->bd_op
->ioctl
)
671 return inode
->i_bdev
->bd_op
->ioctl(inode
, file
, cmd
, arg
);
675 struct file_operations def_blk_fops
= {
677 release
: blkdev_close
,
/*
 * bdevname() - format a device number as "name(major,minor)".
 * Looks the name up in the blkdevs table by major, falling back to
 * "unknown-block". Uses a static buffer: not reentrant.
 * NOTE(review): extraction is garbled here and the function's tail
 * (return statement, closing brace) lies beyond this chunk.
 */
684 const char * bdevname(kdev_t dev
)
686 static char buffer
[32];
687 const char * name
 = blkdevs
[MAJOR(dev
)].name
;
690 name
 = "unknown-block";
692 sprintf(buffer
, "%s(%d,%d)", name
, MAJOR(dev
), MINOR(dev
));