/*
 *  linux/drivers/block/ll_rw_blk.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> -  July2000
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/config.h>
#include <linux/locks.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/smp_lock.h>

#include <asm/system.h>
#include <asm/io.h>
#include <linux/blk.h>
#include <linux/highmem.h>
#include <linux/raid/md.h>

#include <linux/module.h>

/*
 * MAC Floppy IWM hooks
 */

#ifdef CONFIG_MAC_FLOPPY_IWM
extern int mac_floppy_init(void);
#endif

extern int lvm_init(void);
/*
 * For the allocated request tables
 */
static kmem_cache_t *request_cachep;

/*
 * The "disk" task queue is used to start the actual requests
 * after a plug
 */
DECLARE_TASK_QUEUE(tq_disk);

/*
 * Protect the request list against multiple users..
 *
 * With this spinlock the Linux block IO subsystem is 100% SMP threaded
 * from the IRQ event side, and almost 100% SMP threaded from the syscall
 * side (we still have to protect against block device array operations, and
 * the do_request() side is casually still unsafe. The kernel lock protects
 * this part currently.).
 *
 * there is a fair chance that things will work just OK if these functions
 * are called with no global kernel lock held ...
 */
spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
/* This specifies how many sectors to read ahead on the disk. */

int read_ahead[MAX_BLKDEV];

/* blk_dev_struct is:
 *	*request_fn
 *	*current_request
 */
struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */

/*
 * blk_size contains the size of all block-devices in units of 1024 byte
 * sectors:
 *
 * blk_size[MAJOR][MINOR]
 *
 * if (!blk_size[MAJOR]) then no minor size checking is done.
 */
int * blk_size[MAX_BLKDEV];

/*
 * blksize_size contains the size of all block-devices:
 *
 * blksize_size[MAJOR][MINOR]
 *
 * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
 */
int * blksize_size[MAX_BLKDEV];

/*
 * hardsect_size contains the size of the hardware sector of a device.
 *
 * hardsect_size[MAJOR][MINOR]
 *
 * if (!hardsect_size[MAJOR])
 *		then 512 bytes is assumed.
 * else
 *		sector_size is hardsect_size[MAJOR][MINOR]
 * This is currently set by some scsi devices and read by the msdos fs driver.
 * Other uses may appear later.
 */
int * hardsect_size[MAX_BLKDEV];

/*
 * The following tunes the read-ahead algorithm in mm/filemap.c
 */
int * max_readahead[MAX_BLKDEV];

/*
 * Max number of sectors per request
 */
int * max_sectors[MAX_BLKDEV];

static inline int get_max_sectors(kdev_t dev)
{
	if (!max_sectors[MAJOR(dev)])
		return MAX_SECTORS;
	return max_sectors[MAJOR(dev)][MINOR(dev)];
}
static inline request_queue_t *__blk_get_queue(kdev_t dev)
{
	struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);

	if (bdev->queue)
		return bdev->queue(dev);
	else
		return &blk_dev[MAJOR(dev)].request_queue;
}

/*
 * NOTE: the device-specific queue() functions
 * have to be atomic!
 */
request_queue_t *blk_get_queue(kdev_t dev)
{
	request_queue_t *ret;
	unsigned long flags;

	spin_lock_irqsave(&io_request_lock,flags);
	ret = __blk_get_queue(dev);
	spin_unlock_irqrestore(&io_request_lock,flags);

	return ret;
}
static int __blk_cleanup_queue(struct list_head *head)
{
	struct list_head *entry;
	struct request *rq;
	int i = 0;

	if (list_empty(head))
		return 0;

	entry = head->next;
	do {
		rq = list_entry(entry, struct request, table);
		entry = entry->next;
		list_del(&rq->table);
		kmem_cache_free(request_cachep, rq);
		i++;
	} while (!list_empty(head));

	return i;
}
/**
 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
 * @q:    the request queue to be released
 *
 * Description:
 *     blk_cleanup_queue is the pair to blk_init_queue().  It should
 *     be called when a request queue is being released; typically
 *     when a block device is being de-registered.  Currently, its
 *     primary task is to free all the &struct request structures that
 *     were allocated to the queue.
 * Caveat:
 *     Hopefully the low level driver will have finished any
 *     outstanding requests first...
 **/
void blk_cleanup_queue(request_queue_t * q)
{
	int count = QUEUE_NR_REQUESTS;

	count -= __blk_cleanup_queue(&q->request_freelist[READ]);
	count -= __blk_cleanup_queue(&q->request_freelist[WRITE]);

	if (count)
		printk("blk_cleanup_queue: leaked requests (%d)\n", count);

	memset(q, 0, sizeof(*q));
}
/**
 * blk_queue_headactive - indicate whether head of request queue may be active
 * @q:       The queue which this applies to.
 * @active:  A flag indicating whether the head of the queue is active.
 *
 * Description:
 *    The driver for a block device may choose to leave the currently active
 *    request on the request queue, removing it only when it has completed.
 *    The queue handling routines assume this by default for safety reasons
 *    and will not involve the head of the request queue in any merging or
 *    reordering of requests when the queue is unplugged (and thus may be
 *    working on this particular request).
 *
 *    If a driver removes requests from the queue before processing them, then
 *    it may indicate that it does so, thereby allowing the head of the queue
 *    to be involved in merging and reordering.  This is done by calling
 *    blk_queue_headactive() with an @active flag of %0.
 *
 *    If a driver processes several requests at once, it must remove them (or
 *    at least all but one of them) from the request queue.
 *
 *    When a queue is plugged (see blk_queue_pluggable()) the head will be
 *    assumed to be inactive.
 **/

void blk_queue_headactive(request_queue_t * q, int active)
{
	q->head_active = active;
}
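
/*
 * Illustrative sketch (hypothetical "exdrv" driver, not part of the kernel):
 * a request function that dequeues every request before working on it may
 * tell the block layer so, allowing the head of the queue to take part in
 * merging.  exdrv_start_io() is assumed to exist.
 */
#if 0
static void exdrv_request(request_queue_t *q)
{
	struct request *req;

	while (!list_empty(&q->queue_head)) {
		req = blkdev_entry_next_request(&q->queue_head);
		blkdev_dequeue_request(req);	/* off the queue before I/O starts */
		exdrv_start_io(req);
	}
}

/* at init time, after blk_init_queue(q, exdrv_request):	*/
/*	blk_queue_headactive(q, 0);				*/
#endif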
/**
 * blk_queue_pluggable - define a plugging function for a request queue
 * @q:    the request queue to which the function will apply
 * @plug: the function to be called to plug a queue
 *
 * Description:
 *   A request queue will be "plugged" if a request is added to it
 *   while it is empty.  This allows a number of requests to be added
 *   before any are processed, thus providing an opportunity for these
 *   requests to be merged or re-ordered.
 *   The default plugging function (generic_plug_device()) sets the
 *   "plugged" flag for the queue and adds a task to the $tq_disk task
 *   queue to unplug the queue and call the request function at a
 *   later time.
 *
 *   A device driver may provide an alternate plugging function by
 *   passing it to blk_queue_pluggable().  This function should set
 *   the "plugged" flag if it wants calls to the request function to be
 *   blocked, and should place a task on $tq_disk which will unplug
 *   the queue.  Alternatively it can simply do nothing and thereby
 *   disable plugging of the device.
 **/
void blk_queue_pluggable (request_queue_t * q, plug_device_fn *plug)
{
	q->plug_device_fn = plug;
}
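
/*
 * Illustrative sketch (hypothetical): a driver that starts I/O as soon as
 * requests arrive can disable plugging altogether by installing a plug
 * function that does nothing, as described above.
 */
#if 0
static void exdrv_never_plug(request_queue_t *q, kdev_t dev)
{
	/* never set q->plugged, so __make_request() calls the
	 * request function immediately after queueing */
}

/*	blk_queue_pluggable(q, exdrv_never_plug);		*/
#endif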
/**
 * blk_queue_make_request - define an alternate make_request function for a device
 * @q:   the request queue for the device to be affected
 * @mfn: the alternate make_request function
 *
 * Description:
 *    The normal way for &struct buffer_heads to be passed to a device
 *    driver is for them to be collected into requests on a request
 *    queue, and then to allow the device driver to select requests
 *    off that queue when it is ready.  This works well for many block
 *    devices. However some block devices (typically virtual devices
 *    such as md or lvm) do not benefit from the processing on the
 *    request queue, and are served best by having the requests passed
 *    directly to them.  This can be achieved by providing a function
 *    to blk_queue_make_request().
 *
 * Caveat:
 *    The driver that does this *must* be able to deal appropriately
 *    with buffers in "highmemory", either by calling bh_kmap() to get
 *    a kernel mapping, or by calling create_bounce() to create a
 *    buffer in normal memory.
 **/
void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
{
	q->make_request_fn = mfn;
}
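
/*
 * Illustrative sketch (hypothetical "exstack" remapping driver): a stacking
 * driver can bypass request queueing entirely.  Returning non-zero asks
 * generic_make_request() to resubmit the (now remapped) buffer head;
 * returning zero means the buffer has been fully handled.
 */
#if 0
static kdev_t exstack_lower_dev;		/* hypothetical state */
static unsigned long exstack_start_sector;

static int exstack_make_request(request_queue_t *q, int rw, struct buffer_head *bh)
{
	bh->b_rdev = exstack_lower_dev;		/* redirect to the real device */
	bh->b_rsector += exstack_start_sector;	/* shift into its region       */
	return 1;				/* let the caller resubmit bh  */
}

/*	blk_queue_make_request(BLK_DEFAULT_QUEUE(EXSTACK_MAJOR),	*/
/*			       exstack_make_request);			*/
#endif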
static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
{
	if (req->nr_segments < max_segments) {
		req->nr_segments++;
		q->elevator.nr_segments++;
		return 1;
	}
	return 0;
}

static int ll_back_merge_fn(request_queue_t *q, struct request *req,
			    struct buffer_head *bh, int max_segments)
{
	if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
		return 1;
	return ll_new_segment(q, req, max_segments);
}

static int ll_front_merge_fn(request_queue_t *q, struct request *req,
			     struct buffer_head *bh, int max_segments)
{
	if (bh->b_data + bh->b_size == req->bh->b_data)
		return 1;
	return ll_new_segment(q, req, max_segments);
}

static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
				struct request *next, int max_segments)
{
	int total_segments = req->nr_segments + next->nr_segments;
	int same_segment;

	same_segment = 0;
	if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) {
		total_segments--;
		same_segment = 1;
	}

	if (total_segments > max_segments)
		return 0;

	q->elevator.nr_segments -= same_segment;
	req->nr_segments = total_segments;
	return 1;
}
336 * "plug" the device if there are no outstanding requests: this will
337 * force the transfer to start only after we have put all the requests
338 * on the list.
340 * This is called with interrupts off and no requests on the queue.
341 * (and with the request spinlock acquired)
343 static void generic_plug_device(request_queue_t *q, kdev_t dev)
346 * no need to replug device
348 if (!list_empty(&q->queue_head) || q->plugged)
349 return;
351 q->plugged = 1;
352 queue_task(&q->plug_tq, &tq_disk);
356 * remove the plug and let it rip..
358 static inline void __generic_unplug_device(request_queue_t *q)
360 if (q->plugged) {
361 q->plugged = 0;
362 if (!list_empty(&q->queue_head))
363 q->request_fn(q);
367 static void generic_unplug_device(void *data)
369 request_queue_t *q = (request_queue_t *) data;
370 unsigned long flags;
372 spin_lock_irqsave(&io_request_lock, flags);
373 __generic_unplug_device(q);
374 spin_unlock_irqrestore(&io_request_lock, flags);
static void blk_init_free_list(request_queue_t *q)
{
	struct request *rq;
	int i;

	/*
	 * Divide requests in half between read and write. This used to
	 * be a 2/3 advantage for reads, but now reads can steal from
	 * the write free list.
	 */
	for (i = 0; i < QUEUE_NR_REQUESTS; i++) {
		rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL);
		rq->rq_status = RQ_INACTIVE;
		list_add(&rq->table, &q->request_freelist[i & 1]);
	}

	init_waitqueue_head(&q->wait_for_request);
	spin_lock_init(&q->request_lock);
}

static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
/**
 * blk_init_queue - prepare a request queue for use with a block device
 * @q:    The &request_queue_t to be initialised
 * @rfn:  The function to be called to process requests that have been
 *        placed on the queue.
 *
 * Description:
 *    If a block device wishes to use the standard request handling procedures,
 *    which sort requests and coalesce adjacent requests, then it must
 *    call blk_init_queue().  The function @rfn will be called when there
 *    are requests on the queue that need to be processed.  If the device
 *    supports plugging, then @rfn may not be called immediately when requests
 *    are available on the queue, but may be called at some time later instead.
 *    Plugged queues are generally unplugged when a buffer belonging to one
 *    of the requests on the queue is needed, or due to memory pressure.
 *
 *    @rfn is not required, or even expected, to remove all requests off the
 *    queue, but only as many as it can handle at a time.  If it does leave
 *    requests on the queue, it is responsible for arranging that the requests
 *    get dealt with eventually.
 *
 *    The global spin lock $io_request_lock must be held while manipulating the
 *    requests on the request queue.
 *
 *    The request on the head of the queue is by default assumed to be
 *    potentially active, and it is not considered for re-ordering or merging
 *    whenever the given queue is unplugged. This behaviour can be changed with
 *    blk_queue_headactive().
 *
 * Note:
 *    blk_init_queue() must be paired with a blk_cleanup_queue() call
 *    when the block device is deactivated (such as at module unload).
 **/
void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
{
	INIT_LIST_HEAD(&q->queue_head);
	INIT_LIST_HEAD(&q->request_freelist[READ]);
	INIT_LIST_HEAD(&q->request_freelist[WRITE]);
	elevator_init(&q->elevator, ELEVATOR_LINUS);
	blk_init_free_list(q);
	q->request_fn		= rfn;
	q->back_merge_fn	= ll_back_merge_fn;
	q->front_merge_fn	= ll_front_merge_fn;
	q->merge_requests_fn	= ll_merge_requests_fn;
	q->make_request_fn	= __make_request;
	q->plug_tq.sync		= 0;
	q->plug_tq.routine	= &generic_unplug_device;
	q->plug_tq.data		= q;
	q->plugged		= 0;
	/*
	 * These booleans describe the queue properties.  We set the
	 * default (and most common) values here.  Drivers can use the
	 * appropriate functions to alter the queue properties as
	 * appropriate.
	 */
	q->plug_device_fn	= generic_plug_device;
	q->head_active		= 1;
}
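
/*
 * Illustrative sketch (hypothetical "exdrv" driver): the minimal init/exit
 * pairing for the default queue of a major number.  EXDRV_MAJOR, exdrv_fops
 * and exdrv_request() are assumptions.
 */
#if 0
static int __init exdrv_init(void)
{
	if (register_blkdev(EXDRV_MAJOR, "exdrv", &exdrv_fops))
		return -EIO;
	blk_init_queue(BLK_DEFAULT_QUEUE(EXDRV_MAJOR), exdrv_request);
	return 0;
}

static void __exit exdrv_exit(void)
{
	blk_cleanup_queue(BLK_DEFAULT_QUEUE(EXDRV_MAJOR));
	unregister_blkdev(EXDRV_MAJOR, "exdrv");
}
#endif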
#define blkdev_free_rq(list) list_entry((list)->next, struct request, table);
/*
 * Get a free request. io_request_lock must be held and interrupts
 * disabled on the way in.
 */
static inline struct request *get_request(request_queue_t *q, int rw)
{
	struct list_head *list = &q->request_freelist[rw];
	struct request *rq;

	/*
	 * Reads get preferential treatment and are allowed to steal
	 * from the write free list if necessary.
	 */
	if (!list_empty(list)) {
		rq = blkdev_free_rq(list);
		goto got_rq;
	}

	/*
	 * if the WRITE list is non-empty, we know that rw is READ
	 * and that the READ list is empty. allow reads to 'steal'
	 * from the WRITE list.
	 */
	if (!list_empty(&q->request_freelist[WRITE])) {
		list = &q->request_freelist[WRITE];
		rq = blkdev_free_rq(list);
		goto got_rq;
	}

	return NULL;

got_rq:
	list_del(&rq->table);
	rq->free_list = list;
	rq->rq_status = RQ_ACTIVE;
	rq->special = NULL;
	rq->q = q;
	return rq;
}
/*
 * No available requests for this queue, unplug the device.
 */
static struct request *__get_request_wait(request_queue_t *q, int rw)
{
	register struct request *rq;
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&q->wait_for_request, &wait);
	for (;;) {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_lock_irq(&io_request_lock);
		rq = get_request(q, rw);
		spin_unlock_irq(&io_request_lock);
		if (rq)
			break;
		generic_unplug_device(q);
		schedule();
	}
	remove_wait_queue(&q->wait_for_request, &wait);
	current->state = TASK_RUNNING;
	return rq;
}

static inline struct request *get_request_wait(request_queue_t *q, int rw)
{
	register struct request *rq;

	spin_lock_irq(&io_request_lock);
	rq = get_request(q, rw);
	spin_unlock_irq(&io_request_lock);
	if (rq)
		return rq;
	return __get_request_wait(q, rw);
}
/* RO fail safe mechanism */

static long ro_bits[MAX_BLKDEV][8];

int is_read_only(kdev_t dev)
{
	int minor,major;

	major = MAJOR(dev);
	minor = MINOR(dev);
	if (major < 0 || major >= MAX_BLKDEV) return 0;
	return ro_bits[major][minor >> 5] & (1 << (minor & 31));
}

void set_device_ro(kdev_t dev,int flag)
{
	int minor,major;

	major = MAJOR(dev);
	minor = MINOR(dev);
	if (major < 0 || major >= MAX_BLKDEV) return;
	if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
	else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
}
inline void drive_stat_acct (kdev_t dev, int rw,
				unsigned long nr_sectors, int new_io)
{
	unsigned int major = MAJOR(dev);
	unsigned int index;

	index = disk_index(dev);
	if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
		return;

	kstat.dk_drive[major][index] += new_io;
	if (rw == READ) {
		kstat.dk_drive_rio[major][index] += new_io;
		kstat.dk_drive_rblk[major][index] += nr_sectors;
	} else if (rw == WRITE) {
		kstat.dk_drive_wio[major][index] += new_io;
		kstat.dk_drive_wblk[major][index] += nr_sectors;
	} else
		printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
}
/*
 * add-request adds a request to the linked list.
 * It disables interrupts (acquires the request spinlock) so that it can muck
 * with the request-lists in peace. Thus it should be called with no spinlocks
 * held.
 *
 * By this point, req->cmd is always either READ/WRITE, never READA,
 * which is important for drive_stat_acct() above.
 */
static inline void add_request(request_queue_t * q, struct request * req,
			       struct list_head *head, int lat)
{
	int major;

	drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);

	/*
	 * let selected elevator insert the request
	 */
	q->elevator.elevator_fn(req, &q->elevator, &q->queue_head, head, lat);

	/*
	 * FIXME(eric) I don't understand why there is a need for this
	 * special case code.  It clearly doesn't fit any more with
	 * the new queueing architecture, and it got added in 2.3.10.
	 * I am leaving this in here until I hear back from the COMPAQ
	 * people.
	 */
	major = MAJOR(req->rq_dev);
	if (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7)
		(q->request_fn)(q);
	if (major >= COMPAQ_CISS_MAJOR+0 && major <= COMPAQ_CISS_MAJOR+7)
		(q->request_fn)(q);
	if (major >= DAC960_MAJOR+0 && major <= DAC960_MAJOR+7)
		(q->request_fn)(q);
}
/*
 * Must be called with io_request_lock held and interrupts disabled
 */
void inline blkdev_release_request(struct request *req)
{
	req->rq_status = RQ_INACTIVE;

	/*
	 * Request may not have originated from ll_rw_blk
	 */
	if (req->free_list) {
		list_add(&req->table, req->free_list);
		req->free_list = NULL;
		wake_up(&req->q->wait_for_request);
	}
}
/*
 * Has to be called with the request spinlock acquired
 */
static void attempt_merge(request_queue_t * q,
			  struct request *req,
			  int max_sectors,
			  int max_segments)
{
	struct request *next;

	next = blkdev_next_request(req);
	if (req->sector + req->nr_sectors != next->sector)
		return;
	if (req->cmd != next->cmd
	    || req->rq_dev != next->rq_dev
	    || req->nr_sectors + next->nr_sectors > max_sectors
	    || next->sem)
		return;
	/*
	 * If we are not allowed to merge these requests, then
	 * return.  If we are allowed to merge, then the count
	 * will have been updated to the appropriate number,
	 * and we shouldn't do it here too.
	 */
	if(!(q->merge_requests_fn)(q, req, next, max_segments))
		return;

	req->bhtail->b_reqnext = next->bh;
	req->bhtail = next->bhtail;
	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
	list_del(&next->queue);
	blkdev_release_request(next);
}

static inline void attempt_back_merge(request_queue_t * q,
				      struct request *req,
				      int max_sectors,
				      int max_segments)
{
	if (&req->queue == q->queue_head.prev)
		return;
	attempt_merge(q, req, max_sectors, max_segments);
}

static inline void attempt_front_merge(request_queue_t * q,
				       struct list_head * head,
				       struct request *req,
				       int max_sectors,
				       int max_segments)
{
	struct list_head * prev;

	prev = req->queue.prev;
	if (head == prev)
		return;
	attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
}
static int __make_request(request_queue_t * q, int rw,
				  struct buffer_head * bh)
{
	unsigned int sector, count;
	int max_segments = MAX_SEGMENTS;
	struct request * req = NULL, *freereq = NULL;
	int rw_ahead, max_sectors, el_ret;
	struct list_head *head;
	int latency;
	elevator_t *elevator = &q->elevator;

	count = bh->b_size >> 9;
	sector = bh->b_rsector;

	rw_ahead = 0;	/* normal case; gets changed below for READA */
	switch (rw) {
		case READA:
			rw_ahead = 1;
			rw = READ;	/* drop into READ */
		case READ:
		case WRITE:
			break;
		default:
			BUG();
			goto end_io;
	}

	/* We'd better have a real physical mapping!
	   Check this bit only if the buffer was dirty and just locked
	   down by us so at this point flushpage will block and
	   won't clear the mapped bit under us. */
	if (!buffer_mapped(bh))
		BUG();

	/*
	 * Temporary solution - in 2.5 this will be done by the lowlevel
	 * driver. Create a bounce buffer if the buffer data points into
	 * high memory - keep the original buffer otherwise.
	 */
#if CONFIG_HIGHMEM
	bh = create_bounce(rw, bh);
#endif

/* look for a free request. */
	/*
	 * Try to coalesce the new request with old requests
	 */
	max_sectors = get_max_sectors(bh->b_rdev);

	latency = elevator_request_latency(elevator, rw);

	/*
	 * Now we acquire the request spinlock, we have to be mega careful
	 * not to schedule or do something nonatomic
	 */
again:
	spin_lock_irq(&io_request_lock);

	/*
	 * skip first entry, for devices with active queue head
	 */
	head = &q->queue_head;
	if (q->head_active && !q->plugged)
		head = head->next;

	if (list_empty(head)) {
		q->plug_device_fn(q, bh->b_rdev); /* is atomic */
		goto get_rq;
	}

	el_ret = elevator->elevator_merge_fn(q, &req, bh, rw,
					     &max_sectors, &max_segments);
	switch (el_ret) {

		case ELEVATOR_BACK_MERGE:
			if (!q->back_merge_fn(q, req, bh, max_segments))
				break;
			req->bhtail->b_reqnext = bh;
			req->bhtail = bh;
			req->nr_sectors = req->hard_nr_sectors += count;
			req->e = elevator;
			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
			attempt_back_merge(q, req, max_sectors, max_segments);
			goto out;

		case ELEVATOR_FRONT_MERGE:
			if (!q->front_merge_fn(q, req, bh, max_segments))
				break;
			bh->b_reqnext = req->bh;
			req->bh = bh;
			req->buffer = bh->b_data;
			req->current_nr_sectors = count;
			req->sector = req->hard_sector = sector;
			req->nr_sectors = req->hard_nr_sectors += count;
			req->e = elevator;
			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
			attempt_front_merge(q, head, req, max_sectors, max_segments);
			goto out;

		/*
		 * elevator says don't/can't merge. get new request
		 */
		case ELEVATOR_NO_MERGE:
			break;

		default:
			printk("elevator returned crap (%d)\n", el_ret);
			BUG();
	}

	/*
	 * Grab a free request from the freelist. Reads first try their
	 * own queue - if that is empty, we steal from the write list.
	 * Writes must block if the write list is empty, and read aheads
	 * are not crucial.
	 */
get_rq:
	if (freereq) {
		req = freereq;
		freereq = NULL;
	} else if ((req = get_request(q, rw)) == NULL) {
		spin_unlock_irq(&io_request_lock);
		if (rw_ahead)
			goto end_io;

		freereq = __get_request_wait(q, rw);
		goto again;
	}

/* fill up the request-info, and add it to the queue */
	req->cmd = rw;
	req->errors = 0;
	req->hard_sector = req->sector = sector;
	req->hard_nr_sectors = req->nr_sectors = count;
	req->current_nr_sectors = count;
	req->nr_segments = 1; /* Always 1 for a new request. */
	req->nr_hw_segments = 1; /* Always 1 for a new request. */
	req->buffer = bh->b_data;
	req->sem = NULL;
	req->bh = bh;
	req->bhtail = bh;
	req->rq_dev = bh->b_rdev;
	req->e = elevator;
	add_request(q, req, head, latency);
out:
	if (!q->plugged)
		(q->request_fn)(q);
	if (freereq)
		blkdev_release_request(freereq);
	spin_unlock_irq(&io_request_lock);
	return 0;
end_io:
	bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
	return 0;
}
/**
 * generic_make_request: hand a buffer head to its device driver for I/O
 * @rw:  READ, WRITE, or READA - what sort of I/O is desired.
 * @bh:  The buffer head describing the location in memory and on the device.
 *
 * generic_make_request() is used to make I/O requests of block
 * devices. It is passed a &struct buffer_head and a &rw value.  The
 * %READ and %WRITE options are (hopefully) obvious in meaning.  The
 * %READA value means that a read is required, but that the driver is
 * free to fail the request if, for example, it cannot get needed
 * resources immediately.
 *
 * generic_make_request() does not return any status.  The
 * success/failure status of the request, along with notification of
 * completion, is delivered asynchronously through the bh->b_end_io
 * function described (one day) elsewhere.
 *
 * The caller of generic_make_request must make sure that b_page,
 * b_addr, b_size are set to describe the memory buffer, that b_rdev
 * and b_rsector are set to describe the device address, and the
 * b_end_io and optionally b_private are set to describe how
 * completion notification should be signaled.  BH_Mapped should also
 * be set (to confirm that b_dev and b_blocknr are valid).
 *
 * generic_make_request and the drivers it calls may use b_reqnext,
 * and may change b_rdev and b_rsector.  So the values of these fields
 * should NOT be depended on after the call to generic_make_request.
 * Because of this, the caller should record the device address
 * information in b_dev and b_blocknr.
 *
 * Apart from those fields mentioned above, no other fields, and in
 * particular, no other flags, are changed by generic_make_request or
 * any lower level drivers.
 **/
void generic_make_request (int rw, struct buffer_head * bh)
{
	int major = MAJOR(bh->b_rdev);
	request_queue_t *q;

	if (!bh->b_end_io) BUG();
	if (blk_size[major]) {
		unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1;
		unsigned int sector, count;

		count = bh->b_size >> 9;
		sector = bh->b_rsector;

		if (maxsector < count || maxsector - count < sector) {
			bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);
			if (blk_size[major][MINOR(bh->b_rdev)]) {

				/* This may well happen - the kernel calls bread()
				   without checking the size of the device, e.g.,
				   when mounting a device. */
				printk(KERN_INFO
				       "attempt to access beyond end of device\n");
				printk(KERN_INFO "%s: rw=%d, want=%d, limit=%d\n",
				       kdevname(bh->b_rdev), rw,
				       (sector + count)>>1,
				       blk_size[major][MINOR(bh->b_rdev)]);
			}
			bh->b_end_io(bh, 0);
			return;
		}
	}

	/*
	 * Resolve the mapping until finished. (drivers are
	 * still free to implement/resolve their own stacking
	 * by explicitly returning 0)
	 */
	/* NOTE: we don't repeat the blk_size check for each new device.
	 * Stacking drivers are expected to know what they are doing.
	 */
	do {
		q = blk_get_queue(bh->b_rdev);
		if (!q) {
			printk(KERN_ERR
			       "generic_make_request: Trying to access nonexistent block-device %s (%ld)\n",
			       kdevname(bh->b_rdev), bh->b_rsector);
			buffer_IO_error(bh);
			break;
		}
	}
	while (q->make_request_fn(q, rw, bh));
}
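
/*
 * Illustrative sketch (hypothetical): the fields a caller fills in before
 * handing a locked, mapped buffer_head to generic_make_request(), per the
 * description above.  exdrv_end_io() stands in for whatever completion
 * callback the caller needs.
 */
#if 0
static void exdrv_end_io(struct buffer_head *bh, int uptodate)
{
	/* called, possibly from interrupt context, when the I/O completes */
	mark_buffer_uptodate(bh, uptodate);
	unlock_buffer(bh);
}

static void exdrv_submit_read(struct buffer_head *bh, kdev_t dev, unsigned long sector)
{
	/* b_data/b_page/b_size already describe the memory, BH_Mapped is set */
	bh->b_rdev = dev;		/* device address ...               */
	bh->b_rsector = sector;		/* ... in 512-byte sectors          */
	bh->b_end_io = exdrv_end_io;	/* asynchronous completion callback */
	generic_make_request(READ, bh);
}
#endif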
/**
 * submit_bh: submit a buffer_head to the block device layer for I/O
 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
 * @bh: The &struct buffer_head which describes the I/O
 *
 * submit_bh() is very similar in purpose to generic_make_request(), and
 * uses that function to do most of the work.
 *
 * The extra functionality provided by submit_bh is to determine
 * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
 * This is appropriate for IO requests that come from the buffer
 * cache and page cache which (currently) always use aligned blocks.
 */
void submit_bh(int rw, struct buffer_head * bh)
{
	if (!test_bit(BH_Lock, &bh->b_state))
		BUG();

	set_bit(BH_Req, &bh->b_state);

	/*
	 * First step, 'identity mapping' - RAID or LVM might
	 * further remap this.
	 */
	bh->b_rdev = bh->b_dev;
	bh->b_rsector = bh->b_blocknr * (bh->b_size>>9);

	generic_make_request(rw, bh);
}
/*
 * Default IO end handler, used by "ll_rw_block()".
 */
static void end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	mark_buffer_uptodate(bh, uptodate);
	unlock_buffer(bh);
}
/**
 * ll_rw_block: low-level access to block devices
 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads,
 * and requests an I/O operation on them, either a %READ or a %WRITE.
 * The third %READA option is described in the documentation for
 * generic_make_request() which ll_rw_block() calls.
 *
 * This function provides extra functionality that is not in
 * generic_make_request() that is relevant to buffers in the buffer
 * cache or page cache.  In particular it drops any buffer that it
 * cannot get a lock on (with the BH_Lock state bit), any buffer that
 * appears to be clean when doing a write request, and any buffer that
 * appears to be up-to-date when doing a read request.  Further it marks
 * as clean buffers that are processed for writing (the buffer cache
 * won't assume that they are actually clean until the buffer gets
 * unlocked).
 *
 * ll_rw_block sets b_end_io to a simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.  A client that needs a more interesting completion
 * routine should call submit_bh() (or generic_make_request())
 * directly.
 *
 * Caveat:
 *  All of the buffers must be for the same device, and must also be
 *  of the current approved size for the device.
 */
void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
{
	struct buffer_head *bh;
	unsigned int major;
	int correct_size;
	int i;

	major = MAJOR(bhs[0]->b_dev);

	/* Determine correct block size for this device. */
	correct_size = BLOCK_SIZE;
	if (blksize_size[major]) {
		i = blksize_size[major][MINOR(bhs[0]->b_dev)];
		if (i)
			correct_size = i;
	}

	/* Verify requested block sizes. */
	for (i = 0; i < nr; i++) {
		bh = bhs[i];
		if (bh->b_size != correct_size) {
			printk(KERN_NOTICE "ll_rw_block: device %s: "
			       "only %d-char blocks implemented (%u)\n",
			       kdevname(bhs[0]->b_dev),
			       correct_size, bh->b_size);
			goto sorry;
		}
	}

	if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
		printk(KERN_NOTICE "Can't write to read-only device %s\n",
		       kdevname(bhs[0]->b_dev));
		goto sorry;
	}

	for (i = 0; i < nr; i++) {
		bh = bhs[i];

		/* Only one thread can actually submit the I/O. */
		if (test_and_set_bit(BH_Lock, &bh->b_state))
			continue;

		/* We have the buffer lock */
		bh->b_end_io = end_buffer_io_sync;

		switch(rw) {
		case WRITE:
			if (!atomic_set_buffer_clean(bh))
				/* Hmmph! Nothing to write */
				goto end_io;
			__mark_buffer_clean(bh);
			kstat.pgpgout++;
			break;

		case READA:
		case READ:
			if (buffer_uptodate(bh))
				/* Hmmph! Already have it */
				goto end_io;
			kstat.pgpgin++;
			break;
		default:
			BUG();
	end_io:
			bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
			continue;
		}

		submit_bh(rw, bh);
	}
	return;

sorry:
	for (i = 0; i < nr; i++)
		buffer_IO_error(bhs[i]);
}
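
/*
 * Illustrative sketch: the usual synchronous pattern built on ll_rw_block() -
 * submit one buffer and sleep until the default completion handler
 * (end_buffer_io_sync above) unlocks it.
 */
#if 0
static int exdrv_read_block(struct buffer_head *bh)
{
	ll_rw_block(READ, 1, &bh);	/* locks bh and submits it   */
	wait_on_buffer(bh);		/* woken when bh is unlocked */
	return buffer_uptodate(bh) ? 0 : -EIO;
}
#endif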
#ifdef CONFIG_STRAM_SWAP
extern int stram_device_init (void);
#endif

/*
 * First step of what used to be end_request
 *
 * 0 means continue with end_that_request_last,
 * 1 means we are done
 */

int end_that_request_first (struct request *req, int uptodate, char *name)
{
	struct buffer_head * bh;
	int nsect;

	req->errors = 0;
	if (!uptodate)
		printk("end_request: I/O error, dev %s (%s), sector %lu\n",
			kdevname(req->rq_dev), name, req->sector);

	if ((bh = req->bh) != NULL) {
		nsect = bh->b_size >> 9;
		req->bh = bh->b_reqnext;
		bh->b_reqnext = NULL;
		bh->b_end_io(bh, uptodate);
		if ((bh = req->bh) != NULL) {
			req->hard_sector += nsect;
			req->hard_nr_sectors -= nsect;
			req->sector = req->hard_sector;
			req->nr_sectors = req->hard_nr_sectors;

			req->current_nr_sectors = bh->b_size >> 9;
			if (req->nr_sectors < req->current_nr_sectors) {
				req->nr_sectors = req->current_nr_sectors;
				printk("end_request: buffer-list destroyed\n");
			}
			req->buffer = bh->b_data;
			return 1;
		}
	}
	return 0;
}

void end_that_request_last(struct request *req)
{
	if (req->e) {
		printk("end_that_request_last called with non-dequeued req\n");
		BUG();
	}
	if (req->sem != NULL)
		up(req->sem);

	blkdev_release_request(req);
}
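
/*
 * Illustrative sketch: how a low-level driver typically completes the
 * request at the head of its queue, assuming the blkdev_dequeue_request()
 * helper from <linux/blkdev.h>; the dequeue step is expected to leave
 * req->e cleared before end_that_request_last() checks it above.
 */
#if 0
static void exdrv_end_request(struct request *req, int uptodate)
{
	if (end_that_request_first(req, uptodate, "exdrv"))
		return;			/* more buffer heads still pending */
	blkdev_dequeue_request(req);	/* take it off the request queue   */
	end_that_request_last(req);	/* wake waiters, free the request  */
}
#endif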
int __init blk_dev_init(void)
{
	struct blk_dev_struct *dev;

	request_cachep = kmem_cache_create("blkdev_requests",
					   sizeof(struct request),
					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);

	if (!request_cachep)
		panic("Can't create request pool slab cache\n");

	for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
		dev->queue = NULL;

	memset(ro_bits,0,sizeof(ro_bits));
	memset(max_readahead, 0, sizeof(max_readahead));
	memset(max_sectors, 0, sizeof(max_sectors));
#ifdef CONFIG_AMIGA_Z2RAM
	z2_init();
#endif
#ifdef CONFIG_STRAM_SWAP
	stram_device_init();
#endif
#ifdef CONFIG_BLK_DEV_RAM
	rd_init();
#endif
#ifdef CONFIG_BLK_DEV_LOOP
	loop_init();
#endif
#ifdef CONFIG_ISP16_CDI
	isp16_init();
#endif
#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE)
	ide_init();		/* this MUST precede hd_init */
#endif
#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD)
	hd_init();
#endif
#ifdef CONFIG_BLK_DEV_PS2
	ps2esdi_init();
#endif
#ifdef CONFIG_BLK_DEV_XD
	xd_init();
#endif
#ifdef CONFIG_BLK_DEV_MFM
	mfm_init();
#endif
#ifdef CONFIG_PARIDE
	{ extern void paride_init(void); paride_init(); };
#endif
#ifdef CONFIG_MAC_FLOPPY
	swim3_init();
#endif
#ifdef CONFIG_BLK_DEV_SWIM_IOP
	swimiop_init();
#endif
#ifdef CONFIG_AMIGA_FLOPPY
	amiga_floppy_init();
#endif
#ifdef CONFIG_ATARI_FLOPPY
	atari_floppy_init();
#endif
#ifdef CONFIG_BLK_DEV_FD
	floppy_init();
#else
#if defined(__i386__)	/* Do we even need this? */
	outb_p(0xc, 0x3f2);
#endif
#endif
#ifdef CONFIG_CDU31A
	cdu31a_init();
#endif
#ifdef CONFIG_ATARI_ACSI
	acsi_init();
#endif
#ifdef CONFIG_MCD
	mcd_init();
#endif
#ifdef CONFIG_MCDX
	mcdx_init();
#endif
#ifdef CONFIG_SBPCD
	sbpcd_init();
#endif
#ifdef CONFIG_AZTCD
	aztcd_init();
#endif
#ifdef CONFIG_CDU535
	sony535_init();
#endif
#ifdef CONFIG_GSCD
	gscd_init();
#endif
#ifdef CONFIG_CM206
	cm206_init();
#endif
#ifdef CONFIG_OPTCD
	optcd_init();
#endif
#ifdef CONFIG_SJCD
	sjcd_init();
#endif
#ifdef CONFIG_APBLOCK
	ap_init();
#endif
#ifdef CONFIG_DDV
	ddv_init();
#endif
#ifdef CONFIG_BLK_DEV_NBD
	nbd_init();
#endif
#ifdef CONFIG_MDISK
	mdisk_init();
#endif
#ifdef CONFIG_DASD
	dasd_init();
#endif
#ifdef CONFIG_SUN_JSFLASH
	jsfd_init();
#endif
#ifdef CONFIG_BLK_DEV_LVM
	lvm_init();
#endif
	return 0;
}
EXPORT_SYMBOL(io_request_lock);
EXPORT_SYMBOL(end_that_request_first);
EXPORT_SYMBOL(end_that_request_last);
EXPORT_SYMBOL(blk_init_queue);
EXPORT_SYMBOL(blk_get_queue);
EXPORT_SYMBOL(blk_cleanup_queue);
EXPORT_SYMBOL(blk_queue_headactive);
EXPORT_SYMBOL(blk_queue_pluggable);
EXPORT_SYMBOL(blk_queue_make_request);
EXPORT_SYMBOL(generic_make_request);
EXPORT_SYMBOL(blkdev_release_request);