[PATCH] elevator: move the backmerging logic into the elevator core
[linux-2.6/cjktty.git] / block / deadline-iosched.c
blob b66e820f544d8a2f9406027d6592aed7d2382459
/*
 * Deadline i/o scheduler.
 *
 * Copyright (C) 2002 Jens Axboe <axboe@suse.de>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>

/*
 * See Documentation/block/deadline-iosched.txt
 */
static const int read_expire = HZ / 2;	/* max time before a read is submitted. */
static const int write_expire = 5 * HZ;	/* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2;	/* max times reads can starve a write */
static const int fifo_batch = 16;	/* # of sequential requests treated as one
					   by the above parameters. For throughput. */
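/*
 * Example with the defaults above, assuming HZ=1000 (a common
 * configuration): a read expires 500 jiffies (500 ms) after being
 * queued, a write after 5000 jiffies (5 s), and reads may win over a
 * waiting write at most twice before a write batch is forced.
 */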
struct deadline_data {
	/*
	 * run time data
	 */

	/*
	 * requests (deadline_rq s) are present on both sort_list and fifo_list
	 */
	struct rb_root sort_list[2];
	struct list_head fifo_list[2];

	/*
	 * next in sort order. read, write or both are NULL
	 */
	struct deadline_rq *next_drq[2];
	unsigned int batching;		/* number of sequential requests made */
	sector_t last_sector;		/* head position */
	unsigned int starved;		/* times reads have starved writes */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	int fifo_expire[2];
	int fifo_batch;
	int writes_starved;
	int front_merges;

	mempool_t *drq_pool;
};

/*
 * pre-request data.
 */
struct deadline_rq {
	/*
	 * rbtree index, key is the starting offset
	 */
	struct rb_node rb_node;
	sector_t rb_key;

	struct request *request;

	/*
	 * expire fifo
	 */
	struct list_head fifo;
	unsigned long expires;
};
static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq);

static kmem_cache_t *drq_pool;

#define RQ_DATA(rq)	((struct deadline_rq *) (rq)->elevator_private)

/*
 * rb tree support functions
 */
#define rb_entry_drq(node)	rb_entry((node), struct deadline_rq, rb_node)
#define DRQ_RB_ROOT(dd, drq)	(&(dd)->sort_list[rq_data_dir((drq)->request)])
#define rq_rb_key(rq)		(rq)->sector
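/*
 * Insert drq into the per-direction sort tree, keyed by starting
 * sector. On success the node is linked (the caller still has to
 * rb_insert_color() it) and NULL is returned; if a request with the
 * same key is already queued, that alias is returned and nothing is
 * inserted.
 */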
static struct deadline_rq *
__deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
{
	struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node;
	struct rb_node *parent = NULL;
	struct deadline_rq *__drq;

	while (*p) {
		parent = *p;
		__drq = rb_entry_drq(parent);

		if (drq->rb_key < __drq->rb_key)
			p = &(*p)->rb_left;
		else if (drq->rb_key > __drq->rb_key)
			p = &(*p)->rb_right;
		else
			return __drq;
	}

	rb_link_node(&drq->rb_node, parent, p);
	return NULL;
}
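/*
 * A key collision can happen, e.g. when a merge leaves a request with
 * the same starting sector as one already queued. It is resolved by
 * dispatching the already-queued alias and retrying the insert.
 */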
static void
deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
{
	struct deadline_rq *__alias;

	drq->rb_key = rq_rb_key(drq->request);

retry:
	__alias = __deadline_add_drq_rb(dd, drq);
	if (!__alias) {
		rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
		return;
	}

	deadline_move_request(dd, __alias);
	goto retry;
}
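/*
 * Before erasing, advance the cached next_drq pointer if it points at
 * the request being removed, so the batching logic never follows a
 * dangling pointer.
 */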
static inline void
deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
{
	const int data_dir = rq_data_dir(drq->request);

	if (dd->next_drq[data_dir] == drq) {
		struct rb_node *rbnext = rb_next(&drq->rb_node);

		dd->next_drq[data_dir] = NULL;
		if (rbnext)
			dd->next_drq[data_dir] = rb_entry_drq(rbnext);
	}

	BUG_ON(!RB_EMPTY_NODE(&drq->rb_node));
	rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
	RB_CLEAR_NODE(&drq->rb_node);
}
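/*
 * Look up the request whose starting sector is exactly @sector, or
 * NULL if none exists. Used by the front merge check in
 * deadline_merge().
 */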
static struct request *
deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir)
{
	struct rb_node *n = dd->sort_list[data_dir].rb_node;
	struct deadline_rq *drq;

	while (n) {
		drq = rb_entry_drq(n);

		if (sector < drq->rb_key)
			n = n->rb_left;
		else if (sector > drq->rb_key)
			n = n->rb_right;
		else
			return drq->request;
	}

	return NULL;
}
/*
 * deadline_find_first_drq finds the first (lowest sector numbered) request
 * for the specified data_dir. Used to sweep back to the start of the disk
 * (1-way elevator) after we process the last (highest sector) request.
 */
static struct deadline_rq *
deadline_find_first_drq(struct deadline_data *dd, int data_dir)
{
	struct rb_node *n = dd->sort_list[data_dir].rb_node;

	for (;;) {
		if (n->rb_left == NULL)
			return rb_entry_drq(n);

		n = n->rb_left;
	}
}
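/*
 * Note that deadline_find_first_drq assumes a non-empty sort list: its
 * caller in deadline_dispatch_requests() only invokes it after
 * establishing that requests are queued for data_dir, so n is never
 * NULL above.
 */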
/*
 * add drq to rbtree and fifo
 */
static void
deadline_add_request(struct request_queue *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct deadline_rq *drq = RQ_DATA(rq);
	const int data_dir = rq_data_dir(drq->request);

	deadline_add_drq_rb(dd, drq);

	/*
	 * set expire time and add to fifo list
	 */
	drq->expires = jiffies + dd->fifo_expire[data_dir];
	list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
}
/*
 * remove rq from rbtree and fifo.
 */
static void deadline_remove_request(request_queue_t *q, struct request *rq)
{
	struct deadline_rq *drq = RQ_DATA(rq);
	struct deadline_data *dd = q->elevator->elevator_data;

	list_del_init(&drq->fifo);
	deadline_del_drq_rb(dd, drq);
}
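/*
 * Front merge check: a bio can be front merged with a queued request
 * if it ends exactly where that request begins. For example, a bio
 * with bi_sector 120 and bio_sectors(bio) == 8 ends at sector 128, so
 * a queued request with rb_key 128 is a candidate. Back merges are
 * found by the elevator core (per the patch this blob belongs to).
 */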
static int
deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct request *__rq;
	int ret;

	/*
	 * check for front merge
	 */
	if (dd->front_merges) {
		sector_t rb_key = bio->bi_sector + bio_sectors(bio);

		__rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio));
		if (__rq) {
			BUG_ON(rb_key != rq_rb_key(__rq));

			if (elv_rq_merge_ok(__rq, bio)) {
				ret = ELEVATOR_FRONT_MERGE;
				goto out;
			}
		}
	}

	return ELEVATOR_NO_MERGE;
out:
	*req = __rq;
	return ret;
}
static void deadline_merged_request(request_queue_t *q, struct request *req)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct deadline_rq *drq = RQ_DATA(req);

	/*
	 * if the merge was a front merge, we need to reposition request
	 */
	if (rq_rb_key(req) != drq->rb_key) {
		deadline_del_drq_rb(dd, drq);
		deadline_add_drq_rb(dd, drq);
	}
}
static void
deadline_merged_requests(request_queue_t *q, struct request *req,
			 struct request *next)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct deadline_rq *drq = RQ_DATA(req);
	struct deadline_rq *dnext = RQ_DATA(next);

	BUG_ON(!drq);
	BUG_ON(!dnext);

	if (rq_rb_key(req) != drq->rb_key) {
		deadline_del_drq_rb(dd, drq);
		deadline_add_drq_rb(dd, drq);
	}

	/*
	 * if dnext expires before drq, assign its expire time to drq
	 * and move into dnext position (dnext will be deleted) in fifo
	 */
	if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
		if (time_before(dnext->expires, drq->expires)) {
			list_move(&drq->fifo, &dnext->fifo);
			drq->expires = dnext->expires;
		}
	}

	/*
	 * kill knowledge of next, this one is a goner
	 */
	deadline_remove_request(q, next);
}
/*
 * move request from sort list to dispatch queue.
 */
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
{
	request_queue_t *q = drq->request->q;

	deadline_remove_request(q, drq->request);
	elv_dispatch_add_tail(q, drq->request);
}
/*
 * move an entry to dispatch queue
 */
static void
deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq)
{
	const int data_dir = rq_data_dir(drq->request);
	struct rb_node *rbnext = rb_next(&drq->rb_node);

	dd->next_drq[READ] = NULL;
	dd->next_drq[WRITE] = NULL;

	if (rbnext)
		dd->next_drq[data_dir] = rb_entry_drq(rbnext);

	dd->last_sector = drq->request->sector + drq->request->nr_sectors;

	/*
	 * take it off the sort and fifo list, move
	 * to dispatch queue
	 */
	deadline_move_to_dispatch(dd, drq);
}
#define list_entry_fifo(ptr)	list_entry((ptr), struct deadline_rq, fifo)

/*
 * deadline_check_fifo returns 0 if there are no expired requests on the
 * fifo, 1 otherwise. Requires !list_empty(&dd->fifo_list[ddir])
 */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
	struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next);

	/*
	 * drq is expired!
	 */
	if (time_after(jiffies, drq->expires))
		return 1;

	return 0;
}
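/*
 * Only the fifo head needs checking above: requests are appended with
 * monotonically non-decreasing expire times, so the head is always the
 * oldest. Dispatch below then picks, in order: continue the current
 * batch, prefer reads unless writes are starved, serve an expired fifo
 * head, else the next request in sector order, else sweep back to the
 * lowest sector.
 */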
/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc
 */
static int deadline_dispatch_requests(request_queue_t *q, int force)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const int reads = !list_empty(&dd->fifo_list[READ]);
	const int writes = !list_empty(&dd->fifo_list[WRITE]);
	struct deadline_rq *drq;
	int data_dir;

	/*
	 * batches are currently reads XOR writes
	 */
	if (dd->next_drq[WRITE])
		drq = dd->next_drq[WRITE];
	else
		drq = dd->next_drq[READ];

	if (drq) {
		/* we have a "next request" */

		if (dd->last_sector != drq->request->sector)
			/* end the batch on a non sequential request */
			dd->batching += dd->fifo_batch;

		if (dd->batching < dd->fifo_batch)
			/* we are still entitled to batch */
			goto dispatch_request;
	}

	/*
	 * at this point we are not running a batch. select the appropriate
	 * data direction (read / write)
	 */

	if (reads) {
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));

		if (writes && (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		data_dir = READ;

		goto dispatch_find_request;
	}

	/*
	 * there are either no reads or writes have been starved
	 */

	if (writes) {
dispatch_writes:
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));

		dd->starved = 0;

		data_dir = WRITE;

		goto dispatch_find_request;
	}

	return 0;

dispatch_find_request:
	/*
	 * we are not running a batch, find best request for selected data_dir
	 */
	if (deadline_check_fifo(dd, data_dir)) {
		/* An expired request exists - satisfy it */
		dd->batching = 0;
		drq = list_entry_fifo(dd->fifo_list[data_dir].next);

	} else if (dd->next_drq[data_dir]) {
		/*
		 * The last req was the same dir and we have a next request in
		 * sort order. No expired requests so continue on from here.
		 */
		drq = dd->next_drq[data_dir];
	} else {
		/*
		 * The last req was the other direction or we have run out of
		 * higher-sectored requests. Go back to the lowest sectored
		 * request (1 way elevator) and start a new batch.
		 */
		dd->batching = 0;
		drq = deadline_find_first_drq(dd, data_dir);
	}

dispatch_request:
	/*
	 * drq is the selected appropriate request.
	 */
	dd->batching++;
	deadline_move_request(dd, drq);

	return 1;
}
static int deadline_queue_empty(request_queue_t *q)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	return list_empty(&dd->fifo_list[WRITE])
		&& list_empty(&dd->fifo_list[READ]);
}
static struct request *
deadline_former_request(request_queue_t *q, struct request *rq)
{
	struct deadline_rq *drq = RQ_DATA(rq);
	struct rb_node *rbprev = rb_prev(&drq->rb_node);

	if (rbprev)
		return rb_entry_drq(rbprev)->request;

	return NULL;
}

static struct request *
deadline_latter_request(request_queue_t *q, struct request *rq)
{
	struct deadline_rq *drq = RQ_DATA(rq);
	struct rb_node *rbnext = rb_next(&drq->rb_node);

	if (rbnext)
		return rb_entry_drq(rbnext)->request;

	return NULL;
}
static void deadline_exit_queue(elevator_t *e)
{
	struct deadline_data *dd = e->elevator_data;

	BUG_ON(!list_empty(&dd->fifo_list[READ]));
	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));

	mempool_destroy(dd->drq_pool);
	kfree(dd);
}
/*
 * initialize elevator private data (deadline_data), and alloc a drq for
 * each request on the free lists
 */
static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
{
	struct deadline_data *dd;

	if (!drq_pool)
		return NULL;

	dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
	if (!dd)
		return NULL;
	memset(dd, 0, sizeof(*dd));

	dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
					   mempool_free_slab, drq_pool, q->node);
	if (!dd->drq_pool) {
		kfree(dd);
		return NULL;
	}

	INIT_LIST_HEAD(&dd->fifo_list[READ]);
	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
	dd->sort_list[READ] = RB_ROOT;
	dd->sort_list[WRITE] = RB_ROOT;
	dd->fifo_expire[READ] = read_expire;
	dd->fifo_expire[WRITE] = write_expire;
	dd->writes_starved = writes_starved;
	dd->front_merges = 1;
	dd->fifo_batch = fifo_batch;
	return dd;
}
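/*
 * The mempool created above guarantees that at least BLKDEV_MIN_RQ
 * deadline_rq allocations can always be satisfied, so request setup
 * keeps making forward progress under memory pressure.
 */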
static void deadline_put_request(request_queue_t *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct deadline_rq *drq = RQ_DATA(rq);

	mempool_free(drq, dd->drq_pool);
	rq->elevator_private = NULL;
}

static int
deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
		     gfp_t gfp_mask)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct deadline_rq *drq;

	drq = mempool_alloc(dd->drq_pool, gfp_mask);
	if (drq) {
		memset(drq, 0, sizeof(*drq));
		RB_CLEAR_NODE(&drq->rb_node);
		drq->request = rq;

		INIT_LIST_HEAD(&drq->fifo);

		rq->elevator_private = drq;
		return 0;
	}

	return 1;
}
/*
 * sysfs parts below
 */

static ssize_t
deadline_var_show(int var, char *page)
{
	return sprintf(page, "%d\n", var);
}

static ssize_t
deadline_var_store(int *var, const char *page, size_t count)
{
	char *p = (char *) page;

	*var = simple_strtol(p, &p, 10);
	return count;
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(elevator_t *e, char *page)			\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data = __VAR;						\
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return deadline_var_show(__data, (page));			\
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data;							\
	int ret = deadline_var_store(&__data, (page), count);		\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	if (__CONV)							\
		*(__PTR) = msecs_to_jiffies(__data);			\
	else								\
		*(__PTR) = __data;					\
	return ret;							\
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION
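/*
 * The attributes below appear under /sys/block/<dev>/queue/iosched/
 * when this scheduler is active. For example (sysfs mounted at /sys):
 *
 *	# echo 100 > /sys/block/sda/queue/iosched/read_expire
 *
 * sets the read deadline to 100 ms, converted to jiffies by
 * STORE_FUNCTION above.
 */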
#define DD_ATTR(name) \
	__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
				      deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
	DD_ATTR(read_expire),
	DD_ATTR(write_expire),
	DD_ATTR(writes_starved),
	DD_ATTR(front_merges),
	DD_ATTR(fifo_batch),
	__ATTR_NULL
};
static struct elevator_type iosched_deadline = {
	.ops = {
		.elevator_merge_fn =		deadline_merge,
		.elevator_merged_fn =		deadline_merged_request,
		.elevator_merge_req_fn =	deadline_merged_requests,
		.elevator_dispatch_fn =		deadline_dispatch_requests,
		.elevator_add_req_fn =		deadline_add_request,
		.elevator_queue_empty_fn =	deadline_queue_empty,
		.elevator_former_req_fn =	deadline_former_request,
		.elevator_latter_req_fn =	deadline_latter_request,
		.elevator_set_req_fn =		deadline_set_request,
		.elevator_put_req_fn =		deadline_put_request,
		.elevator_init_fn =		deadline_init_queue,
		.elevator_exit_fn =		deadline_exit_queue,
	},

	.elevator_attrs = deadline_attrs,
	.elevator_name = "deadline",
	.elevator_owner = THIS_MODULE,
};
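/*
 * The scheduler is selected per queue, e.g. with the
 * "elevator=deadline" boot parameter or by writing "deadline" to
 * /sys/block/<dev>/queue/scheduler.
 */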
static int __init deadline_init(void)
{
	int ret;

	drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
				     0, 0, NULL, NULL);

	if (!drq_pool)
		return -ENOMEM;

	ret = elv_register(&iosched_deadline);
	if (ret)
		kmem_cache_destroy(drq_pool);

	return ret;
}

static void __exit deadline_exit(void)
{
	kmem_cache_destroy(drq_pool);
	elv_unregister(&iosched_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("deadline IO scheduler");