linux-2.6.9-moxart.git: drivers/md/dm-io.c
/*
 * Copyright (C) 2003 Sistina Software
 *
 * This file is released under the GPL.
 */

#include "dm-io.h"

#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>

#define BIO_POOL_SIZE 256
/*-----------------------------------------------------------------
 * Bio set, move this to bio.c
 *---------------------------------------------------------------*/
#define BV_NAME_SIZE 16
struct biovec_pool {
	int nr_vecs;
	char name[BV_NAME_SIZE];
	kmem_cache_t *slab;
	mempool_t *pool;
	atomic_t allocated;	/* FIXME: debug */
};

#define BIOVEC_NR_POOLS 6
struct bio_set {
	char name[BV_NAME_SIZE];
	kmem_cache_t *bio_slab;
	mempool_t *bio_pool;
	struct biovec_pool pools[BIOVEC_NR_POOLS];
};
static void bio_set_exit(struct bio_set *bs)
{
	unsigned i;
	struct biovec_pool *bp;

	if (bs->bio_pool)
		mempool_destroy(bs->bio_pool);

	if (bs->bio_slab)
		kmem_cache_destroy(bs->bio_slab);

	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
		bp = bs->pools + i;
		if (bp->pool)
			mempool_destroy(bp->pool);

		if (bp->slab)
			kmem_cache_destroy(bp->slab);
	}
}

static void mk_name(char *str, size_t len, const char *prefix, unsigned count)
{
	snprintf(str, len, "%s-%u", prefix, count);
}
static int bio_set_init(struct bio_set *bs, const char *slab_prefix,
			unsigned pool_entries, unsigned scale)
{
	/* FIXME: this must match bvec_index(), why not go the
	 * whole hog and have a pool per power of 2 ? */
	static unsigned _vec_lengths[BIOVEC_NR_POOLS] = {
		1, 4, 16, 64, 128, BIO_MAX_PAGES
	};

	unsigned i, size;
	struct biovec_pool *bp;

	/* zero the bs so we can tear down properly on error */
	memset(bs, 0, sizeof(*bs));

	/*
	 * Set up the bio pool.
	 */
	snprintf(bs->name, sizeof(bs->name), "%s-bio", slab_prefix);

	bs->bio_slab = kmem_cache_create(bs->name, sizeof(struct bio), 0,
					 SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!bs->bio_slab) {
		DMWARN("can't init bio slab");
		goto bad;
	}

	bs->bio_pool = mempool_create(pool_entries, mempool_alloc_slab,
				      mempool_free_slab, bs->bio_slab);
	if (!bs->bio_pool) {
		DMWARN("can't init bio pool");
		goto bad;
	}

	/*
	 * Set up the biovec pools.
	 */
	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
		bp = bs->pools + i;
		bp->nr_vecs = _vec_lengths[i];
		atomic_set(&bp->allocated, 1);	/* FIXME: debug */

		size = bp->nr_vecs * sizeof(struct bio_vec);

		mk_name(bp->name, sizeof(bp->name), slab_prefix, i);
		bp->slab = kmem_cache_create(bp->name, size, 0,
					     SLAB_HWCACHE_ALIGN, NULL, NULL);
		if (!bp->slab) {
			DMWARN("can't init biovec slab cache");
			goto bad;
		}

		if (i >= scale)
			pool_entries >>= 1;

		bp->pool = mempool_create(pool_entries, mempool_alloc_slab,
					  mempool_free_slab, bp->slab);
		if (!bp->pool) {
			DMWARN("can't init biovec mempool");
			goto bad;
		}
	}

	return 0;

 bad:
	bio_set_exit(bs);
	return -ENOMEM;
}
/* FIXME: blech */
static inline unsigned bvec_index(unsigned nr)
{
	switch (nr) {
	case 1:			return 0;
	case 2 ... 4:		return 1;
	case 5 ... 16:		return 2;
	case 17 ... 64:		return 3;
	case 65 ... 128:	return 4;
	case 129 ... BIO_MAX_PAGES:	return 5;
	}

	BUG();
	return 0;
}
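/*
 * For example, bvec_index(3) falls in the "2 ... 4" case and returns 1,
 * selecting the pool of 4-element biovecs set up by bio_set_init() above.
 */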
static inline void bs_bio_init(struct bio *bio)
{
	bio->bi_next = NULL;
	bio->bi_flags = 1 << BIO_UPTODATE;
	bio->bi_rw = 0;
	bio->bi_vcnt = 0;
	bio->bi_idx = 0;
	bio->bi_phys_segments = 0;
	bio->bi_hw_segments = 0;
	bio->bi_size = 0;
	bio->bi_max_vecs = 0;
	bio->bi_end_io = NULL;
	atomic_set(&bio->bi_cnt, 1);
	bio->bi_private = NULL;
}
static unsigned _bio_count = 0;
struct bio *bio_set_alloc(struct bio_set *bs, int gfp_mask, int nr_iovecs)
{
	struct biovec_pool *bp;
	struct bio_vec *bv = NULL;
	unsigned long idx;
	struct bio *bio;

	bio = mempool_alloc(bs->bio_pool, gfp_mask);
	if (unlikely(!bio))
		return NULL;

	bio_init(bio);

	if (likely(nr_iovecs)) {
		idx = bvec_index(nr_iovecs);
		bp = bs->pools + idx;
		bv = mempool_alloc(bp->pool, gfp_mask);
		if (!bv) {
			mempool_free(bio, bs->bio_pool);
			return NULL;
		}

		memset(bv, 0, bp->nr_vecs * sizeof(*bv));
		bio->bi_flags |= idx << BIO_POOL_OFFSET;
		bio->bi_max_vecs = bp->nr_vecs;
		atomic_inc(&bp->allocated);
	}

	bio->bi_io_vec = bv;
	return bio;
}
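/*
 * The pool index stored in bi_flags above is what bio_set_free() below
 * recovers via BIO_POOL_IDX() (from <linux/bio.h>), so the biovec is
 * returned to the pool it was allocated from.
 */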
static void bio_set_free(struct bio_set *bs, struct bio *bio)
{
	struct biovec_pool *bp = bs->pools + BIO_POOL_IDX(bio);

	if (atomic_dec_and_test(&bp->allocated))
		BUG();

	mempool_free(bio->bi_io_vec, bp->pool);
	mempool_free(bio, bs->bio_pool);
}
/*-----------------------------------------------------------------
 * dm-io proper
 *---------------------------------------------------------------*/
static struct bio_set _bios;

/* FIXME: can we shrink this ? */
struct io {
	unsigned long error;
	atomic_t count;
	struct task_struct *sleeper;
	io_notify_fn callback;
	void *context;
};

/*
 * io contexts are only dynamically allocated for asynchronous
 * io.  Since async io is likely to be the majority of io we'll
 * have the same number of io contexts as buffer heads ! (FIXME:
 * must reduce this).
 */
static unsigned _num_ios;
static mempool_t *_io_pool;
static void *alloc_io(int gfp_mask, void *pool_data)
{
	return kmalloc(sizeof(struct io), gfp_mask);
}

static void free_io(void *element, void *pool_data)
{
	kfree(element);
}

static unsigned int pages_to_ios(unsigned int pages)
{
	return 4 * pages;	/* too many ? */
}
static int resize_pool(unsigned int new_ios)
{
	int r = 0;

	if (_io_pool) {
		if (new_ios == 0) {
			/* free off the pool */
			mempool_destroy(_io_pool);
			_io_pool = NULL;
			bio_set_exit(&_bios);

		} else {
			/* resize the pool */
			r = mempool_resize(_io_pool, new_ios, GFP_KERNEL);
		}

	} else {
		/* create new pool */
		_io_pool = mempool_create(new_ios, alloc_io, free_io, NULL);
		if (!_io_pool)
			return -ENOMEM;

		r = bio_set_init(&_bios, "dm-io", 512, 1);
		if (r) {
			mempool_destroy(_io_pool);
			_io_pool = NULL;
		}
	}

	if (!r)
		_num_ios = new_ios;

	return r;
}
int dm_io_get(unsigned int num_pages)
{
	return resize_pool(_num_ios + pages_to_ios(num_pages));
}

void dm_io_put(unsigned int num_pages)
{
	resize_pool(_num_ios - pages_to_ios(num_pages));
}
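/*
 * For example, dm_io_get(64) grows the io pool by pages_to_ios(64) == 256
 * io structs, and a matching dm_io_put(64) shrinks it again.
 */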
/*-----------------------------------------------------------------
 * We need to keep track of which region a bio is doing io for.
 * In order to save a memory allocation we store this in the last
 * bvec which we know is unused (blech).
 *---------------------------------------------------------------*/
static inline void bio_set_region(struct bio *bio, unsigned region)
{
	bio->bi_io_vec[bio->bi_max_vecs - 1].bv_len = region;
}

static inline unsigned bio_get_region(struct bio *bio)
{
	return bio->bi_io_vec[bio->bi_max_vecs - 1].bv_len;
}
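/*
 * For example, a bio built by do_region() for region 2 with
 * bi_max_vecs == 16 carries the value 2 in bi_io_vec[15].bv_len;
 * do_region() requests an extra bvec beyond what the data needs, so
 * that final slot is never filled by bio_add_page() and the region
 * number survives until endio() reads it back with bio_get_region().
 */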
/*-----------------------------------------------------------------
 * We need an io object to keep track of the number of bios that
 * have been dispatched for a particular io.
 *---------------------------------------------------------------*/
static void dec_count(struct io *io, unsigned int region, int error)
{
	if (error)
		set_bit(region, &io->error);

	if (atomic_dec_and_test(&io->count)) {
		if (io->sleeper)
			wake_up_process(io->sleeper);

		else {
			int r = io->error;
			io_notify_fn fn = io->callback;
			void *context = io->context;

			mempool_free(io, _io_pool);
			fn(r, context);
		}
	}
}
/* FIXME Move this to bio.h? */
static void zero_fill_bio(struct bio *bio)
{
	unsigned long flags;
	struct bio_vec *bv;
	int i;

	bio_for_each_segment(bv, bio, i) {
		char *data = bvec_kmap_irq(bv, &flags);
		memset(data, 0, bv->bv_len);
		flush_dcache_page(bv->bv_page);
		bvec_kunmap_irq(data, &flags);
	}
}
static int endio(struct bio *bio, unsigned int done, int error)
{
	struct io *io = (struct io *) bio->bi_private;

	/* keep going until we've finished */
	if (bio->bi_size)
		return 1;

	if (error && bio_data_dir(bio) == READ)
		zero_fill_bio(bio);

	dec_count(io, bio_get_region(bio), error);
	bio_put(bio);

	return 0;
}
static void bio_dtr(struct bio *bio)
{
	_bio_count--;
	bio_set_free(&_bios, bio);
}
/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	void (*next_page)(struct dpages *dp);

	unsigned context_u;
	void *context_ptr;
};
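/*
 * The contract, as driven by do_region() below: get_page() reports the
 * current page, the number of bytes usable starting at *offset, and the
 * offset itself; next_page() advances to the following page.  A caller
 * iterates roughly like this:
 *
 *	dp->get_page(dp, &page, &len, &offset);
 *	... consume up to len bytes of page starting at offset ...
 *	dp->next_page(dp);
 */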
/*
 * Functions for getting the pages from a list.
 */
static void list_get_page(struct dpages *dp,
			  struct page **p, unsigned long *len, unsigned *offset)
{
	unsigned o = dp->context_u;
	struct page_list *pl = (struct page_list *) dp->context_ptr;

	*p = pl->page;
	*len = PAGE_SIZE - o;
	*offset = o;
}

static void list_next_page(struct dpages *dp)
{
	struct page_list *pl = (struct page_list *) dp->context_ptr;
	dp->context_ptr = pl->next;
	dp->context_u = 0;
}

static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset)
{
	dp->get_page = list_get_page;
	dp->next_page = list_next_page;
	dp->context_u = offset;
	dp->context_ptr = pl;
}
/*
 * Functions for getting the pages from a bvec.
 */
static void bvec_get_page(struct dpages *dp,
			  struct page **p, unsigned long *len, unsigned *offset)
{
	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
	*p = bvec->bv_page;
	*len = bvec->bv_len;
	*offset = bvec->bv_offset;
}

static void bvec_next_page(struct dpages *dp)
{
	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
	dp->context_ptr = bvec + 1;
}

static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec)
{
	dp->get_page = bvec_get_page;
	dp->next_page = bvec_next_page;
	dp->context_ptr = bvec;
}
/*
 * Functions for getting the pages from a vmalloc'd buffer.
 */
static void vm_get_page(struct dpages *dp,
			struct page **p, unsigned long *len, unsigned *offset)
{
	*p = vmalloc_to_page(dp->context_ptr);
	*offset = dp->context_u;
	*len = PAGE_SIZE - dp->context_u;
}

static void vm_next_page(struct dpages *dp)
{
	dp->context_ptr += PAGE_SIZE - dp->context_u;
	dp->context_u = 0;
}

static void vm_dp_init(struct dpages *dp, void *data)
{
	dp->get_page = vm_get_page;
	dp->next_page = vm_next_page;
	dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1);
	dp->context_ptr = data;
}
/*-----------------------------------------------------------------
 * IO routines that accept a list of pages.
 *---------------------------------------------------------------*/
static void do_region(int rw, unsigned int region, struct io_region *where,
		      struct dpages *dp, struct io *io)
{
	struct bio *bio;
	struct page *page;
	unsigned long len;
	unsigned offset;
	unsigned num_bvecs;
	sector_t remaining = where->count;

	while (remaining) {
		/*
		 * Allocate a suitably sized bio, we add an extra
		 * bvec for bio_get/set_region().
		 */
		num_bvecs = (remaining / (PAGE_SIZE >> 9)) + 2;
		_bio_count++;
		bio = bio_set_alloc(&_bios, GFP_NOIO, num_bvecs);
		bio->bi_sector = where->sector + (where->count - remaining);
		bio->bi_bdev = where->bdev;
		bio->bi_end_io = endio;
		bio->bi_private = io;
		bio->bi_destructor = bio_dtr;
		bio_set_region(bio, region);

		/*
		 * Try and add as many pages as possible.
		 */
		while (remaining) {
			dp->get_page(dp, &page, &len, &offset);
			len = min(len, to_bytes(remaining));
			if (!bio_add_page(bio, page, len, offset))
				break;

			offset = 0;
			remaining -= to_sector(len);
			dp->next_page(dp);
		}

		atomic_inc(&io->count);
		submit_bio(rw, bio);
	}
}
static void dispatch_io(int rw, unsigned int num_regions,
			struct io_region *where, struct dpages *dp,
			struct io *io, int sync)
{
	int i;
	struct dpages old_pages = *dp;

	if (sync)
		rw |= (1 << BIO_RW_SYNC);

	/*
	 * For multiple regions we need to be careful to rewind
	 * the dp object for each call to do_region.
	 */
	for (i = 0; i < num_regions; i++) {
		*dp = old_pages;
		if (where[i].count)
			do_region(rw, i, where + i, dp, io);
	}

	/*
	 * Drop the extra reference that we were holding to avoid
	 * the io being completed too early.
	 */
	dec_count(io, 0, 0);
}
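/*
 * Note on the reference counting: sync_io() and async_io() both set
 * io->count to 1 before calling dispatch_io(), each bio submitted by
 * do_region() takes another reference, and the dec_count() above drops
 * the initial one, so the io cannot complete while bios are still
 * being issued.
 */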
static int sync_io(unsigned int num_regions, struct io_region *where,
		   int rw, struct dpages *dp, unsigned long *error_bits)
{
	struct io io;

	if (num_regions > 1 && rw != WRITE) {
		WARN_ON(1);
		return -EIO;
	}

	io.error = 0;
	atomic_set(&io.count, 1); /* see dispatch_io() */
	io.sleeper = current;

	dispatch_io(rw, num_regions, where, dp, &io, 1);

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!atomic_read(&io.count) || signal_pending(current))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	if (atomic_read(&io.count))
		return -EINTR;

	*error_bits = io.error;
	return io.error ? -EIO : 0;
}
static int async_io(unsigned int num_regions, struct io_region *where, int rw,
		    struct dpages *dp, io_notify_fn fn, void *context)
{
	struct io *io;

	if (num_regions > 1 && rw != WRITE) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	io = mempool_alloc(_io_pool, GFP_NOIO);
	io->error = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->sleeper = NULL;
	io->callback = fn;
	io->context = context;

	dispatch_io(rw, num_regions, where, dp, io, 0);
	return 0;
}
int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
	       struct page_list *pl, unsigned int offset,
	       unsigned long *error_bits)
{
	struct dpages dp;
	list_dp_init(&dp, pl, offset);
	return sync_io(num_regions, where, rw, &dp, error_bits);
}

int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw,
		    struct bio_vec *bvec, unsigned long *error_bits)
{
	struct dpages dp;
	bvec_dp_init(&dp, bvec);
	return sync_io(num_regions, where, rw, &dp, error_bits);
}

int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
		  void *data, unsigned long *error_bits)
{
	struct dpages dp;
	vm_dp_init(&dp, data);
	return sync_io(num_regions, where, rw, &dp, error_bits);
}

int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
		struct page_list *pl, unsigned int offset,
		io_notify_fn fn, void *context)
{
	struct dpages dp;
	list_dp_init(&dp, pl, offset);
	return async_io(num_regions, where, rw, &dp, fn, context);
}

int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw,
		     struct bio_vec *bvec, io_notify_fn fn, void *context)
{
	struct dpages dp;
	bvec_dp_init(&dp, bvec);
	return async_io(num_regions, where, rw, &dp, fn, context);
}

int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
		   void *data, io_notify_fn fn, void *context)
{
	struct dpages dp;
	vm_dp_init(&dp, data);
	return async_io(num_regions, where, rw, &dp, fn, context);
}
EXPORT_SYMBOL(dm_io_get);
EXPORT_SYMBOL(dm_io_put);
EXPORT_SYMBOL(dm_io_sync);
EXPORT_SYMBOL(dm_io_async);
EXPORT_SYMBOL(dm_io_sync_bvec);
EXPORT_SYMBOL(dm_io_async_bvec);
EXPORT_SYMBOL(dm_io_sync_vm);
EXPORT_SYMBOL(dm_io_async_vm);
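/*
 * Usage sketch (hypothetical caller, not part of this file): reserve io
 * structs with dm_io_get(), issue the io with one of the dm_io_sync*() or
 * dm_io_async*() variants, then release the reservation with dm_io_put().
 * Here bdev is assumed to be a struct block_device pointer the caller
 * already holds, and the buffer must be vmalloc'd for the _vm variants;
 * the io_region field names follow their use in do_region() above.
 *
 *	struct io_region where = {
 *		.bdev   = bdev,
 *		.sector = 0,
 *		.count  = 8,
 *	};
 *	unsigned long error_bits;
 *	void *buf = vmalloc(8 << 9);
 *
 *	if (!dm_io_get(1)) {
 *		dm_io_sync_vm(1, &where, READ, buf, &error_bits);
 *		dm_io_put(1);
 *	}
 */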