/* fs/bio.c (linux-2.6/history.git) */

/*
 * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
#include <linux/mm.h>
#include <linux/bio.h>
#include <linux/blk.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mempool.h>

#define BIO_POOL_SIZE 256

static mempool_t *bio_pool;
static kmem_cache_t *bio_slab;

#define BIOVEC_NR_POOLS 6

struct biovec_pool {
        int nr_vecs;
        char *name;
        kmem_cache_t *slab;
        mempool_t *pool;
};

/*
 * if you change this list, also change bvec_alloc or things will
 * break badly! cannot be bigger than what you can fit into an
 * unsigned short
 */

#define BV(x) { x, "biovec-" #x }
static struct biovec_pool bvec_array[BIOVEC_NR_POOLS] = {
        BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
};
#undef BV

static void *slab_pool_alloc(int gfp_mask, void *data)
{
        return kmem_cache_alloc(data, gfp_mask);
}

static void slab_pool_free(void *ptr, void *data)
{
        kmem_cache_free(data, ptr);
}

static inline struct bio_vec *bvec_alloc(int gfp_mask, int nr, unsigned long *idx)
{
        struct biovec_pool *bp;
        struct bio_vec *bvl;

        /*
         * see comment near bvec_array define!
         */
        switch (nr) {
                case   1        : *idx = 0; break;
                case   2 ...   4: *idx = 1; break;
                case   5 ...  16: *idx = 2; break;
                case  17 ...  64: *idx = 3; break;
                case  65 ... 128: *idx = 4; break;
                case 129 ... BIO_MAX_PAGES: *idx = 5; break;
                default:
                        return NULL;
        }
        /*
         * idx now points to the pool we want to allocate from
         */
        bp = bvec_array + *idx;

        bvl = mempool_alloc(bp->pool, gfp_mask);
        if (bvl)
                memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
        return bvl;
}
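
/*
 * Example (editor's note, not in the original source): a request for 10
 * vecs falls in the "5 ... 16" case above, so *idx becomes 2 and the
 * allocation is served from the "biovec-16" mempool, returning a zeroed
 * array of 16 struct bio_vec.
 */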

/*
 * default destructor for a bio allocated with bio_alloc()
 */
void bio_destructor(struct bio *bio)
{
        const int pool_idx = BIO_POOL_IDX(bio);
        struct biovec_pool *bp = bvec_array + pool_idx;

        BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);

        /*
         * cloned bio doesn't own the veclist
         */
        if (!bio_flagged(bio, BIO_CLONED))
                mempool_free(bio->bi_io_vec, bp->pool);

        mempool_free(bio, bio_pool);
}

inline void bio_init(struct bio *bio)
{
        bio->bi_next = NULL;
        bio->bi_flags = 1 << BIO_UPTODATE;
        bio->bi_rw = 0;
        bio->bi_vcnt = 0;
        bio->bi_idx = 0;
        bio->bi_phys_segments = 0;
        bio->bi_hw_segments = 0;
        bio->bi_size = 0;
        bio->bi_max_vecs = 0;
        bio->bi_end_io = NULL;
        atomic_set(&bio->bi_cnt, 1);
        bio->bi_private = NULL;
}

/**
 * bio_alloc - allocate a bio for I/O
 * @gfp_mask: the GFP_ mask given to the slab allocator
 * @nr_iovecs: number of iovecs to pre-allocate
 *
 * Description:
 *   bio_alloc will first try its own mempool to satisfy the allocation.
 *   If %__GFP_WAIT is set then we will block on the internal pool waiting
 *   for a &struct bio to become free.
 **/
struct bio *bio_alloc(int gfp_mask, int nr_iovecs)
{
        int pf_flags = current->flags;
        struct bio_vec *bvl = NULL;
        unsigned long idx;
        struct bio *bio;

        current->flags |= PF_NOWARN;
        bio = mempool_alloc(bio_pool, gfp_mask);
        if (unlikely(!bio))
                goto out;

        bio_init(bio);

        if (unlikely(!nr_iovecs))
                goto noiovec;

        bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx);
        if (bvl) {
                bio->bi_flags |= idx << BIO_POOL_OFFSET;
                bio->bi_max_vecs = bvec_array[idx].nr_vecs;
noiovec:
                bio->bi_io_vec = bvl;
                bio->bi_destructor = bio_destructor;
out:
                current->flags = pf_flags;
                return bio;
        }

        mempool_free(bio, bio_pool);
        bio = NULL;
        goto out;
}

/**
 * bio_put - release a reference to a bio
 * @bio: bio to release reference to
 *
 * Description:
 *   Put a reference to a &struct bio, either one you have gotten with
 *   bio_alloc or bio_get. The last put of a bio will free it.
 **/
void bio_put(struct bio *bio)
{
        BIO_BUG_ON(!atomic_read(&bio->bi_cnt));

        /*
         * last put frees it
         */
        if (atomic_dec_and_test(&bio->bi_cnt)) {
                bio->bi_next = NULL;
                bio->bi_destructor(bio);
        }
}
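
/*
 * Example (editor's sketch, not part of the original file): the usual
 * allocate/release pairing from process context. bio_put() drops the
 * reference taken by bio_alloc(); the destructor set above returns the
 * bio and its vec list to their mempools.
 *
 *      struct bio *bio = bio_alloc(GFP_KERNEL, 1);
 *
 *      if (!bio)
 *              return -ENOMEM;
 *      ...
 *      bio_put(bio);
 */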

inline int bio_phys_segments(request_queue_t *q, struct bio *bio)
{
        if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
                blk_recount_segments(q, bio);

        return bio->bi_phys_segments;
}

inline int bio_hw_segments(request_queue_t *q, struct bio *bio)
{
        if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
                blk_recount_segments(q, bio);

        return bio->bi_hw_segments;
}

/**
 *      __bio_clone     -       clone a bio
 *      @bio: destination bio
 *      @bio_src: bio to clone
 *
 *      Clone a &bio. Caller will own the returned bio, but not
 *      the actual data it points to. Reference count of returned
 *      bio will be one.
 */
inline void __bio_clone(struct bio *bio, struct bio *bio_src)
{
        bio->bi_io_vec = bio_src->bi_io_vec;

        bio->bi_sector = bio_src->bi_sector;
        bio->bi_bdev = bio_src->bi_bdev;
        bio->bi_flags |= 1 << BIO_CLONED;
        bio->bi_rw = bio_src->bi_rw;

        /*
         * notes -- maybe just leave bi_idx alone. assume identical mapping
         * for the clone
         */
        bio->bi_vcnt = bio_src->bi_vcnt;
        bio->bi_idx = bio_src->bi_idx;
        if (bio_flagged(bio, BIO_SEG_VALID)) {
                bio->bi_phys_segments = bio_src->bi_phys_segments;
                bio->bi_hw_segments = bio_src->bi_hw_segments;
                bio->bi_flags |= (1 << BIO_SEG_VALID);
        }
        bio->bi_size = bio_src->bi_size;

        /*
         * cloned bio does not own the bio_vec, so users cannot fiddle with
         * it. clear bi_max_vecs and clear the BIO_POOL_BITS to make this
         * apparent
         */
        bio->bi_max_vecs = 0;
        bio->bi_flags &= (BIO_POOL_MASK - 1);
}

/**
 *      bio_clone       -       clone a bio
 *      @bio: bio to clone
 *      @gfp_mask: allocation priority
 *
 *      Like __bio_clone, only also allocates the returned bio
 */
struct bio *bio_clone(struct bio *bio, int gfp_mask)
{
        struct bio *b = bio_alloc(gfp_mask, 0);

        if (b)
                __bio_clone(b, bio);

        return b;
}
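
/*
 * Example (editor's sketch, not part of the original file): a stacking
 * driver typically clones an incoming bio, redirects the clone and sends
 * it down the stack. target_bdev, target_offset and clone_end_io are
 * hypothetical names standing in for the driver's own state.
 *
 *      struct bio *clone = bio_clone(bio, GFP_NOIO);
 *
 *      if (clone) {
 *              clone->bi_bdev = target_bdev;
 *              clone->bi_sector += target_offset;
 *              clone->bi_end_io = clone_end_io;
 *              clone->bi_private = bio;
 *              generic_make_request(clone);
 *      }
 */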

/**
 *      bio_copy        -       create copy of a bio
 *      @bio: bio to copy
 *      @gfp_mask: allocation priority
 *      @copy: copy data to allocated bio
 *
 *      Create a copy of a &bio. Caller will own the returned bio and
 *      the actual data it points to. Reference count of returned
 *      bio will be one.
 */
struct bio *bio_copy(struct bio *bio, int gfp_mask, int copy)
{
        struct bio *b = bio_alloc(gfp_mask, bio->bi_vcnt);
        unsigned long flags = 0; /* gcc silly */
        struct bio_vec *bv;
        int i;

        if (unlikely(!b))
                return NULL;

        /*
         * iterate iovec list and alloc pages + copy data
         */
        __bio_for_each_segment(bv, bio, i, 0) {
                struct bio_vec *bbv = &b->bi_io_vec[i];
                char *vfrom, *vto;

                bbv->bv_page = alloc_page(gfp_mask);
                if (bbv->bv_page == NULL)
                        goto oom;

                bbv->bv_len = bv->bv_len;
                bbv->bv_offset = bv->bv_offset;

                /*
                 * if doing a copy for a READ request, no need
                 * to memcpy page data
                 */
                if (!copy)
                        continue;

                if (gfp_mask & __GFP_WAIT) {
                        vfrom = kmap(bv->bv_page);
                        vto = kmap(bbv->bv_page);
                } else {
                        local_irq_save(flags);
                        vfrom = kmap_atomic(bv->bv_page, KM_BIO_SRC_IRQ);
                        vto = kmap_atomic(bbv->bv_page, KM_BIO_DST_IRQ);
                }

                memcpy(vto + bbv->bv_offset, vfrom + bv->bv_offset, bv->bv_len);
                if (gfp_mask & __GFP_WAIT) {
                        kunmap(bbv->bv_page);
                        kunmap(bv->bv_page);
                } else {
                        kunmap_atomic(vto, KM_BIO_DST_IRQ);
                        kunmap_atomic(vfrom, KM_BIO_SRC_IRQ);
                        local_irq_restore(flags);
                }
        }

        b->bi_sector = bio->bi_sector;
        b->bi_bdev = bio->bi_bdev;
        b->bi_rw = bio->bi_rw;

        b->bi_vcnt = bio->bi_vcnt;
        b->bi_size = bio->bi_size;

        return b;

oom:
        while (--i >= 0)
                __free_page(b->bi_io_vec[i].bv_page);

        mempool_free(b, bio_pool);
        return NULL;
}
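
/*
 * Example (editor's sketch, not part of the original file): taking a
 * private, fully-owned copy of a WRITE bio so the originating pages can
 * be released before the copy completes; bio_data_dir() from
 * <linux/bio.h> selects whether the data needs to be memcpy'd.
 *
 *      struct bio *b = bio_copy(bio, GFP_NOIO, bio_data_dir(bio) == WRITE);
 *
 *      if (!b)
 *              return -ENOMEM;
 */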

/**
 *      bio_get_nr_vecs         - return approx number of vecs
 *      @bdev: I/O target
 *
 *      Return the approximate number of pages we can send to this target.
 *      There's no guarantee that you will be able to fit this number of
 *      pages into a bio; it does not account for dynamic restrictions that
 *      vary on offset.
 */
int bio_get_nr_vecs(struct block_device *bdev)
{
        request_queue_t *q = bdev_get_queue(bdev);
        int nr_pages;

        nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        if (nr_pages > q->max_phys_segments)
                nr_pages = q->max_phys_segments;
        if (nr_pages > q->max_hw_segments)
                nr_pages = q->max_hw_segments;

        return nr_pages;
}

/**
 *      bio_add_page    -       attempt to add page to bio
 *      @bio: destination bio
 *      @page: page to add
 *      @len: vec entry length
 *      @offset: vec entry offset
 *
 *      Attempt to add a page to the bio_vec maplist. This can fail for a
 *      number of reasons, such as the bio being full or target block
 *      device limitations.
 */
int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
                 unsigned int offset)
{
        request_queue_t *q = bdev_get_queue(bio->bi_bdev);
        int fail_segments = 0, retried_segments = 0;
        struct bio_vec *bvec;

        /*
         * cloned bio must not modify vec list
         */
        if (unlikely(bio_flagged(bio, BIO_CLONED)))
                return 0;

        if (bio->bi_vcnt >= bio->bi_max_vecs)
                return 0;

        if (((bio->bi_size + len) >> 9) > q->max_sectors)
                return 0;

        /*
         * we might lose a segment or two here, but rather that than
         * make this too complex.
         */
retry_segments:
        if (bio_phys_segments(q, bio) >= q->max_phys_segments
            || bio_hw_segments(q, bio) >= q->max_hw_segments)
                fail_segments = 1;

        if (fail_segments) {
                if (retried_segments)
                        return 0;

                bio->bi_flags &= ~(1 << BIO_SEG_VALID);
                retried_segments = 1;
                goto retry_segments;
        }

        /*
         * setup the new entry, we might clear it again later if we
         * cannot add the page
         */
        bvec = &bio->bi_io_vec[bio->bi_vcnt];
        bvec->bv_page = page;
        bvec->bv_len = len;
        bvec->bv_offset = offset;

        /*
         * if queue has other restrictions (eg varying max sector size
         * depending on offset), it can specify a merge_bvec_fn in the
         * queue to get further control
         */
        if (q->merge_bvec_fn) {
                /*
                 * merge_bvec_fn() returns number of bytes it can accept
                 * at this offset
                 */
                if (q->merge_bvec_fn(q, bio, bvec) < len) {
                        bvec->bv_page = NULL;
                        bvec->bv_len = 0;
                        bvec->bv_offset = 0;
                        return 0;
                }
        }

        bio->bi_vcnt++;
        bio->bi_phys_segments++;
        bio->bi_hw_segments++;
        bio->bi_size += len;
        return len;
}
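
/*
 * Example (editor's sketch, not part of the original file): building a
 * bio page by page, sized with bio_get_nr_vecs() and stopping as soon as
 * bio_add_page() refuses a page. pages[], nr and first_sector stand in
 * for hypothetical caller state.
 *
 *      int i, nr_vecs = bio_get_nr_vecs(bdev);
 *      struct bio *bio = bio_alloc(GFP_KERNEL, nr_vecs);
 *
 *      if (!bio)
 *              return -ENOMEM;
 *
 *      bio->bi_bdev = bdev;
 *      bio->bi_sector = first_sector;
 *      for (i = 0; i < nr; i++)
 *              if (bio_add_page(bio, pages[i], PAGE_SIZE, 0) < PAGE_SIZE)
 *                      break;
 */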

/**
 *      bio_map_user    -       map user address into bio
 *      @bdev: destination block device
 *      @uaddr: start of user address
 *      @len: length in bytes
 *      @write_to_vm: bool indicating writing to pages or not
 *
 *      Map the user space address into a bio suitable for io to a block
 *      device. Caller should check the size of the returned bio; we might
 *      not have mapped the entire range specified.
 */
struct bio *bio_map_user(struct block_device *bdev, unsigned long uaddr,
                         unsigned int len, int write_to_vm)
{
        unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        unsigned long start = uaddr >> PAGE_SHIFT;
        const int nr_pages = end - start;
        request_queue_t *q = bdev_get_queue(bdev);
        int ret, offset, i;
        struct page **pages;
        struct bio *bio;

        /*
         * transfer and buffer must be aligned to at least hardsector
         * size for now, in the future we can relax this restriction
         */
        if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
                return NULL;

        bio = bio_alloc(GFP_KERNEL, nr_pages);
        if (!bio)
                return NULL;

        pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
        if (!pages)
                goto out;

        down_read(&current->mm->mmap_sem);
        ret = get_user_pages(current, current->mm, uaddr, nr_pages,
                             write_to_vm, 0, pages, NULL);
        up_read(&current->mm->mmap_sem);

        if (ret < nr_pages)
                goto out;

        bio->bi_bdev = bdev;

        offset = uaddr & ~PAGE_MASK;
        for (i = 0; i < nr_pages; i++) {
                unsigned int bytes = PAGE_SIZE - offset;

                if (len <= 0)
                        break;

                if (bytes > len)
                        bytes = len;

                /*
                 * sorry...
                 */
                if (bio_add_page(bio, pages[i], bytes, offset) < bytes)
                        break;

                len -= bytes;
                offset = 0;
        }

        /*
         * release the pages we didn't map into the bio, if any
         */
        while (i < nr_pages)
                page_cache_release(pages[i++]);

        kfree(pages);

        /*
         * check if the mapped pages need bouncing for an isa host.
         */
        blk_queue_bounce(q, &bio);
        return bio;
out:
        kfree(pages);
        bio_put(bio);
        return NULL;
}

/**
 *      bio_unmap_user  -       unmap a bio
 *      @bio:           the bio being unmapped
 *      @write_to_vm:   bool indicating whether pages were written to
 *
 *      Unmap a bio previously mapped by bio_map_user(). The @write_to_vm
 *      must be the same as passed into bio_map_user(). Must be called from
 *      process context.
 */
void bio_unmap_user(struct bio *bio, int write_to_vm)
{
        struct bio_vec *bvec;
        int i;

        /*
         * find original bio if it was bounced
         */
        if (bio->bi_private) {
                /*
                 * someone stole our bio, must not happen
                 */
                BUG_ON(!bio_flagged(bio, BIO_BOUNCED));

                bio = bio->bi_private;
        }

        /*
         * make sure we dirty pages we wrote to
         */
        __bio_for_each_segment(bvec, bio, i, 0) {
                if (write_to_vm)
                        set_page_dirty(bvec->bv_page);

                page_cache_release(bvec->bv_page);
        }

        bio_put(bio);
}
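
/*
 * Example (editor's sketch, not part of the original file): direct I/O on
 * a user buffer, as an ioctl path might do when reading from the device
 * into user memory (write_to_vm set). uaddr and ulen are hypothetical
 * values handed in from userspace.
 *
 *      struct bio *bio = bio_map_user(bdev, uaddr, ulen, 1);
 *
 *      if (!bio)
 *              return -ENOMEM;
 *      if (bio->bi_size < ulen)
 *              ... cope with a partial mapping or bail out ...
 *      ... set bi_sector/bi_end_io, submit the bio, wait for completion ...
 *      bio_unmap_user(bio, 1);
 */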

/**
 * bio_endio - end I/O on a bio
 * @bio:        bio
 * @bytes_done: number of bytes completed
 * @error:      error, if any
 *
 * Description:
 *   bio_endio() will end I/O on @bytes_done number of bytes. This may be
 *   just a part of the bio, or it may be the whole bio. bio_endio() is the
 *   preferred way to end I/O on a bio, it takes care of decrementing
 *   bi_size and clearing BIO_UPTODATE on error. @error is 0 on success,
 *   and one of the established -Exxxx (-EIO, for instance) error values in
 *   case something went wrong. No one should call bi_end_io() directly on
 *   a bio unless they own it and thus know that it has an end_io function.
 **/
void bio_endio(struct bio *bio, unsigned int bytes_done, int error)
{
        if (error)
                clear_bit(BIO_UPTODATE, &bio->bi_flags);

        if (unlikely(bytes_done > bio->bi_size)) {
                printk("%s: want %u bytes done, only %u left\n", __FUNCTION__,
                       bytes_done, bio->bi_size);
                bytes_done = bio->bi_size;
        }

        bio->bi_size -= bytes_done;

        if (bio->bi_end_io)
                bio->bi_end_io(bio, bytes_done, error);
}
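
/*
 * Example (editor's sketch, not part of the original file): a driver
 * completing the whole bio in one go from its completion path, where
 * uptodate is hypothetical driver state recording whether the transfer
 * succeeded.
 *
 *      bio_endio(bio, bio->bi_size, uptodate ? 0 : -EIO);
 */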

static void __init biovec_init_pools(void)
{
        int i, size, megabytes, pool_entries = BIO_POOL_SIZE;
        int scale = BIOVEC_NR_POOLS;

        megabytes = nr_free_pages() >> (20 - PAGE_SHIFT);

        /*
         * find out where to start scaling
         */
        if (megabytes <= 16)
                scale = 0;
        else if (megabytes <= 32)
                scale = 1;
        else if (megabytes <= 64)
                scale = 2;
        else if (megabytes <= 96)
                scale = 3;
        else if (megabytes <= 128)
                scale = 4;

        /*
         * scale number of entries
         */
        pool_entries = megabytes * 2;
        if (pool_entries > 256)
                pool_entries = 256;

        for (i = 0; i < BIOVEC_NR_POOLS; i++) {
                struct biovec_pool *bp = bvec_array + i;

                size = bp->nr_vecs * sizeof(struct bio_vec);

                bp->slab = kmem_cache_create(bp->name, size, 0,
                                SLAB_HWCACHE_ALIGN, NULL, NULL);
                if (!bp->slab)
                        panic("biovec: can't init slab cache\n");

                if (i >= scale)
                        pool_entries >>= 1;

                bp->pool = mempool_create(pool_entries, slab_pool_alloc,
                                slab_pool_free, bp->slab);
                if (!bp->pool)
                        panic("biovec: can't init mempool\n");

                printk("biovec pool[%d]: %3d bvecs: %3d entries (%d bytes)\n",
                                i, bp->nr_vecs, pool_entries, size);
        }
}

static int __init init_bio(void)
{
        bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0,
                                SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!bio_slab)
                panic("bio: can't create slab cache\n");
        bio_pool = mempool_create(BIO_POOL_SIZE, slab_pool_alloc, slab_pool_free, bio_slab);
        if (!bio_pool)
                panic("bio: can't create mempool\n");

        printk("BIO: pool of %d setup, %ZuKb (%Zd bytes/bio)\n",
                BIO_POOL_SIZE, BIO_POOL_SIZE * sizeof(struct bio) >> 10,
                sizeof(struct bio));

        biovec_init_pools();

        return 0;
}

subsys_initcall(init_bio);

EXPORT_SYMBOL(bio_alloc);
EXPORT_SYMBOL(bio_put);
EXPORT_SYMBOL(bio_endio);
EXPORT_SYMBOL(bio_init);
EXPORT_SYMBOL(bio_copy);
EXPORT_SYMBOL(__bio_clone);
EXPORT_SYMBOL(bio_clone);
EXPORT_SYMBOL(bio_phys_segments);
EXPORT_SYMBOL(bio_hw_segments);
EXPORT_SYMBOL(bio_add_page);
EXPORT_SYMBOL(bio_get_nr_vecs);
EXPORT_SYMBOL(bio_map_user);
EXPORT_SYMBOL(bio_unmap_user);