/*
 * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blk.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mempool.h>
#define BIO_POOL_SIZE 256

static mempool_t *bio_pool;
static kmem_cache_t *bio_slab;

#define BIOVEC_NR_POOLS 6

struct biovec_pool {
	int nr_vecs;
	char *name;
	kmem_cache_t *slab;
	mempool_t *pool;
};
/*
 * if you change this list, also change bvec_alloc or things will
 * break badly! cannot be bigger than what you can fit into an
 * unsigned short
 */
#define BV(x) { x, "biovec-" #x }
static struct biovec_pool bvec_array[BIOVEC_NR_POOLS] = {
	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
};
static void *slab_pool_alloc(int gfp_mask, void *data)
{
	return kmem_cache_alloc(data, gfp_mask);
}

static void slab_pool_free(void *ptr, void *data)
{
	kmem_cache_free(data, ptr);
}
static inline struct bio_vec *bvec_alloc(int gfp_mask, int nr,
					 unsigned long *idx)
{
	struct bio_vec *bvl;
	struct biovec_pool *bp;

	/*
	 * see comment near bvec_array define!
	 */
	switch (nr) {
		case   1        : *idx = 0; break;
		case   2 ...   4: *idx = 1; break;
		case   5 ...  16: *idx = 2; break;
		case  17 ...  64: *idx = 3; break;
		case  65 ... 128: *idx = 4; break;
		case 129 ... BIO_MAX_PAGES: *idx = 5; break;
		default:
			return NULL;
	}

	/*
	 * idx now points to the pool we want to allocate from
	 */
	bp = bvec_array + *idx;

	bvl = mempool_alloc(bp->pool, gfp_mask);
	if (bvl)
		memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));

	return bvl;
}
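/*
 * Worked example of the mapping above (illustrative only): a request for
 * nr = 10 vecs falls in the "5 ... 16" range, so *idx is set to 2 and the
 * allocation is served from the "biovec-16" pool; a request larger than
 * BIO_MAX_PAGES hits the default case and fails with NULL.
 */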
/*
 * default destructor for a bio allocated with bio_alloc()
 */
void bio_destructor(struct bio *bio)
{
	const int pool_idx = BIO_POOL_IDX(bio);
	struct biovec_pool *bp = bvec_array + pool_idx;

	BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);

	/*
	 * cloned bio doesn't own the veclist
	 */
	if (!bio_flagged(bio, BIO_CLONED))
		mempool_free(bio->bi_io_vec, bp->pool);

	mempool_free(bio, bio_pool);
}
inline void bio_init(struct bio *bio)
{
	bio->bi_next = NULL;
	bio->bi_flags = 1 << BIO_UPTODATE;
	bio->bi_rw = 0;
	bio->bi_vcnt = 0;
	bio->bi_idx = 0;
	bio->bi_phys_segments = 0;
	bio->bi_hw_segments = 0;
	bio->bi_size = 0;
	bio->bi_max_vecs = 0;
	bio->bi_end_io = NULL;
	atomic_set(&bio->bi_cnt, 1);
	bio->bi_private = NULL;
}
/**
 * bio_alloc - allocate a bio for I/O
 * @gfp_mask:	the GFP_ mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 *
 * Description:
 *   bio_alloc will first try its own mempool to satisfy the allocation.
 *   If %__GFP_WAIT is set then we will block on the internal pool waiting
 *   for a &struct bio to become free.
 **/
struct bio *bio_alloc(int gfp_mask, int nr_iovecs)
{
	int pf_flags = current->flags;
	struct bio_vec *bvl = NULL;
	unsigned long idx;
	struct bio *bio;

	current->flags |= PF_NOWARN;
	bio = mempool_alloc(bio_pool, gfp_mask);
	if (unlikely(!bio))
		goto out;

	bio_init(bio);

	if (unlikely(!nr_iovecs))
		goto noiovec;

	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx);
	if (unlikely(!bvl))
		goto err;

	bio->bi_flags |= idx << BIO_POOL_OFFSET;
	bio->bi_max_vecs = bvec_array[idx].nr_vecs;
noiovec:
	bio->bi_io_vec = bvl;
	bio->bi_destructor = bio_destructor;
out:
	current->flags = pf_flags;
	return bio;
err:
	mempool_free(bio, bio_pool);
	bio = NULL;
	goto out;
}
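/*
 * Typical usage (an illustrative sketch, not a fixed contract; "bdev",
 * "sector", "page" and "my_end_io" are hypothetical caller state, and
 * error handling is trimmed):
 *
 *	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
 *	if (bio) {
 *		bio->bi_bdev = bdev;
 *		bio->bi_sector = sector;
 *		bio->bi_end_io = my_end_io;
 *		bio_add_page(bio, page, PAGE_SIZE, 0);
 *		submit_bio(READ, bio);
 *	}
 *
 * The last bio_put() of the bio (usually from the completion path) frees
 * it back into bio_pool via bio_destructor().
 */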
/**
 * bio_put - release a reference to a bio
 * @bio:   bio to release reference to
 *
 * Description:
 *   Put a reference to a &struct bio, either one you have gotten with
 *   bio_alloc or bio_get. The last put of a bio will free it.
 **/
void bio_put(struct bio *bio)
{
	BIO_BUG_ON(!atomic_read(&bio->bi_cnt));

	/*
	 * last put frees it
	 */
	if (atomic_dec_and_test(&bio->bi_cnt)) {
		bio->bi_next = NULL;
		bio->bi_destructor(bio);
	}
}
inline int bio_phys_segments(request_queue_t *q, struct bio *bio)
{
	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
		blk_recount_segments(q, bio);

	return bio->bi_phys_segments;
}
inline int bio_hw_segments(request_queue_t *q, struct bio *bio)
{
	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
		blk_recount_segments(q, bio);

	return bio->bi_hw_segments;
}
/**
 *	__bio_clone	-	clone a bio
 *	@bio: destination bio
 *	@bio_src: bio to clone
 *
 *	Clone a &bio. Caller will own the returned bio, but not
 *	the actual data it points to. Reference count of returned
 *	bio will be one.
 */
inline void __bio_clone(struct bio *bio, struct bio *bio_src)
{
	bio->bi_io_vec = bio_src->bi_io_vec;

	bio->bi_sector = bio_src->bi_sector;
	bio->bi_bdev = bio_src->bi_bdev;
	bio->bi_flags |= 1 << BIO_CLONED;
	bio->bi_rw = bio_src->bi_rw;

	/*
	 * notes -- maybe just leave bi_idx alone. assume identical mapping
	 * for the clone
	 */
	bio->bi_vcnt = bio_src->bi_vcnt;
	bio->bi_idx = bio_src->bi_idx;
	if (bio_flagged(bio_src, BIO_SEG_VALID)) {
		bio->bi_phys_segments = bio_src->bi_phys_segments;
		bio->bi_hw_segments = bio_src->bi_hw_segments;
		bio->bi_flags |= (1 << BIO_SEG_VALID);
	}
	bio->bi_size = bio_src->bi_size;

	/*
	 * cloned bio does not own the bio_vec, so users cannot fiddle with
	 * it. clear bi_max_vecs and clear the BIO_POOL_BITS to make this
	 * apparent
	 */
	bio->bi_max_vecs = 0;
	bio->bi_flags &= (BIO_POOL_MASK - 1);
}
/**
 *	bio_clone	-	clone a bio
 *	@bio: bio to clone
 *	@gfp_mask: allocation priority
 *
 *	Like __bio_clone, only also allocates the returned bio
 */
struct bio *bio_clone(struct bio *bio, int gfp_mask)
{
	struct bio *b = bio_alloc(gfp_mask, 0);

	if (b)
		__bio_clone(b, bio);

	return b;
}
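/*
 * Example (illustrative): a stacking driver that redirects I/O clones the
 * original bio, points the clone at its own device and completion handler,
 * and leaves the data pages shared with the original. "my_bdev" and
 * "my_end_io" are hypothetical:
 *
 *	struct bio *clone = bio_clone(bio, GFP_NOIO);
 *	if (clone) {
 *		clone->bi_bdev = my_bdev;
 *		clone->bi_end_io = my_end_io;
 *		generic_make_request(clone);
 *	}
 */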
/**
 *	bio_copy	-	create copy of a bio
 *	@bio: bio to copy
 *	@gfp_mask: allocation priority
 *	@copy: copy data to allocated bio
 *
 *	Create a copy of a &bio. Caller will own the returned bio and
 *	the actual data it points to. Reference count of returned
 *	bio will be one.
 */
struct bio *bio_copy(struct bio *bio, int gfp_mask, int copy)
{
	struct bio *b = bio_alloc(gfp_mask, bio->bi_vcnt);
	unsigned long flags = 0; /* gcc silly */
	struct bio_vec *bv;
	int i;

	if (unlikely(!b))
		return NULL;

	/*
	 * iterate iovec list and alloc pages + copy data
	 */
	__bio_for_each_segment(bv, bio, i, 0) {
		struct bio_vec *bbv = &b->bi_io_vec[i];
		char *vfrom, *vto;

		bbv->bv_page = alloc_page(gfp_mask);
		if (bbv->bv_page == NULL)
			goto oom;

		bbv->bv_len = bv->bv_len;
		bbv->bv_offset = bv->bv_offset;

		/*
		 * if doing a copy for a READ request, no need
		 * to memcpy page data
		 */
		if (bio_data_dir(bio) == READ && !copy)
			continue;

		if (gfp_mask & __GFP_WAIT) {
			vfrom = kmap(bv->bv_page);
			vto = kmap(bbv->bv_page);
		} else {
			local_irq_save(flags);
			vfrom = kmap_atomic(bv->bv_page, KM_BIO_SRC_IRQ);
			vto = kmap_atomic(bbv->bv_page, KM_BIO_DST_IRQ);
		}

		memcpy(vto + bbv->bv_offset, vfrom + bv->bv_offset, bv->bv_len);
		if (gfp_mask & __GFP_WAIT) {
			kunmap(bbv->bv_page);
			kunmap(bv->bv_page);
		} else {
			kunmap_atomic(vto, KM_BIO_DST_IRQ);
			kunmap_atomic(vfrom, KM_BIO_SRC_IRQ);
			local_irq_restore(flags);
		}
	}

	b->bi_sector = bio->bi_sector;
	b->bi_bdev = bio->bi_bdev;
	b->bi_rw = bio->bi_rw;

	b->bi_vcnt = bio->bi_vcnt;
	b->bi_size = bio->bi_size;

	return b;

oom:
	while (--i >= 0)
		__free_page(b->bi_io_vec[i].bv_page);

	mempool_free(b, bio_pool);
	return NULL;
}
/**
 *	bio_get_nr_vecs		- return approx number of vecs
 *	@bdev:  I/O target
 *
 *	Return the approximate number of pages we can send to this target.
 *	There's no guarantee that you will be able to fit this number of pages
 *	into a bio, it does not account for dynamic restrictions that vary
 *	on offset.
 */
int bio_get_nr_vecs(struct block_device *bdev)
{
	request_queue_t *q = bdev_get_queue(bdev);
	int nr_pages;

	nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (nr_pages > q->max_phys_segments)
		nr_pages = q->max_phys_segments;
	if (nr_pages > q->max_hw_segments)
		nr_pages = q->max_hw_segments;

	return nr_pages;
}
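/*
 * Worked example (illustrative numbers): with max_sectors = 255 and 4K
 * pages, (255 << 9) = 130560 bytes rounds up to 32 pages; if the queue
 * allows at least 32 phys and hw segments, this returns 32.
 */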
/**
 *	bio_add_page	-	attempt to add page to bio
 *	@bio: destination bio
 *	@page: page to add
 *	@len: vec entry length
 *	@offset: vec entry offset
 *
 *	Attempt to add a page to the bio_vec maplist. This can fail for a
 *	number of reasons, such as the bio being full or target block
 *	device limitations.
 */
int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
		 unsigned int offset)
{
	request_queue_t *q = bdev_get_queue(bio->bi_bdev);
	int fail_segments = 0, retried_segments = 0;
	struct bio_vec *bvec;

	/*
	 * cloned bio must not modify vec list
	 */
	if (unlikely(bio_flagged(bio, BIO_CLONED)))
		return 0;

	if (bio->bi_vcnt >= bio->bi_max_vecs)
		return 0;

	if (((bio->bi_size + len) >> 9) > q->max_sectors)
		return 0;

	/*
	 * we might lose a segment or two here, but rather that than
	 * make this too complex.
	 */
retry_segments:
	fail_segments = 0;
	if (bio_phys_segments(q, bio) >= q->max_phys_segments
	    || bio_hw_segments(q, bio) >= q->max_hw_segments)
		fail_segments = 1;

	if (fail_segments) {
		if (retried_segments)
			return 0;

		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
		retried_segments = 1;
		goto retry_segments;
	}

	/*
	 * setup the new entry, we might clear it again later if we
	 * cannot add the page
	 */
	bvec = &bio->bi_io_vec[bio->bi_vcnt];
	bvec->bv_page = page;
	bvec->bv_len = len;
	bvec->bv_offset = offset;

	/*
	 * if queue has other restrictions (eg varying max sector size
	 * depending on offset), it can specify a merge_bvec_fn in the
	 * queue to get further control
	 */
	if (q->merge_bvec_fn) {
		/*
		 * merge_bvec_fn() returns number of bytes it can accept
		 * at this offset
		 */
		if (q->merge_bvec_fn(q, bio, bvec) < len) {
			bvec->bv_page = NULL;
			bvec->bv_len = 0;
			bvec->bv_offset = 0;
			return 0;
		}
	}

	bio->bi_vcnt++;
	bio->bi_phys_segments++;
	bio->bi_hw_segments++;
	bio->bi_size += len;
	return len;
}
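/*
 * Typical usage (illustrative sketch): add pages one at a time and stop
 * when the target can take no more. bio_add_page() returns the number of
 * bytes it accepted, so a short return means the bio is full:
 *
 *	while (nr_pages--) {
 *		if (bio_add_page(bio, *pages++, PAGE_SIZE, 0) < PAGE_SIZE)
 *			break;
 *	}
 */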
/**
 *	bio_map_user	-	map user address into bio
 *	@bdev: destination block device
 *	@uaddr: start of user address
 *	@len: length in bytes
 *	@write_to_vm: bool indicating writing to pages or not
 *
 *	Map the user space address into a bio suitable for io to a block
 *	device. Caller should check the size of the returned bio, we might
 *	not have mapped the entire range specified.
 */
struct bio *bio_map_user(struct block_device *bdev, unsigned long uaddr,
			 unsigned int len, int write_to_vm)
{
	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = uaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	request_queue_t *q = bdev_get_queue(bdev);
	int ret, offset, i;
	struct page **pages;
	struct bio *bio;

	/*
	 * transfer and buffer must be aligned to at least hardsector
	 * size for now, in the future we can relax this restriction
	 */
	if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
		return NULL;

	bio = bio_alloc(GFP_KERNEL, nr_pages);
	if (!bio)
		return NULL;

	pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		goto out;

	down_read(&current->mm->mmap_sem);
	ret = get_user_pages(current, current->mm, uaddr, nr_pages,
			     write_to_vm, 0, pages, NULL);
	up_read(&current->mm->mmap_sem);

	if (ret < nr_pages)
		goto out;

	bio->bi_bdev = bdev;

	offset = uaddr & ~PAGE_MASK;
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		if (bio_add_page(bio, pages[i], bytes, offset) < bytes)
			break;

		len -= bytes;
		offset = 0;
	}

	/*
	 * release the pages we didn't map into the bio, if any
	 */
	while (i < nr_pages)
		page_cache_release(pages[i++]);

	kfree(pages);

	/*
	 * check if the mapped pages need bouncing for an isa host.
	 */
	blk_queue_bounce(q, &bio);
	return bio;
out:
	kfree(pages);
	bio_put(bio);
	return NULL;
}
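/*
 * Example pairing with bio_unmap_user() (illustrative; "uaddr" and "len"
 * would come from a hypothetical user request, and the submit/wait step
 * is elided). The write_to_vm value must match on both calls:
 *
 *	struct bio *bio = bio_map_user(bdev, uaddr, len, 1);
 *	if (bio) {
 *		... submit bio and wait for completion, then:
 *		bio_unmap_user(bio, 1);
 *	}
 */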
/**
 *	bio_unmap_user	-	unmap a bio
 *	@bio:		the bio being unmapped
 *	@write_to_vm:	bool indicating whether pages were written to
 *
 *	Unmap a bio previously mapped by bio_map_user(). The @write_to_vm
 *	must be the same as passed into bio_map_user(). Must be called with
 *	a process context.
 */
void bio_unmap_user(struct bio *bio, int write_to_vm)
{
	struct bio_vec *bvec;
	int i;

	/*
	 * find original bio if it was bounced
	 */
	if (bio->bi_private) {
		/*
		 * someone stole our bio, must not happen
		 */
		BUG_ON(!bio_flagged(bio, BIO_BOUNCED));

		bio = bio->bi_private;
	}

	/*
	 * make sure we dirty pages we wrote to
	 */
	__bio_for_each_segment(bvec, bio, i, 0) {
		if (write_to_vm)
			set_page_dirty(bvec->bv_page);

		page_cache_release(bvec->bv_page);
	}

	bio_put(bio);
}
/**
 * bio_endio - end I/O on a bio
 * @bio:	bio
 * @bytes_done:	number of bytes completed
 * @error:	error, if any
 *
 * Description:
 *   bio_endio() will end I/O on @bytes_done number of bytes. This may be
 *   just a partial part of the bio, or it may be the whole bio. bio_endio()
 *   is the preferred way to end I/O on a bio, it takes care of decrementing
 *   bi_size and clearing BIO_UPTODATE on error. @error is 0 on success, and
 *   one of the established -Exxxx (-EIO, for instance) error values in
 *   case something went wrong. No one should call bi_end_io() directly on
 *   a bio unless they own it and thus know that it has an end_io function.
 **/
void bio_endio(struct bio *bio, unsigned int bytes_done, int error)
{
	if (error)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);

	if (unlikely(bytes_done > bio->bi_size)) {
		printk("%s: want %u bytes done, only %u left\n", __FUNCTION__,
						bytes_done, bio->bi_size);
		bytes_done = bio->bi_size;
	}

	bio->bi_size -= bytes_done;

	if (bio->bi_end_io)
		bio->bi_end_io(bio, bytes_done, error);
}
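/*
 * Example (illustrative): a driver completing a whole bio from its
 * completion path, with success or with -EIO on failure:
 *
 *	bio_endio(bio, bio->bi_size, 0);	// all bytes done, no error
 *	bio_endio(bio, bio->bi_size, -EIO);	// failed, clears BIO_UPTODATE
 */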
static void __init biovec_init_pools(void)
{
	int i, size, megabytes, pool_entries = BIO_POOL_SIZE;
	int scale = BIOVEC_NR_POOLS;

	megabytes = nr_free_pages() >> (20 - PAGE_SHIFT);

	/*
	 * find out where to start scaling
	 */
	if (megabytes <= 16)
		scale = 0;
	else if (megabytes <= 32)
		scale = 1;
	else if (megabytes <= 64)
		scale = 2;
	else if (megabytes <= 96)
		scale = 3;
	else if (megabytes <= 128)
		scale = 4;

	/*
	 * scale number of entries
	 */
	pool_entries = megabytes * 2;
	if (pool_entries > 256)
		pool_entries = 256;

	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
		struct biovec_pool *bp = bvec_array + i;

		size = bp->nr_vecs * sizeof(struct bio_vec);

		bp->slab = kmem_cache_create(bp->name, size, 0,
						SLAB_HWCACHE_ALIGN, NULL, NULL);
		if (!bp->slab)
			panic("biovec: can't init slab cache\n");

		if (i >= scale)
			pool_entries >>= 1;

		bp->pool = mempool_create(pool_entries, slab_pool_alloc,
					slab_pool_free, bp->slab);
		if (!bp->pool)
			panic("biovec: can't init mempool\n");

		printk("biovec pool[%d]: %3d bvecs: %3d entries (%d bytes)\n",
						i, bp->nr_vecs, pool_entries,
						size * pool_entries);
	}
}
static int __init init_bio(void)
{
	bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0,
					SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!bio_slab)
		panic("bio: can't create slab cache\n");
	bio_pool = mempool_create(BIO_POOL_SIZE, slab_pool_alloc,
					slab_pool_free, bio_slab);
	if (!bio_pool)
		panic("bio: can't create mempool\n");

	printk("BIO: pool of %d setup, %ZuKb (%Zd bytes/bio)\n",
		BIO_POOL_SIZE,
		BIO_POOL_SIZE * sizeof(struct bio) >> 10,
		sizeof(struct bio));

	biovec_init_pools();

	return 0;
}

subsys_initcall(init_bio);
EXPORT_SYMBOL(bio_alloc);
EXPORT_SYMBOL(bio_put);
EXPORT_SYMBOL(bio_endio);
EXPORT_SYMBOL(bio_init);
EXPORT_SYMBOL(bio_copy);
EXPORT_SYMBOL(__bio_clone);
EXPORT_SYMBOL(bio_clone);
EXPORT_SYMBOL(bio_phys_segments);
EXPORT_SYMBOL(bio_hw_segments);
EXPORT_SYMBOL(bio_add_page);
EXPORT_SYMBOL(bio_get_nr_vecs);
EXPORT_SYMBOL(bio_map_user);
EXPORT_SYMBOL(bio_unmap_user);