/*
 * Copyright (C) 2003 Christophe Saout <christophe@saout.de>
 *
 * This file is released under the GPL.
 */
7 #include <linux/module.h>
8 #include <linux/init.h>
9 #include <linux/kernel.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/mempool.h>
13 #include <linux/slab.h>
14 #include <linux/crypto.h>
15 #include <linux/workqueue.h>
16 #include <asm/atomic.h>
17 #include <asm/scatterlist.h>
/*
 * per bio private data
 */
27 struct dm_target
*target
;
29 struct bio
*first_clone
;
30 struct work_struct work
;
/*
 * context holding the current state of a multi-part conversion
 */
38 struct convert_context
{
41 unsigned int offset_in
;
42 unsigned int offset_out
;
/*
 * Crypt: maps a linear range of a block device
 * and encrypts / decrypts at the same time.
 */
58 * pool for per bio private data and
59 * for encryption buffer pages
67 struct crypto_tfm
*tfm
;
69 int (*iv_generator
)(struct crypt_config
*cc
, u8
*iv
, sector_t sector
);
76 #define MIN_POOL_PAGES 32
77 #define MIN_BIO_PAGES 8
79 static kmem_cache_t
*_crypt_io_pool
;
/*
 * Mempool allocation callback for the page pool.
 *
 * Passes @gfp_mask straight through to alloc_page() and returns whatever
 * it returns; @data (the pool's private pointer) is not used.
 */
static void *mempool_alloc_page(int gfp_mask, void *data)
{
	return alloc_page(gfp_mask);
}
89 static void mempool_free_page(void *page
, void *data
)
96 * Different IV generation algorithms
98 static int crypt_iv_plain(struct crypt_config
*cc
, u8
*iv
, sector_t sector
)
100 *(u32
*)iv
= cpu_to_le32(sector
& 0xffffffff);
101 if (cc
->iv_size
> sizeof(u32
) / sizeof(u8
))
102 memset(iv
+ (sizeof(u32
) / sizeof(u8
)), 0,
103 cc
->iv_size
- (sizeof(u32
) / sizeof(u8
)));
109 crypt_convert_scatterlist(struct crypt_config
*cc
, struct scatterlist
*out
,
110 struct scatterlist
*in
, unsigned int length
,
111 int write
, sector_t sector
)
116 if (cc
->iv_generator
) {
117 r
= cc
->iv_generator(cc
, iv
, sector
);
122 r
= crypto_cipher_encrypt_iv(cc
->tfm
, out
, in
, length
, iv
);
124 r
= crypto_cipher_decrypt_iv(cc
->tfm
, out
, in
, length
, iv
);
127 r
= crypto_cipher_encrypt(cc
->tfm
, out
, in
, length
);
129 r
= crypto_cipher_decrypt(cc
->tfm
, out
, in
, length
);
136 crypt_convert_init(struct crypt_config
*cc
, struct convert_context
*ctx
,
137 struct bio
*bio_out
, struct bio
*bio_in
,
138 sector_t sector
, int write
)
140 ctx
->bio_in
= bio_in
;
141 ctx
->bio_out
= bio_out
;
144 ctx
->idx_in
= bio_in
? bio_in
->bi_idx
: 0;
145 ctx
->idx_out
= bio_out
? bio_out
->bi_idx
: 0;
146 ctx
->sector
= sector
+ cc
->iv_offset
;
/*
 * Encrypt / decrypt data from one bio to another one (can be the same one)
 */
153 static int crypt_convert(struct crypt_config
*cc
,
154 struct convert_context
*ctx
)
158 while(ctx
->idx_in
< ctx
->bio_in
->bi_vcnt
&&
159 ctx
->idx_out
< ctx
->bio_out
->bi_vcnt
) {
160 struct bio_vec
*bv_in
= bio_iovec_idx(ctx
->bio_in
, ctx
->idx_in
);
161 struct bio_vec
*bv_out
= bio_iovec_idx(ctx
->bio_out
, ctx
->idx_out
);
162 struct scatterlist sg_in
= {
163 .page
= bv_in
->bv_page
,
164 .offset
= bv_in
->bv_offset
+ ctx
->offset_in
,
165 .length
= 1 << SECTOR_SHIFT
167 struct scatterlist sg_out
= {
168 .page
= bv_out
->bv_page
,
169 .offset
= bv_out
->bv_offset
+ ctx
->offset_out
,
170 .length
= 1 << SECTOR_SHIFT
173 ctx
->offset_in
+= sg_in
.length
;
174 if (ctx
->offset_in
>= bv_in
->bv_len
) {
179 ctx
->offset_out
+= sg_out
.length
;
180 if (ctx
->offset_out
>= bv_out
->bv_len
) {
185 r
= crypt_convert_scatterlist(cc
, &sg_out
, &sg_in
, sg_in
.length
,
186 ctx
->write
, ctx
->sector
);
/*
 * Generate a new unfragmented bio with the given size.
 * This should never violate the device limitations.
 * May return a smaller bio when running out of pages.
 */
202 crypt_alloc_buffer(struct crypt_config
*cc
, unsigned int size
,
203 struct bio
*base_bio
, int *bio_vec_idx
)
206 int nr_iovecs
= dm_div_up(size
, PAGE_SIZE
);
207 int gfp_mask
= GFP_NOIO
| __GFP_HIGHMEM
;
208 int flags
= current
->flags
;
212 * Tell VM to act less aggressively and fail earlier.
213 * This is not necessary but increases throughput.
214 * FIXME: Is this really intelligent?
216 current
->flags
&= ~PF_MEMALLOC
;
219 bio
= bio_clone(base_bio
, GFP_NOIO
);
221 bio
= bio_alloc(GFP_NOIO
, nr_iovecs
);
223 if (flags
& PF_MEMALLOC
)
224 current
->flags
|= PF_MEMALLOC
;
228 /* if the last bio was not complete, continue where that one ended */
229 bio
->bi_idx
= *bio_vec_idx
;
230 bio
->bi_vcnt
= *bio_vec_idx
;
232 bio
->bi_flags
&= ~(1 << BIO_SEG_VALID
);
234 /* bio->bi_idx pages have already been allocated */
235 size
-= bio
->bi_idx
* PAGE_SIZE
;
237 for(i
= bio
->bi_idx
; i
< nr_iovecs
; i
++) {
238 struct bio_vec
*bv
= bio_iovec_idx(bio
, i
);
240 bv
->bv_page
= mempool_alloc(cc
->page_pool
, gfp_mask
);
245 * if additional pages cannot be allocated without waiting,
246 * return a partially allocated bio, the caller will then try
247 * to allocate additional bios while submitting this partial bio
249 if ((i
- bio
->bi_idx
) == (MIN_BIO_PAGES
- 1))
250 gfp_mask
= (gfp_mask
| __GFP_NOWARN
) & ~__GFP_WAIT
;
253 if (size
> PAGE_SIZE
)
254 bv
->bv_len
= PAGE_SIZE
;
258 bio
->bi_size
+= bv
->bv_len
;
263 if (flags
& PF_MEMALLOC
)
264 current
->flags
|= PF_MEMALLOC
;
272 * Remember the last bio_vec allocated to be able
273 * to correctly continue after the splitting.
275 *bio_vec_idx
= bio
->bi_vcnt
;
280 static void crypt_free_buffer_pages(struct crypt_config
*cc
,
281 struct bio
*bio
, unsigned int bytes
)
283 unsigned int start
, end
;
288 * This is ugly, but Jens Axboe thinks that using bi_idx in the
289 * endio function is too dangerous at the moment, so I calculate the
290 * correct position using bi_vcnt and bi_size.
291 * The bv_offset and bv_len fields might already be modified but we
292 * know that we always allocated whole pages.
293 * A fix to the bi_idx issue in the kernel is in the works, so
294 * we will hopefully be able to revert to the cleaner solution soon.
296 i
= bio
->bi_vcnt
- 1;
297 bv
= bio_iovec_idx(bio
, i
);
298 end
= (i
<< PAGE_SHIFT
) + (bv
->bv_offset
+ bv
->bv_len
) - bio
->bi_size
;
301 start
>>= PAGE_SHIFT
;
307 for(i
= start
; i
< end
; i
++) {
308 bv
= bio_iovec_idx(bio
, i
);
309 BUG_ON(!bv
->bv_page
);
310 mempool_free(bv
->bv_page
, cc
->page_pool
);
/*
 * One of the bios was finished. Check for completion of
 * the whole request and correctly clean up the buffer.
 */
319 static void dec_pending(struct crypt_io
*io
, int error
)
321 struct crypt_config
*cc
= (struct crypt_config
*) io
->target
->private;
326 if (!atomic_dec_and_test(&io
->pending
))
330 bio_put(io
->first_clone
);
332 bio_endio(io
->bio
, io
->bio
->bi_size
, io
->error
);
334 mempool_free(io
, cc
->io_pool
);
/*
 * kcryptd workqueue.
 *
 * Needed because it would be very unwise to do decryption in an
 * interrupt context, so bios returning from read requests get handed
 * to this workqueue (see kcryptd_queue_io()) and are processed by
 * kcryptd_do_work().
 */
static struct workqueue_struct *_kcryptd_workqueue;
346 static void kcryptd_do_work(void *data
)
348 struct crypt_io
*io
= (struct crypt_io
*) data
;
349 struct crypt_config
*cc
= (struct crypt_config
*) io
->target
->private;
350 struct convert_context ctx
;
353 crypt_convert_init(cc
, &ctx
, io
->bio
, io
->bio
,
354 io
->bio
->bi_sector
- io
->target
->begin
, 0);
355 r
= crypt_convert(cc
, &ctx
);
360 static void kcryptd_queue_io(struct crypt_io
*io
)
362 INIT_WORK(&io
->work
, kcryptd_do_work
, io
);
363 queue_work(_kcryptd_workqueue
, &io
->work
);
/*
 * Decode key from its hex representation
 */
369 static int crypt_decode_key(u8
*key
, char *hex
, int size
)
377 for(i
= 0; i
< size
; i
++) {
381 key
[i
] = (u8
)simple_strtoul(buffer
, &endp
, 16);
383 if (endp
!= &buffer
[2])
394 * Encode key into its hex representation
396 static void crypt_encode_key(char *hex
, u8
*key
, int size
)
400 for(i
= 0; i
< size
; i
++) {
401 sprintf(hex
, "%02x", *key
);
/*
 * Construct an encryption mapping:
 * <cipher> <key> <iv_offset> <dev_path> <start>
 */
411 static int crypt_ctr(struct dm_target
*ti
, unsigned int argc
, char **argv
)
413 struct crypt_config
*cc
;
414 struct crypto_tfm
*tfm
;
422 ti
->error
= PFX
"Not enough arguments";
427 cipher
= strsep(&tmp
, "-");
428 mode
= strsep(&tmp
, "-");
431 DMWARN(PFX
"Unexpected additional cipher options");
433 key_size
= strlen(argv
[1]) >> 1;
435 cc
= kmalloc(sizeof(*cc
) + key_size
* sizeof(u8
), GFP_KERNEL
);
438 PFX
"Cannot allocate transparent encryption context";
442 if (!mode
|| strcmp(mode
, "plain") == 0)
443 cc
->iv_generator
= crypt_iv_plain
;
444 else if (strcmp(mode
, "ecb") == 0)
445 cc
->iv_generator
= NULL
;
447 ti
->error
= PFX
"Invalid chaining mode";
451 if (cc
->iv_generator
)
452 crypto_flags
= CRYPTO_TFM_MODE_CBC
;
454 crypto_flags
= CRYPTO_TFM_MODE_ECB
;
456 tfm
= crypto_alloc_tfm(cipher
, crypto_flags
);
458 ti
->error
= PFX
"Error allocating crypto tfm";
461 if (crypto_tfm_alg_type(tfm
) != CRYPTO_ALG_TYPE_CIPHER
) {
462 ti
->error
= PFX
"Expected cipher algorithm";
466 if (tfm
->crt_cipher
.cit_decrypt_iv
&& tfm
->crt_cipher
.cit_encrypt_iv
)
467 /* at least a 32 bit sector number should fit in our buffer */
468 cc
->iv_size
= max(crypto_tfm_alg_ivsize(tfm
),
469 (unsigned int)(sizeof(u32
) / sizeof(u8
)));
472 if (cc
->iv_generator
) {
473 DMWARN(PFX
"Selected cipher does not support IVs");
474 cc
->iv_generator
= NULL
;
478 cc
->io_pool
= mempool_create(MIN_IOS
, mempool_alloc_slab
,
479 mempool_free_slab
, _crypt_io_pool
);
481 ti
->error
= PFX
"Cannot allocate crypt io mempool";
485 cc
->page_pool
= mempool_create(MIN_POOL_PAGES
, mempool_alloc_page
,
486 mempool_free_page
, NULL
);
487 if (!cc
->page_pool
) {
488 ti
->error
= PFX
"Cannot allocate page mempool";
493 cc
->key_size
= key_size
;
494 if ((key_size
== 0 && strcmp(argv
[1], "-") != 0)
495 || crypt_decode_key(cc
->key
, argv
[1], key_size
) < 0) {
496 ti
->error
= PFX
"Error decoding key";
500 if (tfm
->crt_cipher
.cit_setkey(tfm
, cc
->key
, key_size
) < 0) {
501 ti
->error
= PFX
"Error setting key";
505 if (sscanf(argv
[2], SECTOR_FORMAT
, &cc
->iv_offset
) != 1) {
506 ti
->error
= PFX
"Invalid iv_offset sector";
510 if (sscanf(argv
[4], SECTOR_FORMAT
, &cc
->start
) != 1) {
511 ti
->error
= PFX
"Invalid device sector";
515 if (dm_get_device(ti
, argv
[3], cc
->start
, ti
->len
,
516 dm_table_get_mode(ti
->table
), &cc
->dev
)) {
517 ti
->error
= PFX
"Device lookup failed";
525 mempool_destroy(cc
->page_pool
);
527 mempool_destroy(cc
->io_pool
);
529 crypto_free_tfm(tfm
);
535 static void crypt_dtr(struct dm_target
*ti
)
537 struct crypt_config
*cc
= (struct crypt_config
*) ti
->private;
539 mempool_destroy(cc
->page_pool
);
540 mempool_destroy(cc
->io_pool
);
542 crypto_free_tfm(cc
->tfm
);
543 dm_put_device(ti
, cc
->dev
);
547 static int crypt_endio(struct bio
*bio
, unsigned int done
, int error
)
549 struct crypt_io
*io
= (struct crypt_io
*) bio
->bi_private
;
550 struct crypt_config
*cc
= (struct crypt_config
*) io
->target
->private;
552 if (bio_data_dir(bio
) == WRITE
) {
554 * free the processed pages, even if
555 * it's only a partially completed write
557 crypt_free_buffer_pages(cc
, bio
, done
);
566 * successful reads are decrypted by the worker thread
568 if ((bio_data_dir(bio
) == READ
)
569 && bio_flagged(bio
, BIO_UPTODATE
)) {
570 kcryptd_queue_io(io
);
574 dec_pending(io
, error
);
578 static inline struct bio
*
579 crypt_clone(struct crypt_config
*cc
, struct crypt_io
*io
, struct bio
*bio
,
580 sector_t sector
, int *bvec_idx
, struct convert_context
*ctx
)
584 if (bio_data_dir(bio
) == WRITE
) {
585 clone
= crypt_alloc_buffer(cc
, bio
->bi_size
,
586 io
->first_clone
, bvec_idx
);
588 ctx
->bio_out
= clone
;
589 if (crypt_convert(cc
, ctx
) < 0) {
590 crypt_free_buffer_pages(cc
, clone
,
598 * The block layer might modify the bvec array, so always
599 * copy the required bvecs because we need the original
600 * one in order to decrypt the whole bio data *afterwards*.
602 clone
= bio_alloc(GFP_NOIO
, bio_segments(bio
));
605 clone
->bi_vcnt
= bio_segments(bio
);
606 clone
->bi_size
= bio
->bi_size
;
607 memcpy(clone
->bi_io_vec
, bio_iovec(bio
),
608 sizeof(struct bio_vec
) * clone
->bi_vcnt
);
615 clone
->bi_private
= io
;
616 clone
->bi_end_io
= crypt_endio
;
617 clone
->bi_bdev
= cc
->dev
->bdev
;
618 clone
->bi_sector
= cc
->start
+ sector
;
619 clone
->bi_rw
= bio
->bi_rw
;
624 static int crypt_map(struct dm_target
*ti
, struct bio
*bio
,
625 union map_info
*map_context
)
627 struct crypt_config
*cc
= (struct crypt_config
*) ti
->private;
628 struct crypt_io
*io
= mempool_alloc(cc
->io_pool
, GFP_NOIO
);
629 struct convert_context ctx
;
631 unsigned int remaining
= bio
->bi_size
;
632 sector_t sector
= bio
->bi_sector
- ti
->begin
;
637 io
->first_clone
= NULL
;
639 atomic_set(&io
->pending
, 1); /* hold a reference */
641 if (bio_data_dir(bio
) == WRITE
)
642 crypt_convert_init(cc
, &ctx
, NULL
, bio
, sector
, 1);
645 * The allocated buffers can be smaller than the whole bio,
646 * so repeat the whole process until all the data can be handled.
649 clone
= crypt_clone(cc
, io
, bio
, sector
, &bvec_idx
, &ctx
);
653 if (!io
->first_clone
) {
655 * hold a reference to the first clone, because it
656 * holds the bio_vec array and that can't be freed
657 * before all other clones are released
660 io
->first_clone
= clone
;
662 atomic_inc(&io
->pending
);
664 remaining
-= clone
->bi_size
;
665 sector
+= bio_sectors(clone
);
667 generic_make_request(clone
);
669 /* out of memory -> run queues */
671 blk_congestion_wait(bio_data_dir(clone
), HZ
/100);
674 /* drop reference, clones could have returned before we reach this */
679 if (io
->first_clone
) {
680 dec_pending(io
, -ENOMEM
);
684 /* if no bio has been dispatched yet, we can directly return the error */
685 mempool_free(io
, cc
->io_pool
);
689 static int crypt_status(struct dm_target
*ti
, status_type_t type
,
690 char *result
, unsigned int maxlen
)
692 struct crypt_config
*cc
= (struct crypt_config
*) ti
->private;
695 const char *mode
= NULL
;
699 case STATUSTYPE_INFO
:
703 case STATUSTYPE_TABLE
:
704 cipher
= crypto_tfm_alg_name(cc
->tfm
);
706 switch(cc
->tfm
->crt_cipher
.cit_mode
) {
707 case CRYPTO_TFM_MODE_CBC
:
710 case CRYPTO_TFM_MODE_ECB
:
717 snprintf(result
, maxlen
, "%s-%s ", cipher
, mode
);
718 offset
= strlen(result
);
720 if (cc
->key_size
> 0) {
721 if ((maxlen
- offset
) < ((cc
->key_size
<< 1) + 1))
724 crypt_encode_key(result
+ offset
, cc
->key
, cc
->key_size
);
725 offset
+= cc
->key_size
<< 1;
727 if (offset
>= maxlen
)
729 result
[offset
++] = '-';
732 format_dev_t(buffer
, cc
->dev
->bdev
->bd_dev
);
733 snprintf(result
+ offset
, maxlen
- offset
, " " SECTOR_FORMAT
734 " %s " SECTOR_FORMAT
, cc
->iv_offset
,
741 static struct target_type crypt_target
= {
744 .module
= THIS_MODULE
,
748 .status
= crypt_status
,
751 static int __init
dm_crypt_init(void)
755 _crypt_io_pool
= kmem_cache_create("dm-crypt_io",
756 sizeof(struct crypt_io
),
761 _kcryptd_workqueue
= create_workqueue("kcryptd");
762 if (!_kcryptd_workqueue
) {
764 DMERR(PFX
"couldn't create kcryptd");
768 r
= dm_register_target(&crypt_target
);
770 DMERR(PFX
"register failed %d", r
);
777 destroy_workqueue(_kcryptd_workqueue
);
779 kmem_cache_destroy(_crypt_io_pool
);
783 static void __exit
dm_crypt_exit(void)
785 int r
= dm_unregister_target(&crypt_target
);
788 DMERR(PFX
"unregister failed %d", r
);
790 destroy_workqueue(_kcryptd_workqueue
);
791 kmem_cache_destroy(_crypt_io_pool
);
794 module_init(dm_crypt_init
);
795 module_exit(dm_crypt_exit
);
797 MODULE_AUTHOR("Christophe Saout <christophe@saout.de>");
798 MODULE_DESCRIPTION(DM_NAME
" target for transparent encryption / decryption");
799 MODULE_LICENSE("GPL");