2 * Glue Code for the AVX assembler implementation of the Cast6 Cipher
4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
24 #include <linux/module.h>
25 #include <linux/hardirq.h>
26 #include <linux/types.h>
27 #include <linux/crypto.h>
28 #include <linux/err.h>
29 #include <crypto/algapi.h>
30 #include <crypto/cast6.h>
31 #include <crypto/cryptd.h>
32 #include <crypto/b128ops.h>
33 #include <crypto/ctr.h>
34 #include <crypto/lrw.h>
35 #include <crypto/xts.h>
37 #include <asm/xsave.h>
38 #include <asm/crypto/ablk_helper.h>
39 #include <asm/crypto/glue_helper.h>
/*
 * Number of blocks processed per call to the 8-way routines: the bulk paths
 * in this file size their scratch arrays with it and use the "xway" functions
 * only when exactly this many blocks are available.
 */
#define CAST6_PARALLEL_BLOCKS 8
43 asmlinkage
void __cast6_enc_blk_8way(struct cast6_ctx
*ctx
, u8
*dst
,
44 const u8
*src
, bool xor);
45 asmlinkage
void cast6_dec_blk_8way(struct cast6_ctx
*ctx
, u8
*dst
,
48 static inline void cast6_enc_blk_xway(struct cast6_ctx
*ctx
, u8
*dst
,
51 __cast6_enc_blk_8way(ctx
, dst
, src
, false);
54 static inline void cast6_enc_blk_xway_xor(struct cast6_ctx
*ctx
, u8
*dst
,
57 __cast6_enc_blk_8way(ctx
, dst
, src
, true);
60 static inline void cast6_dec_blk_xway(struct cast6_ctx
*ctx
, u8
*dst
,
63 cast6_dec_blk_8way(ctx
, dst
, src
);
67 static void cast6_decrypt_cbc_xway(void *ctx
, u128
*dst
, const u128
*src
)
69 u128 ivs
[CAST6_PARALLEL_BLOCKS
- 1];
72 for (j
= 0; j
< CAST6_PARALLEL_BLOCKS
- 1; j
++)
75 cast6_dec_blk_xway(ctx
, (u8
*)dst
, (u8
*)src
);
77 for (j
= 0; j
< CAST6_PARALLEL_BLOCKS
- 1; j
++)
78 u128_xor(dst
+ (j
+ 1), dst
+ (j
+ 1), ivs
+ j
);
81 static void cast6_crypt_ctr(void *ctx
, u128
*dst
, const u128
*src
, u128
*iv
)
85 u128_to_be128(&ctrblk
, iv
);
88 __cast6_encrypt(ctx
, (u8
*)&ctrblk
, (u8
*)&ctrblk
);
89 u128_xor(dst
, src
, (u128
*)&ctrblk
);
92 static void cast6_crypt_ctr_xway(void *ctx
, u128
*dst
, const u128
*src
,
95 be128 ctrblks
[CAST6_PARALLEL_BLOCKS
];
98 for (i
= 0; i
< CAST6_PARALLEL_BLOCKS
; i
++) {
102 u128_to_be128(&ctrblks
[i
], iv
);
106 cast6_enc_blk_xway_xor(ctx
, (u8
*)dst
, (u8
*)ctrblks
);
109 static const struct common_glue_ctx cast6_enc
= {
111 .fpu_blocks_limit
= CAST6_PARALLEL_BLOCKS
,
114 .num_blocks
= CAST6_PARALLEL_BLOCKS
,
115 .fn_u
= { .ecb
= GLUE_FUNC_CAST(cast6_enc_blk_xway
) }
118 .fn_u
= { .ecb
= GLUE_FUNC_CAST(__cast6_encrypt
) }
122 static const struct common_glue_ctx cast6_ctr
= {
124 .fpu_blocks_limit
= CAST6_PARALLEL_BLOCKS
,
127 .num_blocks
= CAST6_PARALLEL_BLOCKS
,
128 .fn_u
= { .ctr
= GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway
) }
131 .fn_u
= { .ctr
= GLUE_CTR_FUNC_CAST(cast6_crypt_ctr
) }
135 static const struct common_glue_ctx cast6_dec
= {
137 .fpu_blocks_limit
= CAST6_PARALLEL_BLOCKS
,
140 .num_blocks
= CAST6_PARALLEL_BLOCKS
,
141 .fn_u
= { .ecb
= GLUE_FUNC_CAST(cast6_dec_blk_xway
) }
144 .fn_u
= { .ecb
= GLUE_FUNC_CAST(__cast6_decrypt
) }
148 static const struct common_glue_ctx cast6_dec_cbc
= {
150 .fpu_blocks_limit
= CAST6_PARALLEL_BLOCKS
,
153 .num_blocks
= CAST6_PARALLEL_BLOCKS
,
154 .fn_u
= { .cbc
= GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway
) }
157 .fn_u
= { .cbc
= GLUE_CBC_FUNC_CAST(__cast6_decrypt
) }
161 static int ecb_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
162 struct scatterlist
*src
, unsigned int nbytes
)
164 return glue_ecb_crypt_128bit(&cast6_enc
, desc
, dst
, src
, nbytes
);
167 static int ecb_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
168 struct scatterlist
*src
, unsigned int nbytes
)
170 return glue_ecb_crypt_128bit(&cast6_dec
, desc
, dst
, src
, nbytes
);
173 static int cbc_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
174 struct scatterlist
*src
, unsigned int nbytes
)
176 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt
), desc
,
180 static int cbc_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
181 struct scatterlist
*src
, unsigned int nbytes
)
183 return glue_cbc_decrypt_128bit(&cast6_dec_cbc
, desc
, dst
, src
,
187 static int ctr_crypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
188 struct scatterlist
*src
, unsigned int nbytes
)
190 return glue_ctr_crypt_128bit(&cast6_ctr
, desc
, dst
, src
, nbytes
);
193 static inline bool cast6_fpu_begin(bool fpu_enabled
, unsigned int nbytes
)
195 return glue_fpu_begin(CAST6_BLOCK_SIZE
, CAST6_PARALLEL_BLOCKS
,
196 NULL
, fpu_enabled
, nbytes
);
199 static inline void cast6_fpu_end(bool fpu_enabled
)
201 glue_fpu_end(fpu_enabled
);
205 struct cast6_ctx
*ctx
;
209 static void encrypt_callback(void *priv
, u8
*srcdst
, unsigned int nbytes
)
211 const unsigned int bsize
= CAST6_BLOCK_SIZE
;
212 struct crypt_priv
*ctx
= priv
;
215 ctx
->fpu_enabled
= cast6_fpu_begin(ctx
->fpu_enabled
, nbytes
);
217 if (nbytes
== bsize
* CAST6_PARALLEL_BLOCKS
) {
218 cast6_enc_blk_xway(ctx
->ctx
, srcdst
, srcdst
);
222 for (i
= 0; i
< nbytes
/ bsize
; i
++, srcdst
+= bsize
)
223 __cast6_encrypt(ctx
->ctx
, srcdst
, srcdst
);
226 static void decrypt_callback(void *priv
, u8
*srcdst
, unsigned int nbytes
)
228 const unsigned int bsize
= CAST6_BLOCK_SIZE
;
229 struct crypt_priv
*ctx
= priv
;
232 ctx
->fpu_enabled
= cast6_fpu_begin(ctx
->fpu_enabled
, nbytes
);
234 if (nbytes
== bsize
* CAST6_PARALLEL_BLOCKS
) {
235 cast6_dec_blk_xway(ctx
->ctx
, srcdst
, srcdst
);
239 for (i
= 0; i
< nbytes
/ bsize
; i
++, srcdst
+= bsize
)
240 __cast6_decrypt(ctx
->ctx
, srcdst
, srcdst
);
243 struct cast6_lrw_ctx
{
244 struct lrw_table_ctx lrw_table
;
245 struct cast6_ctx cast6_ctx
;
248 static int lrw_cast6_setkey(struct crypto_tfm
*tfm
, const u8
*key
,
251 struct cast6_lrw_ctx
*ctx
= crypto_tfm_ctx(tfm
);
254 err
= __cast6_setkey(&ctx
->cast6_ctx
, key
, keylen
- CAST6_BLOCK_SIZE
,
259 return lrw_init_table(&ctx
->lrw_table
, key
+ keylen
- CAST6_BLOCK_SIZE
);
262 static int lrw_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
263 struct scatterlist
*src
, unsigned int nbytes
)
265 struct cast6_lrw_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
266 be128 buf
[CAST6_PARALLEL_BLOCKS
];
267 struct crypt_priv crypt_ctx
= {
268 .ctx
= &ctx
->cast6_ctx
,
269 .fpu_enabled
= false,
271 struct lrw_crypt_req req
= {
273 .tbuflen
= sizeof(buf
),
275 .table_ctx
= &ctx
->lrw_table
,
276 .crypt_ctx
= &crypt_ctx
,
277 .crypt_fn
= encrypt_callback
,
281 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
282 ret
= lrw_crypt(desc
, dst
, src
, nbytes
, &req
);
283 cast6_fpu_end(crypt_ctx
.fpu_enabled
);
288 static int lrw_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
289 struct scatterlist
*src
, unsigned int nbytes
)
291 struct cast6_lrw_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
292 be128 buf
[CAST6_PARALLEL_BLOCKS
];
293 struct crypt_priv crypt_ctx
= {
294 .ctx
= &ctx
->cast6_ctx
,
295 .fpu_enabled
= false,
297 struct lrw_crypt_req req
= {
299 .tbuflen
= sizeof(buf
),
301 .table_ctx
= &ctx
->lrw_table
,
302 .crypt_ctx
= &crypt_ctx
,
303 .crypt_fn
= decrypt_callback
,
307 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
308 ret
= lrw_crypt(desc
, dst
, src
, nbytes
, &req
);
309 cast6_fpu_end(crypt_ctx
.fpu_enabled
);
314 static void lrw_exit_tfm(struct crypto_tfm
*tfm
)
316 struct cast6_lrw_ctx
*ctx
= crypto_tfm_ctx(tfm
);
318 lrw_free_table(&ctx
->lrw_table
);
321 struct cast6_xts_ctx
{
322 struct cast6_ctx tweak_ctx
;
323 struct cast6_ctx crypt_ctx
;
326 static int xts_cast6_setkey(struct crypto_tfm
*tfm
, const u8
*key
,
329 struct cast6_xts_ctx
*ctx
= crypto_tfm_ctx(tfm
);
330 u32
*flags
= &tfm
->crt_flags
;
333 /* key consists of keys of equal size concatenated, therefore
334 * the length must be even
337 *flags
|= CRYPTO_TFM_RES_BAD_KEY_LEN
;
341 /* first half of xts-key is for crypt */
342 err
= __cast6_setkey(&ctx
->crypt_ctx
, key
, keylen
/ 2, flags
);
346 /* second half of xts-key is for tweak */
347 return __cast6_setkey(&ctx
->tweak_ctx
, key
+ keylen
/ 2, keylen
/ 2,
351 static int xts_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
352 struct scatterlist
*src
, unsigned int nbytes
)
354 struct cast6_xts_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
355 be128 buf
[CAST6_PARALLEL_BLOCKS
];
356 struct crypt_priv crypt_ctx
= {
357 .ctx
= &ctx
->crypt_ctx
,
358 .fpu_enabled
= false,
360 struct xts_crypt_req req
= {
362 .tbuflen
= sizeof(buf
),
364 .tweak_ctx
= &ctx
->tweak_ctx
,
365 .tweak_fn
= XTS_TWEAK_CAST(__cast6_encrypt
),
366 .crypt_ctx
= &crypt_ctx
,
367 .crypt_fn
= encrypt_callback
,
371 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
372 ret
= xts_crypt(desc
, dst
, src
, nbytes
, &req
);
373 cast6_fpu_end(crypt_ctx
.fpu_enabled
);
378 static int xts_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
379 struct scatterlist
*src
, unsigned int nbytes
)
381 struct cast6_xts_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
382 be128 buf
[CAST6_PARALLEL_BLOCKS
];
383 struct crypt_priv crypt_ctx
= {
384 .ctx
= &ctx
->crypt_ctx
,
385 .fpu_enabled
= false,
387 struct xts_crypt_req req
= {
389 .tbuflen
= sizeof(buf
),
391 .tweak_ctx
= &ctx
->tweak_ctx
,
392 .tweak_fn
= XTS_TWEAK_CAST(__cast6_encrypt
),
393 .crypt_ctx
= &crypt_ctx
,
394 .crypt_fn
= decrypt_callback
,
398 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
399 ret
= xts_crypt(desc
, dst
, src
, nbytes
, &req
);
400 cast6_fpu_end(crypt_ctx
.fpu_enabled
);
405 static struct crypto_alg cast6_algs
[10] = { {
406 .cra_name
= "__ecb-cast6-avx",
407 .cra_driver_name
= "__driver-ecb-cast6-avx",
409 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
410 .cra_blocksize
= CAST6_BLOCK_SIZE
,
411 .cra_ctxsize
= sizeof(struct cast6_ctx
),
413 .cra_type
= &crypto_blkcipher_type
,
414 .cra_module
= THIS_MODULE
,
417 .min_keysize
= CAST6_MIN_KEY_SIZE
,
418 .max_keysize
= CAST6_MAX_KEY_SIZE
,
419 .setkey
= cast6_setkey
,
420 .encrypt
= ecb_encrypt
,
421 .decrypt
= ecb_decrypt
,
425 .cra_name
= "__cbc-cast6-avx",
426 .cra_driver_name
= "__driver-cbc-cast6-avx",
428 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
429 .cra_blocksize
= CAST6_BLOCK_SIZE
,
430 .cra_ctxsize
= sizeof(struct cast6_ctx
),
432 .cra_type
= &crypto_blkcipher_type
,
433 .cra_module
= THIS_MODULE
,
436 .min_keysize
= CAST6_MIN_KEY_SIZE
,
437 .max_keysize
= CAST6_MAX_KEY_SIZE
,
438 .setkey
= cast6_setkey
,
439 .encrypt
= cbc_encrypt
,
440 .decrypt
= cbc_decrypt
,
444 .cra_name
= "__ctr-cast6-avx",
445 .cra_driver_name
= "__driver-ctr-cast6-avx",
447 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
449 .cra_ctxsize
= sizeof(struct cast6_ctx
),
451 .cra_type
= &crypto_blkcipher_type
,
452 .cra_module
= THIS_MODULE
,
455 .min_keysize
= CAST6_MIN_KEY_SIZE
,
456 .max_keysize
= CAST6_MAX_KEY_SIZE
,
457 .ivsize
= CAST6_BLOCK_SIZE
,
458 .setkey
= cast6_setkey
,
459 .encrypt
= ctr_crypt
,
460 .decrypt
= ctr_crypt
,
464 .cra_name
= "__lrw-cast6-avx",
465 .cra_driver_name
= "__driver-lrw-cast6-avx",
467 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
468 .cra_blocksize
= CAST6_BLOCK_SIZE
,
469 .cra_ctxsize
= sizeof(struct cast6_lrw_ctx
),
471 .cra_type
= &crypto_blkcipher_type
,
472 .cra_module
= THIS_MODULE
,
473 .cra_exit
= lrw_exit_tfm
,
476 .min_keysize
= CAST6_MIN_KEY_SIZE
+
478 .max_keysize
= CAST6_MAX_KEY_SIZE
+
480 .ivsize
= CAST6_BLOCK_SIZE
,
481 .setkey
= lrw_cast6_setkey
,
482 .encrypt
= lrw_encrypt
,
483 .decrypt
= lrw_decrypt
,
487 .cra_name
= "__xts-cast6-avx",
488 .cra_driver_name
= "__driver-xts-cast6-avx",
490 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
491 .cra_blocksize
= CAST6_BLOCK_SIZE
,
492 .cra_ctxsize
= sizeof(struct cast6_xts_ctx
),
494 .cra_type
= &crypto_blkcipher_type
,
495 .cra_module
= THIS_MODULE
,
498 .min_keysize
= CAST6_MIN_KEY_SIZE
* 2,
499 .max_keysize
= CAST6_MAX_KEY_SIZE
* 2,
500 .ivsize
= CAST6_BLOCK_SIZE
,
501 .setkey
= xts_cast6_setkey
,
502 .encrypt
= xts_encrypt
,
503 .decrypt
= xts_decrypt
,
507 .cra_name
= "ecb(cast6)",
508 .cra_driver_name
= "ecb-cast6-avx",
510 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
511 .cra_blocksize
= CAST6_BLOCK_SIZE
,
512 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
514 .cra_type
= &crypto_ablkcipher_type
,
515 .cra_module
= THIS_MODULE
,
516 .cra_init
= ablk_init
,
517 .cra_exit
= ablk_exit
,
520 .min_keysize
= CAST6_MIN_KEY_SIZE
,
521 .max_keysize
= CAST6_MAX_KEY_SIZE
,
522 .setkey
= ablk_set_key
,
523 .encrypt
= ablk_encrypt
,
524 .decrypt
= ablk_decrypt
,
528 .cra_name
= "cbc(cast6)",
529 .cra_driver_name
= "cbc-cast6-avx",
531 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
532 .cra_blocksize
= CAST6_BLOCK_SIZE
,
533 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
535 .cra_type
= &crypto_ablkcipher_type
,
536 .cra_module
= THIS_MODULE
,
537 .cra_init
= ablk_init
,
538 .cra_exit
= ablk_exit
,
541 .min_keysize
= CAST6_MIN_KEY_SIZE
,
542 .max_keysize
= CAST6_MAX_KEY_SIZE
,
543 .ivsize
= CAST6_BLOCK_SIZE
,
544 .setkey
= ablk_set_key
,
545 .encrypt
= __ablk_encrypt
,
546 .decrypt
= ablk_decrypt
,
550 .cra_name
= "ctr(cast6)",
551 .cra_driver_name
= "ctr-cast6-avx",
553 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
555 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
557 .cra_type
= &crypto_ablkcipher_type
,
558 .cra_module
= THIS_MODULE
,
559 .cra_init
= ablk_init
,
560 .cra_exit
= ablk_exit
,
563 .min_keysize
= CAST6_MIN_KEY_SIZE
,
564 .max_keysize
= CAST6_MAX_KEY_SIZE
,
565 .ivsize
= CAST6_BLOCK_SIZE
,
566 .setkey
= ablk_set_key
,
567 .encrypt
= ablk_encrypt
,
568 .decrypt
= ablk_encrypt
,
573 .cra_name
= "lrw(cast6)",
574 .cra_driver_name
= "lrw-cast6-avx",
576 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
577 .cra_blocksize
= CAST6_BLOCK_SIZE
,
578 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
580 .cra_type
= &crypto_ablkcipher_type
,
581 .cra_module
= THIS_MODULE
,
582 .cra_init
= ablk_init
,
583 .cra_exit
= ablk_exit
,
586 .min_keysize
= CAST6_MIN_KEY_SIZE
+
588 .max_keysize
= CAST6_MAX_KEY_SIZE
+
590 .ivsize
= CAST6_BLOCK_SIZE
,
591 .setkey
= ablk_set_key
,
592 .encrypt
= ablk_encrypt
,
593 .decrypt
= ablk_decrypt
,
597 .cra_name
= "xts(cast6)",
598 .cra_driver_name
= "xts-cast6-avx",
600 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
601 .cra_blocksize
= CAST6_BLOCK_SIZE
,
602 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
604 .cra_type
= &crypto_ablkcipher_type
,
605 .cra_module
= THIS_MODULE
,
606 .cra_init
= ablk_init
,
607 .cra_exit
= ablk_exit
,
610 .min_keysize
= CAST6_MIN_KEY_SIZE
* 2,
611 .max_keysize
= CAST6_MAX_KEY_SIZE
* 2,
612 .ivsize
= CAST6_BLOCK_SIZE
,
613 .setkey
= ablk_set_key
,
614 .encrypt
= ablk_encrypt
,
615 .decrypt
= ablk_decrypt
,
620 static int __init
cast6_init(void)
624 if (!cpu_has_avx
|| !cpu_has_osxsave
) {
625 pr_info("AVX instructions are not detected.\n");
629 xcr0
= xgetbv(XCR_XFEATURE_ENABLED_MASK
);
630 if ((xcr0
& (XSTATE_SSE
| XSTATE_YMM
)) != (XSTATE_SSE
| XSTATE_YMM
)) {
631 pr_info("AVX detected but unusable.\n");
635 return crypto_register_algs(cast6_algs
, ARRAY_SIZE(cast6_algs
));
638 static void __exit
cast6_exit(void)
640 crypto_unregister_algs(cast6_algs
, ARRAY_SIZE(cast6_algs
));
643 module_init(cast6_init
);
644 module_exit(cast6_exit
);
646 MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized");
647 MODULE_LICENSE("GPL");
648 MODULE_ALIAS("cast6");