/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>

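/* Number of blocks the 16-way AVX assembler routines process per call. */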
#define CAST5_PARALLEL_BLOCKS 16

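/*
 * Entry points into the AVX assembler implementation, 16 blocks per call.
 * The 'xor' argument of __cast5_enc_blk_16way selects between writing the
 * ciphertext to dst and XOR'ing it into dst; the XOR variant is used below
 * to apply the CTR keystream in a single pass.
 */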
asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst,
				      const u8 *src, bool xor);
asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);

static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst,
				      const u8 *src)
{
	__cast5_enc_blk_16way(ctx, dst, src, false);
}

static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
					  const u8 *src)
{
	__cast5_enc_blk_16way(ctx, dst, src, true);
}

static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
				      const u8 *src)
{
	cast5_dec_blk_16way(ctx, dst, src);
}

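/*
 * The AVX registers may only be touched between kernel_fpu_begin() and
 * kernel_fpu_end().  glue_fpu_begin() enters such a section only once at
 * least CAST5_PARALLEL_BLOCKS blocks are pending, so short requests skip
 * the FPU save/restore cost and use the generic C implementation instead.
 * Sleeping is not allowed inside an FPU section, which is why the callers
 * below clear CRYPTO_TFM_REQ_MAY_SLEEP before walking the data.
 */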
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}

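/*
 * ECB helper shared by encryption and decryption: consume 16-block chunks
 * with the AVX routines, then finish any remainder one block at a time
 * with the generic implementation.
 */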
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	int err;

	err = blkcipher_walk_virt(desc, walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			do {
				if (enc)
					cast5_enc_blk_xway(ctx, wdst, wsrc);
				else
					cast5_dec_blk_xway(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		/* Handle leftovers */
		do {
			if (enc)
				__cast5_encrypt(ctx, wdst, wsrc);
			else
				__cast5_decrypt(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

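/*
 * CBC encryption is inherently serial (each block is chained to the
 * previous ciphertext block), so there is no parallel fast path and the
 * FPU is never needed here.
 */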
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

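/*
 * CBC decryption can be parallelized because every ciphertext block is
 * available up front.  The walk runs backwards from the last block so
 * that, for in-place operation, each ciphertext block can be saved in
 * ivs[] before it is overwritten, and then XOR'ed into the following
 * plaintext block after the 16-way decryption.
 */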
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
	u64 last_iv;
	int i;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
				ivs[i] = src[i];

			cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);

			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
				*(dst + (i + 1)) ^= *(ivs + i);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

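/*
 * Encrypt the final partial block: generate one full block of keystream
 * from the counter, XOR only the remaining nbytes into the output, and
 * advance the counter in walk->iv for the caller.
 */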
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

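/*
 * Bulk CTR path: copy the plaintext to dst (if not in-place), build the
 * big-endian counter blocks, and let the encrypt-and-XOR variant of the
 * 16-way routine apply the keystream directly to dst in one pass.
 */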
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
	__be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
	int i;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			/* create ctrblks for parallel encrypt */
			for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) {
				if (dst != src)
					dst[i] = src[i];

				ctrblocks[i] = cpu_to_be64(ctrblk++);
			}

			cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
					       (u8 *)ctrblocks);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		if (dst != src)
			*dst = *src;

		ctrblocks[0] = cpu_to_be64(ctrblk++);

		__cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
		*dst ^= ctrblocks[0];

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
	return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

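/*
 * Three internal "__"-prefixed synchronous algorithms (priority 0, never
 * selected by plain name lookup) carry the actual AVX code.  They are
 * wrapped by cryptd-backed async ablkcipher versions via the ablk_helper
 * functions, so the FPU-touching code only runs in a context where that
 * is safe.  Note that cbc(cast5) encryption uses __ablk_encrypt directly,
 * since the serial CBC encrypt path never needs the FPU, and ctr(cast5)
 * registers ablk_encrypt for both directions because CTR mode is its own
 * inverse.
 */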
static struct crypto_alg cast5_algs[6] = { {
	.cra_name		= "__ecb-cast5-avx",
	.cra_driver_name	= "__driver-ecb-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "__cbc-cast5-avx",
	.cra_driver_name	= "__driver-cbc-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "__ctr-cast5-avx",
	.cra_driver_name	= "__driver-ctr-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	.cra_name		= "ecb(cast5)",
	.cra_driver_name	= "ecb-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(cast5)",
	.cra_driver_name	= "cbc-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(cast5)",
	.cra_driver_name	= "ctr-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
} };

static int __init cast5_init(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

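	/*
	 * CPU support alone is not enough: the OS must also have enabled
	 * saving of the SSE and YMM register state in XCR0, otherwise the
	 * AVX instructions would fault.
	 */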
	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS("cast5");