/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/glue_helper.h>

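/* Number of 8-byte CAST5 blocks processed per call into the AVX routines. */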
#define CAST5_PARALLEL_BLOCKS 16

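/* 16-way parallel cipher routines implemented in cast5-avx-x86_64-asm_64.S */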
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

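/*
 * Take and release the FPU/SSE/AVX state around bursts of parallel blocks.
 * glue_fpu_begin() only enables the FPU once at least one full 16-block
 * batch is available, so short requests stay on the scalar C code.
 */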
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}

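/*
 * Shared ECB walker: process whole 16-block batches with the AVX routines,
 * then fall back to the generic single-block cipher for the tail.
 */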
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;

	err = blkcipher_walk_virt(desc, walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

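/*
 * CBC encryption is inherently serial (each block chains into the next),
 * so there is nothing to parallelize; use the plain C cipher block by block.
 */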
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

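/*
 * CBC decryption walks backwards from the last block so that it can work
 * in place: blocks are decrypted first and the preceding ciphertext block
 * is XORed in afterwards, with the 16-way routine handling the chaining
 * inside each batch.  The saved ciphertext of the last block (last_iv)
 * becomes the IV for the next walk step.
 */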
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

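/*
 * Top-level CBC decrypt: keeps the FPU enabled across walk steps so the
 * 16-way batches in __cbc_decrypt() can use the AVX routine.
 */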
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

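/*
 * Encrypt the counter block into a keystream buffer and XOR only the
 * remaining nbytes into dst, handling a final block shorter than 8 bytes.
 */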
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

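/*
 * CTR mode: the 16-way routine consumes and increments the big-endian
 * counter in walk->iv itself; leftover full blocks encrypt the counter
 * one at a time and XOR the result into the data.
 */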
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

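/*
 * Top-level CTR: walk in at least block-sized steps, then let
 * ctr_crypt_final() handle any trailing partial block.
 */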
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

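/*
 * Algorithm registrations.  The "__"-prefixed blkcipher entries are the
 * internal synchronous implementations; the ablkcipher entries at priority
 * 200 wrap them through cryptd via the ablk_helper functions, so the AVX
 * code is never entered from a context where the FPU is unusable.
 */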
static struct crypto_alg cast5_algs[6] = { {
	.cra_name		= "__ecb-cast5-avx",
	.cra_driver_name	= "__driver-ecb-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "__cbc-cast5-avx",
	.cra_driver_name	= "__driver-cbc-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "__ctr-cast5-avx",
	.cra_driver_name	= "__driver-ctr-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	.cra_name		= "ecb(cast5)",
	.cra_driver_name	= "ecb-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(cast5)",
	.cra_driver_name	= "cbc-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			/* CBC encryption is serial anyway, so call the sync path directly */
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(cast5)",
	.cra_driver_name	= "ctr-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			/* CTR decryption is the same keystream XOR as encryption */
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
} };

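/*
 * Require AVX plus OS-managed extended state (OSXSAVE), and verify via
 * XCR0 that the OS actually saves/restores both SSE and YMM state.
 */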
static int __init cast5_init(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS("cast5");