xref: /openbmc/linux/arch/x86/crypto/cast5_avx_glue.c (revision e2c75e76)
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/fpu/api.h>
#include <asm/crypto/glue_helper.h>

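/*
 * Number of 8-byte CAST5 blocks handled per call into the AVX assembler
 * routines declared below (the 16-way implementations live in the
 * accompanying cast5-avx-x86_64-asm_64.S).
 */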
#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

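/*
 * glue_fpu_begin() only enters a kernel FPU section once at least
 * CAST5_PARALLEL_BLOCKS * CAST5_BLOCK_SIZE bytes remain, so the cost of
 * saving and restoring SIMD state is paid only when the 16-way AVX path
 * will actually be used.
 */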
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}

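/*
 * ECB: for each segment returned by the blkcipher walk, process 16-block
 * batches with the AVX routines while enough data remains, then fall back to
 * the generic __cast5_encrypt/__cast5_decrypt for the remaining blocks.  The
 * batch routine must be re-selected at the top of each batch because fn is
 * downgraded to the one-block routine for the leftover loop.
 */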
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	err = blkcipher_walk_virt(desc, walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

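/*
 * CBC encryption is inherently serial (each block is chained to the previous
 * ciphertext block), so there is no AVX batch path here; blocks are XORed
 * with the running IV and encrypted one at a time with the generic cipher.
 */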
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

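/*
 * CBC decryption is parallelizable: the buffer is walked backwards from the
 * last block so that, for in-place operation, the ciphertext blocks still
 * needed for the XOR chain are not overwritten before they are used.
 * last_iv saves the final ciphertext block, which becomes the IV for the
 * next call.
 */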
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

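/*
 * CTR: handle the final, partial block by generating one keystream block
 * from the current counter, XORing only the remaining nbytes into the
 * output, and then advancing the counter.
 */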
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor_cpy(dst, keystream, src, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

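/*
 * CTR bulk path: cast5_ctr_16way() generates and XORs 16 blocks of keystream
 * per call and advances the big-endian counter in walk->iv.  Leftover full
 * blocks are handled with the generic cipher, one counter value at a time.
 */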
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

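/*
 * The first three entries are the "__"-prefixed synchronous blkciphers,
 * marked CRYPTO_ALG_INTERNAL so they are not exposed directly to users; they
 * assume the FPU is already usable.  The last three are the user-visible
 * asynchronous ablkcipher wrappers from ablk_helper, which defer to cryptd
 * when the FPU cannot be used in the caller's context.  For ctr(cast5),
 * decryption is the same operation as encryption, so both callbacks point to
 * ablk_encrypt; cbc(cast5) encryption uses __ablk_encrypt because
 * cbc_encrypt() never touches the FPU and can always run synchronously.
 */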
static struct crypto_alg cast5_algs[6] = { {
	.cra_name		= "__ecb-cast5-avx",
	.cra_driver_name	= "__driver-ecb-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "__cbc-cast5-avx",
	.cra_driver_name	= "__driver-cbc-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "__ctr-cast5-avx",
	.cra_driver_name	= "__driver-ctr-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
				  CRYPTO_ALG_INTERNAL,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	.cra_name		= "ecb(cast5)",
	.cra_driver_name	= "ecb-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(cast5)",
	.cra_driver_name	= "cbc-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(cast5)",
	.cra_driver_name	= "ctr-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
} };

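/*
 * Register the algorithms only if the kernel can save and restore both SSE
 * and YMM (AVX) xstate; otherwise the 16-way assembler routines cannot be
 * used and the module refuses to load.
 */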
static int __init cast5_init(void)
{
	const char *feature_name;

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
				&feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");