1 /*
2  * Glue code for AES implementation for SPE instructions (PPC)
3  *
 * Based on the generic implementation. The assembler module takes care
 * of the SPE registers so that it can run from interrupt context.
6  *
7  * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
8  *
9  * This program is free software; you can redistribute it and/or modify it
10  * under the terms of the GNU General Public License as published by the Free
11  * Software Foundation; either version 2 of the License, or (at your option)
12  * any later version.
13  *
14  */
15 
16 #include <crypto/aes.h>
17 #include <linux/module.h>
18 #include <linux/init.h>
19 #include <linux/types.h>
20 #include <linux/errno.h>
21 #include <linux/crypto.h>
22 #include <asm/byteorder.h>
23 #include <asm/switch_to.h>
24 #include <crypto/algapi.h>
25 #include <crypto/xts.h>
26 
27 /*
 * MAX_BYTES defines the number of bytes that are allowed to be processed
 * between preempt_disable() and preempt_enable(). e500 cores can issue two
 * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
 * bit unit (SU2). One of these can be a memory access that is executed via
 * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
 * 16 byte block or 25 cycles per byte. Thus 768 bytes of input data will
 * need an estimated maximum of 20,000 cycles, including headroom for cache
 * misses. Even with the low end model clocked at 667 MHz this equals a
 * critical time window of less than 30us. The value has been chosen so that
 * a 512 byte disk block is processed in one run and a large 1400 byte IPsec
 * network packet in two runs.
 *
40  */
41 #define MAX_BYTES 768
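
/*
 * Sanity check of the estimate above: 768 bytes * ~25 cycles/byte is roughly
 * 19,200 cycles; at 667 MHz that is about 29 microseconds, which matches the
 * "less than 30us" figure.
 */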
42 
43 struct ppc_aes_ctx {
44 	u32 key_enc[AES_MAX_KEYLENGTH_U32];
45 	u32 key_dec[AES_MAX_KEYLENGTH_U32];
46 	u32 rounds;
47 };
48 
49 struct ppc_xts_ctx {
50 	u32 key_enc[AES_MAX_KEYLENGTH_U32];
51 	u32 key_dec[AES_MAX_KEYLENGTH_U32];
52 	u32 key_twk[AES_MAX_KEYLENGTH_U32];
53 	u32 rounds;
54 };
55 
56 extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
57 extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
58 extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
59 			    u32 bytes);
60 extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
61 			    u32 bytes);
62 extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
63 			    u32 bytes, u8 *iv);
64 extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
65 			    u32 bytes, u8 *iv);
66 extern void ppc_crypt_ctr  (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
67 			    u32 bytes, u8 *iv);
68 extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
69 			    u32 bytes, u8 *iv, u32 *key_twk);
70 extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
71 			    u32 bytes, u8 *iv, u32 *key_twk);
72 
73 extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
74 extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
75 extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
76 
extern void ppc_generate_decrypt_key(u32 *key_dec, u32 *key_enc,
78 				     unsigned int key_len);
79 
80 static void spe_begin(void)
81 {
	/* disable preemption and save the user's SPE registers if required */
83 	preempt_disable();
84 	enable_kernel_spe();
85 }
86 
87 static void spe_end(void)
88 {
89 	disable_kernel_spe();
90 	/* reenable preemption */
91 	preempt_enable();
92 }
93 
94 static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
95 		unsigned int key_len)
96 {
97 	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
98 
99 	if (key_len != AES_KEYSIZE_128 &&
100 	    key_len != AES_KEYSIZE_192 &&
101 	    key_len != AES_KEYSIZE_256) {
102 		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
103 		return -EINVAL;
104 	}
105 
106 	switch (key_len) {
107 	case AES_KEYSIZE_128:
108 		ctx->rounds = 4;
109 		ppc_expand_key_128(ctx->key_enc, in_key);
110 		break;
111 	case AES_KEYSIZE_192:
112 		ctx->rounds = 5;
113 		ppc_expand_key_192(ctx->key_enc, in_key);
114 		break;
115 	case AES_KEYSIZE_256:
116 		ctx->rounds = 6;
117 		ppc_expand_key_256(ctx->key_enc, in_key);
118 		break;
119 	}
120 
121 	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
122 
123 	return 0;
124 }
125 
126 static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
127 		   unsigned int key_len)
128 {
129 	struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
130 	int err;
131 
132 	err = xts_check_key(tfm, in_key, key_len);
133 	if (err)
134 		return err;
135 
136 	key_len >>= 1;
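	/*
	 * An XTS key is two AES keys of equal size concatenated (the data key
	 * followed by the tweak key), hence the halving above.
	 */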
137 
138 	if (key_len != AES_KEYSIZE_128 &&
139 	    key_len != AES_KEYSIZE_192 &&
140 	    key_len != AES_KEYSIZE_256) {
141 		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
142 		return -EINVAL;
143 	}
144 
145 	switch (key_len) {
146 	case AES_KEYSIZE_128:
147 		ctx->rounds = 4;
148 		ppc_expand_key_128(ctx->key_enc, in_key);
149 		ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
150 		break;
151 	case AES_KEYSIZE_192:
152 		ctx->rounds = 5;
153 		ppc_expand_key_192(ctx->key_enc, in_key);
154 		ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
155 		break;
156 	case AES_KEYSIZE_256:
157 		ctx->rounds = 6;
158 		ppc_expand_key_256(ctx->key_enc, in_key);
159 		ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
160 		break;
161 	}
162 
163 	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
164 
165 	return 0;
166 }
167 
168 static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
169 {
170 	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
171 
172 	spe_begin();
173 	ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
174 	spe_end();
175 }
176 
177 static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
178 {
179 	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
180 
181 	spe_begin();
182 	ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
183 	spe_end();
184 }
185 
186 static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
187 			   struct scatterlist *src, unsigned int nbytes)
188 {
189 	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
190 	struct blkcipher_walk walk;
191 	unsigned int ubytes;
192 	int err;
193 
194 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
195 	blkcipher_walk_init(&walk, dst, src, nbytes);
196 	err = blkcipher_walk_virt(desc, &walk);
197 
198 	while ((nbytes = walk.nbytes)) {
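		/*
		 * ubytes is what we leave unprocessed in this pass: everything
		 * beyond MAX_BYTES, or, once the remainder fits into a single
		 * SPE window, just the trailing partial block.
		 */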
199 		ubytes = nbytes > MAX_BYTES ?
200 			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
201 		nbytes -= ubytes;
202 
203 		spe_begin();
204 		ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
205 				ctx->key_enc, ctx->rounds, nbytes);
206 		spe_end();
207 
208 		err = blkcipher_walk_done(desc, &walk, ubytes);
209 	}
210 
211 	return err;
212 }
213 
214 static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
215 			   struct scatterlist *src, unsigned int nbytes)
216 {
217 	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
218 	struct blkcipher_walk walk;
219 	unsigned int ubytes;
220 	int err;
221 
222 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
223 	blkcipher_walk_init(&walk, dst, src, nbytes);
224 	err = blkcipher_walk_virt(desc, &walk);
225 
226 	while ((nbytes = walk.nbytes)) {
227 		ubytes = nbytes > MAX_BYTES ?
228 			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
229 		nbytes -= ubytes;
230 
231 		spe_begin();
232 		ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
233 				ctx->key_dec, ctx->rounds, nbytes);
234 		spe_end();
235 
236 		err = blkcipher_walk_done(desc, &walk, ubytes);
237 	}
238 
239 	return err;
240 }
241 
242 static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
243 			   struct scatterlist *src, unsigned int nbytes)
244 {
245 	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
246 	struct blkcipher_walk walk;
247 	unsigned int ubytes;
248 	int err;
249 
250 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
251 	blkcipher_walk_init(&walk, dst, src, nbytes);
252 	err = blkcipher_walk_virt(desc, &walk);
253 
254 	while ((nbytes = walk.nbytes)) {
255 		ubytes = nbytes > MAX_BYTES ?
256 			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
257 		nbytes -= ubytes;
258 
259 		spe_begin();
260 		ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
261 				ctx->key_enc, ctx->rounds, nbytes, walk.iv);
262 		spe_end();
263 
264 		err = blkcipher_walk_done(desc, &walk, ubytes);
265 	}
266 
267 	return err;
268 }
269 
270 static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
271 			   struct scatterlist *src, unsigned int nbytes)
272 {
273 	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
274 	struct blkcipher_walk walk;
275 	unsigned int ubytes;
276 	int err;
277 
278 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
279 	blkcipher_walk_init(&walk, dst, src, nbytes);
280 	err = blkcipher_walk_virt(desc, &walk);
281 
282 	while ((nbytes = walk.nbytes)) {
283 		ubytes = nbytes > MAX_BYTES ?
284 			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
285 		nbytes -= ubytes;
286 
287 		spe_begin();
288 		ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
289 				ctx->key_dec, ctx->rounds, nbytes, walk.iv);
290 		spe_end();
291 
292 		err = blkcipher_walk_done(desc, &walk, ubytes);
293 	}
294 
295 	return err;
296 }
297 
298 static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
299 			 struct scatterlist *src, unsigned int nbytes)
300 {
301 	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
302 	struct blkcipher_walk walk;
303 	unsigned int pbytes, ubytes;
304 	int err;
305 
306 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
307 	blkcipher_walk_init(&walk, dst, src, nbytes);
308 	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
309 
310 	while ((pbytes = walk.nbytes)) {
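		/*
		 * Process at most MAX_BYTES per SPE window and only whole
		 * blocks, except for the final chunk of the request, which
		 * may contain a partial block since CTR is a stream cipher.
		 */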
311 		pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
312 		pbytes = pbytes == nbytes ?
313 			 nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
314 		ubytes = walk.nbytes - pbytes;
315 
316 		spe_begin();
317 		ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
			      ctx->key_enc, ctx->rounds, pbytes, walk.iv);
319 		spe_end();
320 
321 		nbytes -= pbytes;
322 		err = blkcipher_walk_done(desc, &walk, ubytes);
323 	}
324 
325 	return err;
326 }
327 
328 static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
329 			   struct scatterlist *src, unsigned int nbytes)
330 {
331 	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
332 	struct blkcipher_walk walk;
333 	unsigned int ubytes;
334 	int err;
335 	u32 *twk;
336 
337 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
338 	blkcipher_walk_init(&walk, dst, src, nbytes);
339 	err = blkcipher_walk_virt(desc, &walk);
340 	twk = ctx->key_twk;
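	/*
	 * The tweak key is passed only for the first chunk; the assembler is
	 * expected to use it to encrypt the initial tweak taken from walk.iv,
	 * after which the running tweak is carried forward in walk.iv itself
	 * (hence twk is set to NULL after the first iteration below).
	 */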
341 
342 	while ((nbytes = walk.nbytes)) {
343 		ubytes = nbytes > MAX_BYTES ?
344 			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
345 		nbytes -= ubytes;
346 
347 		spe_begin();
348 		ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
349 				ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
350 		spe_end();
351 
352 		twk = NULL;
353 		err = blkcipher_walk_done(desc, &walk, ubytes);
354 	}
355 
356 	return err;
357 }
358 
359 static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
360 			   struct scatterlist *src, unsigned int nbytes)
361 {
362 	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
363 	struct blkcipher_walk walk;
364 	unsigned int ubytes;
365 	int err;
366 	u32 *twk;
367 
368 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
369 	blkcipher_walk_init(&walk, dst, src, nbytes);
370 	err = blkcipher_walk_virt(desc, &walk);
371 	twk = ctx->key_twk;
372 
373 	while ((nbytes = walk.nbytes)) {
374 		ubytes = nbytes > MAX_BYTES ?
375 			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
376 		nbytes -= ubytes;
377 
378 		spe_begin();
379 		ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
380 				ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
381 		spe_end();
382 
383 		twk = NULL;
384 		err = blkcipher_walk_done(desc, &walk, ubytes);
385 	}
386 
387 	return err;
388 }
389 
390 /*
 * Algorithm definitions. A zero alignment mask (cra_alignmask = 0) was chosen
 * because the e500 platform handles unaligned reads/writes very efficiently.
 * This improves IPsec throughput by another few percent. Additionally we
 * assume that the AES context is always aligned to at least 8 bytes because
 * it is allocated with kmalloc() by the crypto infrastructure. A usage sketch
 * of how these algorithms are reached through the crypto API follows the
 * array below.
 *
397  */
398 static struct crypto_alg aes_algs[] = { {
399 	.cra_name		=	"aes",
400 	.cra_driver_name	=	"aes-ppc-spe",
401 	.cra_priority		=	300,
402 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
403 	.cra_blocksize		=	AES_BLOCK_SIZE,
404 	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
405 	.cra_alignmask		=	0,
406 	.cra_module		=	THIS_MODULE,
407 	.cra_u			=	{
408 		.cipher = {
409 			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
410 			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
411 			.cia_setkey		=	ppc_aes_setkey,
412 			.cia_encrypt		=	ppc_aes_encrypt,
413 			.cia_decrypt		=	ppc_aes_decrypt
414 		}
415 	}
416 }, {
417 	.cra_name		=	"ecb(aes)",
418 	.cra_driver_name	=	"ecb-ppc-spe",
419 	.cra_priority		=	300,
420 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
421 	.cra_blocksize		=	AES_BLOCK_SIZE,
422 	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
423 	.cra_alignmask		=	0,
424 	.cra_type		=	&crypto_blkcipher_type,
425 	.cra_module		=	THIS_MODULE,
426 	.cra_u = {
427 		.blkcipher = {
428 			.min_keysize		=	AES_MIN_KEY_SIZE,
429 			.max_keysize		=	AES_MAX_KEY_SIZE,
431 			.setkey			=	ppc_aes_setkey,
432 			.encrypt		=	ppc_ecb_encrypt,
433 			.decrypt		=	ppc_ecb_decrypt,
434 		}
435 	}
436 }, {
437 	.cra_name		=	"cbc(aes)",
438 	.cra_driver_name	=	"cbc-ppc-spe",
439 	.cra_priority		=	300,
440 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
441 	.cra_blocksize		=	AES_BLOCK_SIZE,
442 	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
443 	.cra_alignmask		=	0,
444 	.cra_type		=	&crypto_blkcipher_type,
445 	.cra_module		=	THIS_MODULE,
446 	.cra_u = {
447 		.blkcipher = {
448 			.min_keysize		=	AES_MIN_KEY_SIZE,
449 			.max_keysize		=	AES_MAX_KEY_SIZE,
450 			.ivsize			=	AES_BLOCK_SIZE,
451 			.setkey			=	ppc_aes_setkey,
452 			.encrypt		=	ppc_cbc_encrypt,
453 			.decrypt		=	ppc_cbc_decrypt,
454 		}
455 	}
456 }, {
457 	.cra_name		=	"ctr(aes)",
458 	.cra_driver_name	=	"ctr-ppc-spe",
459 	.cra_priority		=	300,
460 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
461 	.cra_blocksize		=	1,
462 	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
463 	.cra_alignmask		=	0,
464 	.cra_type		=	&crypto_blkcipher_type,
465 	.cra_module		=	THIS_MODULE,
466 	.cra_u = {
467 		.blkcipher = {
468 			.min_keysize		=	AES_MIN_KEY_SIZE,
469 			.max_keysize		=	AES_MAX_KEY_SIZE,
470 			.ivsize			=	AES_BLOCK_SIZE,
471 			.setkey			=	ppc_aes_setkey,
472 			.encrypt		=	ppc_ctr_crypt,
473 			.decrypt		=	ppc_ctr_crypt,
474 		}
475 	}
476 }, {
477 	.cra_name		=	"xts(aes)",
478 	.cra_driver_name	=	"xts-ppc-spe",
479 	.cra_priority		=	300,
480 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
481 	.cra_blocksize		=	AES_BLOCK_SIZE,
482 	.cra_ctxsize		=	sizeof(struct ppc_xts_ctx),
483 	.cra_alignmask		=	0,
484 	.cra_type		=	&crypto_blkcipher_type,
485 	.cra_module		=	THIS_MODULE,
486 	.cra_u = {
487 		.blkcipher = {
488 			.min_keysize		=	AES_MIN_KEY_SIZE * 2,
489 			.max_keysize		=	AES_MAX_KEY_SIZE * 2,
490 			.ivsize			=	AES_BLOCK_SIZE,
491 			.setkey			=	ppc_xts_setkey,
492 			.encrypt		=	ppc_xts_encrypt,
493 			.decrypt		=	ppc_xts_decrypt,
494 		}
495 	}
496 } };
497 
498 static int __init ppc_aes_mod_init(void)
499 {
500 	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
501 }
502 
503 static void __exit ppc_aes_mod_fini(void)
504 {
505 	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
506 }
507 
508 module_init(ppc_aes_mod_init);
509 module_exit(ppc_aes_mod_fini);
510 
511 MODULE_LICENSE("GPL");
512 MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
513 
514 MODULE_ALIAS_CRYPTO("aes");
515 MODULE_ALIAS_CRYPTO("ecb(aes)");
516 MODULE_ALIAS_CRYPTO("cbc(aes)");
517 MODULE_ALIAS_CRYPTO("ctr(aes)");
518 MODULE_ALIAS_CRYPTO("xts(aes)");
519 MODULE_ALIAS_CRYPTO("aes-ppc-spe");
520