/*
 * Glue code for AES implementation for SPE instructions (PPC)
 *
 * Based on generic implementation. The assembler module takes care
 * of the SPE registers so it can run from interrupt context.
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */

#include <crypto/aes.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/crypto.h>
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>

/*
 * MAX_BYTES defines the number of bytes that are allowed to be processed
 * between preempt_disable() and preempt_enable(). e500 cores can issue two
 * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
 * bit unit (SU2). One of these can be a memory access that is executed via
 * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
 * 16 byte block or 25 cycles per byte. Thus 768 bytes of input data will
 * need an estimated maximum of 20,000 cycles, including headroom for cache
 * misses. Even on the low end model clocked at 667 MHz this amounts to a
 * critical time window of less than 30 us. The value has been chosen to
 * process a 512 byte disk block in one run or a large 1400 byte IPsec
 * network packet in two runs.
 *
 */
#define MAX_BYTES 768
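/*
 * Rough sanity check of the numbers above: 768 bytes are 48 blocks of 16
 * bytes; at ~780 operations per block that is ~37,000 instructions, or
 * roughly 19,000 cycles at the dual-issue rate (~25 cycles per byte).
 * Rounded up to 20,000 cycles for cache misses, a 667 MHz core spends
 * about 20,000 / 667,000,000 s, i.e. ~30 us, with preemption disabled.
 */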

struct ppc_aes_ctx {
	u32 key_enc[AES_MAX_KEYLENGTH_U32];
	u32 key_dec[AES_MAX_KEYLENGTH_U32];
	u32 rounds;
};

struct ppc_xts_ctx {
	u32 key_enc[AES_MAX_KEYLENGTH_U32];
	u32 key_dec[AES_MAX_KEYLENGTH_U32];
	u32 key_twk[AES_MAX_KEYLENGTH_U32];
	u32 rounds;
};

extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			    u32 bytes);
extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
			    u32 bytes);
extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			    u32 bytes, u8 *iv);
extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
			    u32 bytes, u8 *iv);
extern void ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			  u32 bytes, u8 *iv);
extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			    u32 bytes, u8 *iv, u32 *key_twk);
extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
			    u32 bytes, u8 *iv, u32 *key_twk);

extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);

extern void ppc_generate_decrypt_key(u32 *key_dec, u32 *key_enc,
				     unsigned int key_len);

static void spe_begin(void)
{
	/* disable preemption and save the user's SPE registers if required */
	preempt_disable();
	enable_kernel_spe();
}

static void spe_end(void)
{
	/* re-enable preemption */
	preempt_enable();
}

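/*
 * Note that ctx->rounds below is not the AES round count (10/12/14 for
 * 128/192/256 bit keys). It is the loop count handed to the assembler
 * core, which apparently handles the initial and final rounds separately
 * and loops over the remaining rounds two at a time, giving
 * (rounds - 2) / 2 = 4, 5 or 6.
 */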
static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
		unsigned int key_len)
{
	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	if (key_len != AES_KEYSIZE_128 &&
	    key_len != AES_KEYSIZE_192 &&
	    key_len != AES_KEYSIZE_256) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	switch (key_len) {
	case AES_KEYSIZE_128:
		ctx->rounds = 4;
		ppc_expand_key_128(ctx->key_enc, in_key);
		break;
	case AES_KEYSIZE_192:
		ctx->rounds = 5;
		ppc_expand_key_192(ctx->key_enc, in_key);
		break;
	case AES_KEYSIZE_256:
		ctx->rounds = 6;
		ppc_expand_key_256(ctx->key_enc, in_key);
		break;
	}

	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);

	return 0;
}

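/*
 * For XTS the supplied key is the concatenation of two AES keys of equal
 * size: the data encryption key followed by the tweak key. The length is
 * therefore halved before validation, and the second half is expanded
 * into key_twk.
 */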
static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
		   unsigned int key_len)
{
	struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);

	key_len >>= 1;

	if (key_len != AES_KEYSIZE_128 &&
	    key_len != AES_KEYSIZE_192 &&
	    key_len != AES_KEYSIZE_256) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	switch (key_len) {
	case AES_KEYSIZE_128:
		ctx->rounds = 4;
		ppc_expand_key_128(ctx->key_enc, in_key);
		ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
		break;
	case AES_KEYSIZE_192:
		ctx->rounds = 5;
		ppc_expand_key_192(ctx->key_enc, in_key);
		ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
		break;
	case AES_KEYSIZE_256:
		ctx->rounds = 6;
		ppc_expand_key_256(ctx->key_enc, in_key);
		ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
		break;
	}

	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);

	return 0;
}

static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	spe_begin();
	ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
	spe_end();
}

static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	spe_begin();
	ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
	spe_end();
}

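/*
 * The blkcipher handlers below all follow the same pattern: walk the
 * scatterlists in virtually mapped chunks and, per pass, process at most
 * MAX_BYTES with preemption disabled. ubytes is the number of bytes left
 * unprocessed in this pass: everything beyond MAX_BYTES if the chunk is
 * larger, otherwise just the trailing partial block. The unprocessed tail
 * is handed back to blkcipher_walk_done() for the next iteration.
 */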
static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_enc, ctx->rounds, nbytes);
		spe_end();

		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_dec, ctx->rounds, nbytes);
		spe_end();

		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_enc, ctx->rounds, nbytes, walk.iv);
		spe_end();

		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_dec, ctx->rounds, nbytes, walk.iv);
		spe_end();

		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

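/*
 * CTR is a stream mode, so the request need not be a multiple of the block
 * size and the walk is done in AES_BLOCK_SIZE granularity instead. pbytes
 * is the number of bytes processed in this pass: whole blocks only, except
 * for the final pass, which may include a partial trailing block.
 */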
static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			 struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int pbytes, ubytes;
	int err;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);

	while ((pbytes = walk.nbytes)) {
		pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
		pbytes = pbytes == nbytes ?
			 nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
		ubytes = walk.nbytes - pbytes;

		spe_begin();
		ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
			      ctx->key_enc, ctx->rounds, pbytes, walk.iv);
		spe_end();

		nbytes -= pbytes;
		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

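/*
 * The tweak key is only passed to the assembler core on the first pass,
 * presumably so that it encrypts the IV into the initial tweak exactly
 * once; later passes get a NULL tweak key and continue from the tweak
 * state carried in walk.iv.
 */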
static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;
	u32 *twk;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	twk = ctx->key_twk;

	while ((nbytes = walk.nbytes)) {
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
		spe_end();

		twk = NULL;
		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;
	u32 *twk;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	twk = ctx->key_twk;

	while ((nbytes = walk.nbytes)) {
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
		spe_end();

		twk = NULL;
		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

/*
 * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
 * because the e500 platform can handle unaligned reads/writes very
 * efficiently. This improves IPsec throughput by another few percent.
 * Additionally we assume that the AES context is always aligned to at
 * least 8 bytes because it is created with kmalloc() in the crypto
 * infrastructure.
 *
 */
static struct crypto_alg aes_algs[] = { {
	.cra_name		=	"aes",
	.cra_driver_name	=	"aes-ppc-spe",
	.cra_priority		=	300,
	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		=	AES_BLOCK_SIZE,
	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
	.cra_alignmask		=	0,
	.cra_module		=	THIS_MODULE,
	.cra_u			=	{
		.cipher = {
			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
			.cia_setkey		=	ppc_aes_setkey,
			.cia_encrypt		=	ppc_aes_encrypt,
			.cia_decrypt		=	ppc_aes_decrypt
		}
	}
}, {
	.cra_name		=	"ecb(aes)",
	.cra_driver_name	=	"ecb-ppc-spe",
	.cra_priority		=	300,
	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		=	AES_BLOCK_SIZE,
	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
	.cra_alignmask		=	0,
	.cra_type		=	&crypto_blkcipher_type,
	.cra_module		=	THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize		=	AES_MIN_KEY_SIZE,
			.max_keysize		=	AES_MAX_KEY_SIZE,
			.ivsize			=	AES_BLOCK_SIZE,
			.setkey			=	ppc_aes_setkey,
			.encrypt		=	ppc_ecb_encrypt,
			.decrypt		=	ppc_ecb_decrypt,
		}
	}
}, {
	.cra_name		=	"cbc(aes)",
	.cra_driver_name	=	"cbc-ppc-spe",
	.cra_priority		=	300,
	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		=	AES_BLOCK_SIZE,
	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
	.cra_alignmask		=	0,
	.cra_type		=	&crypto_blkcipher_type,
	.cra_module		=	THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize		=	AES_MIN_KEY_SIZE,
			.max_keysize		=	AES_MAX_KEY_SIZE,
			.ivsize			=	AES_BLOCK_SIZE,
			.setkey			=	ppc_aes_setkey,
			.encrypt		=	ppc_cbc_encrypt,
			.decrypt		=	ppc_cbc_decrypt,
		}
	}
}, {
	.cra_name		=	"ctr(aes)",
	.cra_driver_name	=	"ctr-ppc-spe",
	.cra_priority		=	300,
	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		=	1,
	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
	.cra_alignmask		=	0,
	.cra_type		=	&crypto_blkcipher_type,
	.cra_module		=	THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize		=	AES_MIN_KEY_SIZE,
			.max_keysize		=	AES_MAX_KEY_SIZE,
			.ivsize			=	AES_BLOCK_SIZE,
			.setkey			=	ppc_aes_setkey,
			.encrypt		=	ppc_ctr_crypt,
			.decrypt		=	ppc_ctr_crypt,
		}
	}
}, {
	.cra_name		=	"xts(aes)",
	.cra_driver_name	=	"xts-ppc-spe",
	.cra_priority		=	300,
	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		=	AES_BLOCK_SIZE,
	.cra_ctxsize		=	sizeof(struct ppc_xts_ctx),
	.cra_alignmask		=	0,
	.cra_type		=	&crypto_blkcipher_type,
	.cra_module		=	THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize		=	AES_MIN_KEY_SIZE * 2,
			.max_keysize		=	AES_MAX_KEY_SIZE * 2,
			.ivsize			=	AES_BLOCK_SIZE,
			.setkey			=	ppc_xts_setkey,
			.encrypt		=	ppc_xts_encrypt,
			.decrypt		=	ppc_xts_decrypt,
		}
	}
} };

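/*
 * Illustrative usage sketch (not part of this module): kernel users reach
 * these implementations through the generic crypto API by algorithm name,
 * e.g. for the plain block cipher:
 *
 *	struct crypto_cipher *tfm = crypto_alloc_cipher("aes", 0, 0);
 *
 *	if (!IS_ERR(tfm)) {
 *		crypto_cipher_setkey(tfm, key, AES_KEYSIZE_128);
 *		crypto_cipher_encrypt_one(tfm, dst, src);
 *		crypto_free_cipher(tfm);
 *	}
 *
 * With cra_priority 300 the "-ppc-spe" drivers are preferred over the
 * generic C implementations as long as no higher priority provider is
 * registered.
 */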
static int __init ppc_aes_mod_init(void)
{
	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
}

static void __exit ppc_aes_mod_fini(void)
{
	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
}

module_init(ppc_aes_mod_init);
module_exit(ppc_aes_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");

MODULE_ALIAS_CRYPTO("aes");
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
MODULE_ALIAS_CRYPTO("aes-ppc-spe");