/*
 * Glue code for AES implementation for SPE instructions (PPC)
 *
 * Based on generic implementation. The assembler module takes care
 * of the SPE registers so it can run from interrupt context.
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */

#include <crypto/aes.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/crypto.h>
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>

/*
 * MAX_BYTES defines the number of bytes that are allowed to be processed
 * between preempt_disable() and preempt_enable(). e500 cores can issue two
 * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
 * bit unit (SU2). One of these can be a memory access that is executed via
 * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
 * 16 byte block, i.e. ~390 cycles per block or ~25 cycles per byte. Thus 768
 * bytes of input data will need an estimated maximum of 20,000 cycles,
 * headroom for cache misses included. Even on the low end model clocked at
 * 667 MHz this amounts to a critical time window of less than 30us. The
 * value has been chosen so that a 512 byte disk block is processed in one
 * run and a large 1400 byte IPsec network packet in two runs.
 *
 */
#define MAX_BYTES 768

struct ppc_aes_ctx {
	u32 key_enc[AES_MAX_KEYLENGTH_U32];
	u32 key_dec[AES_MAX_KEYLENGTH_U32];
	u32 rounds;
};

struct ppc_xts_ctx {
	u32 key_enc[AES_MAX_KEYLENGTH_U32];
	u32 key_dec[AES_MAX_KEYLENGTH_U32];
	u32 key_twk[AES_MAX_KEYLENGTH_U32];
	u32 rounds;
};

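/*
 * Cipher, key expansion and block mode primitives implemented by the
 * assembler module.
 */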
extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			    u32 bytes);
extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
			    u32 bytes);
extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			    u32 bytes, u8 *iv);
extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
			    u32 bytes, u8 *iv);
extern void ppc_crypt_ctr  (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			    u32 bytes, u8 *iv);
extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			    u32 bytes, u8 *iv, u32 *key_twk);
extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
			    u32 bytes, u8 *iv, u32 *key_twk);

extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);

extern void ppc_generate_decrypt_key(u32 *key_dec, u32 *key_enc,
				     unsigned int key_len);

static void spe_begin(void)
{
	/* disable preemption and save the user's SPE registers if required */
	preempt_disable();
	enable_kernel_spe();
}

static void spe_end(void)
{
	disable_kernel_spe();
	/* re-enable preemption */
	preempt_enable();
}

static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
		unsigned int key_len)
{
	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	if (key_len != AES_KEYSIZE_128 &&
	    key_len != AES_KEYSIZE_192 &&
	    key_len != AES_KEYSIZE_256) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

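	/*
	 * Note: ctx->rounds is not the raw AES round count (10/12/14) but
	 * the round-loop counter expected by the assembler core.
	 */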
	switch (key_len) {
	case AES_KEYSIZE_128:
		ctx->rounds = 4;
		ppc_expand_key_128(ctx->key_enc, in_key);
		break;
	case AES_KEYSIZE_192:
		ctx->rounds = 5;
		ppc_expand_key_192(ctx->key_enc, in_key);
		break;
	case AES_KEYSIZE_256:
		ctx->rounds = 6;
		ppc_expand_key_256(ctx->key_enc, in_key);
		break;
	}

	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);

	return 0;
}

static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
		   unsigned int key_len)
{
	struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);

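	/* xts(aes) receives the data key and the tweak key concatenated */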
	key_len >>= 1;

	if (key_len != AES_KEYSIZE_128 &&
	    key_len != AES_KEYSIZE_192 &&
	    key_len != AES_KEYSIZE_256) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	switch (key_len) {
	case AES_KEYSIZE_128:
		ctx->rounds = 4;
		ppc_expand_key_128(ctx->key_enc, in_key);
		ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
		break;
	case AES_KEYSIZE_192:
		ctx->rounds = 5;
		ppc_expand_key_192(ctx->key_enc, in_key);
		ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
		break;
	case AES_KEYSIZE_256:
		ctx->rounds = 6;
		ppc_expand_key_256(ctx->key_enc, in_key);
		ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
		break;
	}

	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);

	return 0;
}

static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	spe_begin();
	ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
	spe_end();
}

static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	spe_begin();
	ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
	spe_end();
}

static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
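		/*
		 * Limit each preempt-disabled section to MAX_BYTES; bytes
		 * beyond that limit, or a trailing partial block, are left
		 * for the next iteration of the walk.
		 */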
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_enc, ctx->rounds, nbytes);
		spe_end();

		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_dec, ctx->rounds, nbytes);
		spe_end();

		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_enc, ctx->rounds, nbytes, walk.iv);
		spe_end();

		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_dec, ctx->rounds, nbytes, walk.iv);
		spe_end();

		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			 struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int pbytes, ubytes;
	int err;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);

	while ((pbytes = walk.nbytes)) {
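		/*
		 * Clamp to MAX_BYTES and round down to whole blocks, except
		 * for the final chunk, which may end in a partial block.
		 */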
		pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
		pbytes = pbytes == nbytes ?
			 nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
		ubytes = walk.nbytes - pbytes;

		spe_begin();
		ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
			      ctx->key_enc, ctx->rounds, pbytes, walk.iv);
		spe_end();

		nbytes -= pbytes;
		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;
	u32 *twk;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	twk = ctx->key_twk;

	while ((nbytes = walk.nbytes)) {
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
		spe_end();

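		/*
		 * The tweak key is only needed to set up the initial tweak
		 * for the first chunk; from then on walk.iv carries the
		 * current tweak, so pass NULL.
		 */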
		twk = NULL;
		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	unsigned int ubytes;
	int err;
	u32 *twk;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	twk = ctx->key_twk;

	while ((nbytes = walk.nbytes)) {
		ubytes = nbytes > MAX_BYTES ?
			 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
		nbytes -= ubytes;

		spe_begin();
		ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
		spe_end();

		twk = NULL;
		err = blkcipher_walk_done(desc, &walk, ubytes);
	}

	return err;
}

/*
 * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
 * because the e500 platform can handle unaligned reads/writes very efficiently.
 * This improves IPsec throughput by another few percent. Additionally we assume
 * that the AES context is always aligned to at least 8 bytes because it is
 * created with kmalloc() in the crypto infrastructure.
 *
 */
static struct crypto_alg aes_algs[] = { {
	.cra_name		=	"aes",
	.cra_driver_name	=	"aes-ppc-spe",
	.cra_priority		=	300,
	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		=	AES_BLOCK_SIZE,
	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
	.cra_alignmask		=	0,
	.cra_module		=	THIS_MODULE,
	.cra_u			=	{
		.cipher = {
			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
			.cia_setkey		=	ppc_aes_setkey,
			.cia_encrypt		=	ppc_aes_encrypt,
			.cia_decrypt		=	ppc_aes_decrypt
		}
	}
}, {
	.cra_name		=	"ecb(aes)",
	.cra_driver_name	=	"ecb-ppc-spe",
	.cra_priority		=	300,
	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		=	AES_BLOCK_SIZE,
	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
	.cra_alignmask		=	0,
	.cra_type		=	&crypto_blkcipher_type,
	.cra_module		=	THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize		=	AES_MIN_KEY_SIZE,
			.max_keysize		=	AES_MAX_KEY_SIZE,
			.setkey			=	ppc_aes_setkey,
			.encrypt		=	ppc_ecb_encrypt,
			.decrypt		=	ppc_ecb_decrypt,
		}
	}
}, {
	.cra_name		=	"cbc(aes)",
	.cra_driver_name	=	"cbc-ppc-spe",
	.cra_priority		=	300,
	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		=	AES_BLOCK_SIZE,
	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
	.cra_alignmask		=	0,
	.cra_type		=	&crypto_blkcipher_type,
	.cra_module		=	THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize		=	AES_MIN_KEY_SIZE,
			.max_keysize		=	AES_MAX_KEY_SIZE,
			.ivsize			=	AES_BLOCK_SIZE,
			.setkey			=	ppc_aes_setkey,
			.encrypt		=	ppc_cbc_encrypt,
			.decrypt		=	ppc_cbc_decrypt,
		}
	}
}, {
	.cra_name		=	"ctr(aes)",
	.cra_driver_name	=	"ctr-ppc-spe",
	.cra_priority		=	300,
	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		=	1,
	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
	.cra_alignmask		=	0,
	.cra_type		=	&crypto_blkcipher_type,
	.cra_module		=	THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize		=	AES_MIN_KEY_SIZE,
			.max_keysize		=	AES_MAX_KEY_SIZE,
			.ivsize			=	AES_BLOCK_SIZE,
			.setkey			=	ppc_aes_setkey,
			.encrypt		=	ppc_ctr_crypt,
			.decrypt		=	ppc_ctr_crypt,
		}
	}
}, {
	.cra_name		=	"xts(aes)",
	.cra_driver_name	=	"xts-ppc-spe",
	.cra_priority		=	300,
	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		=	AES_BLOCK_SIZE,
	.cra_ctxsize		=	sizeof(struct ppc_xts_ctx),
	.cra_alignmask		=	0,
	.cra_type		=	&crypto_blkcipher_type,
	.cra_module		=	THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize		=	AES_MIN_KEY_SIZE * 2,
			.max_keysize		=	AES_MAX_KEY_SIZE * 2,
			.ivsize			=	AES_BLOCK_SIZE,
			.setkey			=	ppc_xts_setkey,
			.encrypt		=	ppc_xts_encrypt,
			.decrypt		=	ppc_xts_decrypt,
		}
	}
} };

static int __init ppc_aes_mod_init(void)
{
	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
}

static void __exit ppc_aes_mod_fini(void)
{
	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
}

module_init(ppc_aes_mod_init);
module_exit(ppc_aes_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");

MODULE_ALIAS_CRYPTO("aes");
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
MODULE_ALIAS_CRYPTO("aes-ppc-spe");