xref: /openbmc/linux/drivers/crypto/padlock-sha.c (revision 2c64e9cb)
/*
 * Cryptographic API.
 *
 * Support for VIA PadLock hardware crypto engine.
 *
 * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 */

#include <crypto/internal/hash.h>
#include <crypto/padlock.h>
#include <crypto/sha.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>

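/*
 * On pre-Nano PadLock CPUs the SHA engine always pads and finalizes the
 * message, so it cannot be used for incremental (multi-part) hashing on
 * its own.  The driver therefore keeps a software fallback shash in the
 * descriptor: init/update/export/import are delegated to the fallback,
 * and the hardware is only invoked from finup/final, seeded with the
 * fallback's exported state.
 */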
struct padlock_sha_desc {
	struct shash_desc fallback;
};

struct padlock_sha_ctx {
	struct crypto_shash *fallback;
};

static int padlock_sha_init(struct shash_desc *desc)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);

	dctx->fallback.tfm = ctx->fallback;
	return crypto_shash_init(&dctx->fallback);
}

static int padlock_sha_update(struct shash_desc *desc,
			      const u8 *data, unsigned int length)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);

	return crypto_shash_update(&dctx->fallback, data, length);
}

static int padlock_sha_export(struct shash_desc *desc, void *out)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);

	return crypto_shash_export(&dctx->fallback, out);
}

static int padlock_sha_import(struct shash_desc *desc, const void *in)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);

	dctx->fallback.tfm = ctx->fallback;
	return crypto_shash_import(&dctx->fallback, in);
}

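/*
 * The hash state words are kept as native (little-endian) 32-bit values
 * while the PHE works on them; swap each word to produce the big-endian
 * digest expected by the crypto API.
 */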
static inline void padlock_output_block(uint32_t *src,
					uint32_t *dst, size_t count)
{
	while (count--)
		*dst++ = swab32(*src++);
}

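/*
 * Hardware finalization for the fallback-based algorithms: export the
 * fallback's running state, top the buffered data up to a block
 * boundary if needed, then let the PHE consume the remaining bytes and
 * emit the final digest.  Messages whose total length would overflow
 * the unsigned long handed to the engine are processed entirely by the
 * software fallback instead.
 */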
static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
			      unsigned int count, u8 *out)
{
	/* We can't store directly to *out as it may be unaligned. */
	/* Don't reduce the buffer size below 128 bytes!
	 * The PadLock microcode needs it that big. */
	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct sha1_state state;
	unsigned int space;
	unsigned int leftover;
	int err;

	err = crypto_shash_export(&dctx->fallback, &state);
	if (err)
		goto out;

	if (state.count + count > ULONG_MAX)
		return crypto_shash_finup(&dctx->fallback, in, count, out);

	leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
	space = SHA1_BLOCK_SIZE - leftover;
	if (space) {
		if (count > space) {
			err = crypto_shash_update(&dctx->fallback, in, space) ?:
			      crypto_shash_export(&dctx->fallback, &state);
			if (err)
				goto out;
			count -= space;
			in += space;
		} else {
			memcpy(state.buffer + leftover, in, count);
			in = state.buffer;
			count += leftover;
			state.count &= ~(SHA1_BLOCK_SIZE - 1);
		}
	}

	memcpy(result, &state.state, SHA1_DIGEST_SIZE);

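	/*
	 * REP XSHA1: ESI points at the remaining input, EDI at the
	 * aligned state/result buffer; ECX is loaded with the total
	 * message length in bytes and EAX with the number of bytes
	 * already hashed.  The engine pads and finalizes the digest
	 * in the result buffer.
	 */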
	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
		      :
		      : "c"((unsigned long)state.count + count),
			"a"((unsigned long)state.count),
			"S"(in), "D"(result));

	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);

out:
	return err;
}

static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
{
	u8 buf[4];

	return padlock_sha1_finup(desc, buf, 0, out);
}

static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
				unsigned int count, u8 *out)
{
	/* We can't store directly to *out as it may be unaligned. */
	/* Don't reduce the buffer size below 128 bytes!
	 * The PadLock microcode needs it that big. */
	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct sha256_state state;
	unsigned int space;
	unsigned int leftover;
	int err;

	err = crypto_shash_export(&dctx->fallback, &state);
	if (err)
		goto out;

	if (state.count + count > ULONG_MAX)
		return crypto_shash_finup(&dctx->fallback, in, count, out);

	leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
	space = SHA256_BLOCK_SIZE - leftover;
	if (space) {
		if (count > space) {
			err = crypto_shash_update(&dctx->fallback, in, space) ?:
			      crypto_shash_export(&dctx->fallback, &state);
			if (err)
				goto out;
			count -= space;
			in += space;
		} else {
			memcpy(state.buf + leftover, in, count);
			in = state.buf;
			count += leftover;
			state.count &= ~(SHA256_BLOCK_SIZE - 1);
		}
	}

	memcpy(result, &state.state, SHA256_DIGEST_SIZE);

	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
		      :
		      : "c"((unsigned long)state.count + count),
			"a"((unsigned long)state.count),
			"S"(in), "D"(result));

	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);

out:
	return err;
}

static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
{
	u8 buf[4];

	return padlock_sha256_finup(desc, buf, 0, out);
}

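/*
 * Allocate the software fallback at tfm creation time.  The fallback's
 * descriptor lives inside ours, so the algorithm's descsize is grown by
 * the fallback's descsize here.
 */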
static int padlock_cra_init(struct crypto_tfm *tfm)
{
	struct crypto_shash *hash = __crypto_shash_cast(tfm);
	const char *fallback_driver_name = crypto_tfm_alg_name(tfm);
	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
	struct crypto_shash *fallback_tfm;
	int err = -ENOMEM;

	/* Allocate a fallback and abort if it failed. */
	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
					  CRYPTO_ALG_NEED_FALLBACK);
	if (IS_ERR(fallback_tfm)) {
		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
		       fallback_driver_name);
		err = PTR_ERR(fallback_tfm);
		goto out;
	}

	ctx->fallback = fallback_tfm;
	hash->descsize += crypto_shash_descsize(fallback_tfm);
	return 0;

out:
	return err;
}

static void padlock_cra_exit(struct crypto_tfm *tfm)
{
	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);

	crypto_free_shash(ctx->fallback);
}

static struct shash_alg sha1_alg = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init		=	padlock_sha_init,
	.update		=	padlock_sha_update,
	.finup		=	padlock_sha1_finup,
	.final		=	padlock_sha1_final,
	.export		=	padlock_sha_export,
	.import		=	padlock_sha_import,
	.descsize	=	sizeof(struct padlock_sha_desc),
	.statesize	=	sizeof(struct sha1_state),
	.base		=	{
		.cra_name		=	"sha1",
		.cra_driver_name	=	"sha1-padlock",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
		.cra_blocksize		=	SHA1_BLOCK_SIZE,
		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
		.cra_module		=	THIS_MODULE,
		.cra_init		=	padlock_cra_init,
		.cra_exit		=	padlock_cra_exit,
	}
};

static struct shash_alg sha256_alg = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	padlock_sha_init,
	.update		=	padlock_sha_update,
	.finup		=	padlock_sha256_finup,
	.final		=	padlock_sha256_final,
	.export		=	padlock_sha_export,
	.import		=	padlock_sha_import,
	.descsize	=	sizeof(struct padlock_sha_desc),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name		=	"sha256",
		.cra_driver_name	=	"sha256-padlock",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
		.cra_blocksize		=	SHA256_BLOCK_SIZE,
		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
		.cra_module		=	THIS_MODULE,
		.cra_init		=	padlock_cra_init,
		.cra_exit		=	padlock_cra_exit,
	}
};

/* Two additional shash_alg instances for the hardware-implemented
 * multi-part hash operations supported by the VIA Nano processor. */
static int padlock_sha1_init_nano(struct shash_desc *desc)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	*sctx = (struct sha1_state){
		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
	};

	return 0;
}

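/*
 * On the Nano the PHE can process full blocks without finalizing: as
 * used here, EAX = -1 selects the non-finalizing multi-block mode, ECX
 * gives the number of 64-byte blocks, ESI the input and EDI the buffer
 * holding the intermediate state words.  Partial blocks are buffered in
 * software and the final padding is applied in the _final_nano handlers
 * below.
 */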
static int padlock_sha1_update_nano(struct shash_desc *desc,
			const u8 *data, unsigned int len)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int partial, done;
	const u8 *src;
	/* The PHE requires the output buffer to be 128 bytes long and
	 * 16-byte aligned. */
	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);

	partial = sctx->count & 0x3f;
	sctx->count += len;
	done = 0;
	src = data;
	memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);

	if ((partial + len) >= SHA1_BLOCK_SIZE) {

		/* Complete a block with the bytes buffered in the state
		 * and hash it first */
		if (partial) {
			done = -partial;
			memcpy(sctx->buffer + partial, data,
				done + SHA1_BLOCK_SIZE);
			src = sctx->buffer;
			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
			: "+S"(src), "+D"(dst)
			: "a"((long)-1), "c"((unsigned long)1));
			done += SHA1_BLOCK_SIZE;
			src = data + done;
		}

		/* Process the remaining full blocks of input data */
		if (len - done >= SHA1_BLOCK_SIZE) {
			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
			: "+S"(src), "+D"(dst)
			: "a"((long)-1),
			"c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
			done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
			src = data + done;
		}
		partial = 0;
	}
	memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
	memcpy(sctx->buffer + partial, src, len - done);

	return 0;
}

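/*
 * Finalization is done in software: append the 0x80 pad byte, zero-fill
 * to 56 mod 64, append the 64-bit big-endian bit count, push the padding
 * through the update path above, then byte-swap the state words into the
 * output digest.
 */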
static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
{
	struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
	unsigned int partial, padlen;
	__be64 bits;
	static const u8 padding[64] = { 0x80, };

	bits = cpu_to_be64(state->count << 3);

	/* Pad out to 56 mod 64 */
	partial = state->count & 0x3f;
	padlen = (partial < 56) ? (56 - partial) : ((64 + 56) - partial);
	padlock_sha1_update_nano(desc, padding, padlen);

	/* Append length field bytes */
	padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));

	/* Swap to output */
	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);

	return 0;
}

static int padlock_sha256_init_nano(struct shash_desc *desc)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	*sctx = (struct sha256_state){
		.state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
				SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7 },
	};

	return 0;
}

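/*
 * Same structure as the SHA-1 Nano update path above, driving the
 * REP XSHA256 opcode (0xf3 0x0f 0xa6 0xd0) instead.
 */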
static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
			  unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int partial, done;
	const u8 *src;
	/* The PHE requires the output buffer to be 128 bytes long and
	 * 16-byte aligned. */
	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);

	partial = sctx->count & 0x3f;
	sctx->count += len;
	done = 0;
	src = data;
	memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);

	if ((partial + len) >= SHA256_BLOCK_SIZE) {

		/* Complete a block with the bytes buffered in the state
		 * and hash it first */
		if (partial) {
			done = -partial;
			memcpy(sctx->buf + partial, data,
				done + SHA256_BLOCK_SIZE);
			src = sctx->buf;
			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
			: "+S"(src), "+D"(dst)
			: "a"((long)-1), "c"((unsigned long)1));
			done += SHA256_BLOCK_SIZE;
			src = data + done;
		}

		/* Process the remaining full blocks of input data */
		if (len - done >= SHA256_BLOCK_SIZE) {
			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
			: "+S"(src), "+D"(dst)
			: "a"((long)-1),
			"c"((unsigned long)((len - done) / SHA256_BLOCK_SIZE)));
			done += ((len - done) - (len - done) % SHA256_BLOCK_SIZE);
			src = data + done;
		}
		partial = 0;
	}
	memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
	memcpy(sctx->buf + partial, src, len - done);

	return 0;
}

static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *state =
		(struct sha256_state *)shash_desc_ctx(desc);
	unsigned int partial, padlen;
	__be64 bits;
	static const u8 padding[64] = { 0x80, };

	bits = cpu_to_be64(state->count << 3);

	/* Pad out to 56 mod 64 */
	partial = state->count & 0x3f;
	padlen = (partial < 56) ? (56 - partial) : ((64 + 56) - partial);
	padlock_sha256_update_nano(desc, padding, padlen);

	/* Append length field bytes */
	padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));

	/* Swap to output */
	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);

	return 0;
}

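/*
 * On the Nano path the descriptor context is the raw sha1/sha256 state
 * itself (descsize == statesize), so export and import are plain copies.
 */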
static int padlock_sha_export_nano(struct shash_desc *desc,
				void *out)
{
	int statesize = crypto_shash_statesize(desc->tfm);
	void *sctx = shash_desc_ctx(desc);

	memcpy(out, sctx, statesize);
	return 0;
}

static int padlock_sha_import_nano(struct shash_desc *desc,
				const void *in)
{
	int statesize = crypto_shash_statesize(desc->tfm);
	void *sctx = shash_desc_ctx(desc);

	memcpy(sctx, in, statesize);
	return 0;
}

static struct shash_alg sha1_alg_nano = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init		=	padlock_sha1_init_nano,
	.update		=	padlock_sha1_update_nano,
	.final		=	padlock_sha1_final_nano,
	.export		=	padlock_sha_export_nano,
	.import		=	padlock_sha_import_nano,
	.descsize	=	sizeof(struct sha1_state),
	.statesize	=	sizeof(struct sha1_state),
	.base		=	{
		.cra_name		=	"sha1",
		.cra_driver_name	=	"sha1-padlock-nano",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_blocksize		=	SHA1_BLOCK_SIZE,
		.cra_module		=	THIS_MODULE,
	}
};

static struct shash_alg sha256_alg_nano = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	padlock_sha256_init_nano,
	.update		=	padlock_sha256_update_nano,
	.final		=	padlock_sha256_final_nano,
	.export		=	padlock_sha_export_nano,
	.import		=	padlock_sha_import_nano,
	.descsize	=	sizeof(struct sha256_state),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name		=	"sha256",
		.cra_driver_name	=	"sha256-padlock-nano",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_blocksize		=	SHA256_BLOCK_SIZE,
		.cra_module		=	THIS_MODULE,
	}
};

static const struct x86_cpu_id padlock_sha_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_PHE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);

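/*
 * The PHE must both be present (X86_FEATURE_PHE) and enabled by the
 * firmware (X86_FEATURE_PHE_EN).  VIA Nano CPUs (model 0x0f and later)
 * get the hardware multi-part algorithms; earlier PadLock CPUs use the
 * fallback-based ones registered above.
 */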
static int __init padlock_init(void)
{
	int rc = -ENODEV;
	struct cpuinfo_x86 *c = &cpu_data(0);
	struct shash_alg *sha1;
	struct shash_alg *sha256;

	if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
		return -ENODEV;

	/* Register the Nano-specific algorithms when running on a
	 * VIA Nano processor, otherwise the fallback-based ones. */
	if (c->x86_model < 0x0f) {
		sha1 = &sha1_alg;
		sha256 = &sha256_alg;
	} else {
		sha1 = &sha1_alg_nano;
		sha256 = &sha256_alg_nano;
	}

	rc = crypto_register_shash(sha1);
	if (rc)
		goto out;

	rc = crypto_register_shash(sha256);
	if (rc)
		goto out_unreg1;

	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");

	return 0;

out_unreg1:
	crypto_unregister_shash(sha1);

out:
	printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
	return rc;
}

static void __exit padlock_fini(void)
{
	struct cpuinfo_x86 *c = &cpu_data(0);

	if (c->x86_model >= 0x0f) {
		crypto_unregister_shash(&sha1_alg_nano);
		crypto_unregister_shash(&sha256_alg_nano);
	} else {
		crypto_unregister_shash(&sha1_alg);
		crypto_unregister_shash(&sha256_alg);
	}
}

module_init(padlock_init);
module_exit(padlock_fini);

MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Michal Ludvig");

MODULE_ALIAS_CRYPTO("sha1-all");
MODULE_ALIAS_CRYPTO("sha256-all");
MODULE_ALIAS_CRYPTO("sha1-padlock");
MODULE_ALIAS_CRYPTO("sha256-padlock");