xref: /openbmc/linux/drivers/crypto/padlock-sha.c (revision 4464005a12b5c79e1a364e6272ee10a83413f928)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Cryptographic API.
4  *
5  * Support for VIA PadLock hardware crypto engine.
6  *
7  * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
8  */
9 
10 #include <crypto/internal/hash.h>
11 #include <crypto/padlock.h>
12 #include <crypto/sha.h>
13 #include <linux/err.h>
14 #include <linux/module.h>
15 #include <linux/init.h>
16 #include <linux/errno.h>
17 #include <linux/interrupt.h>
18 #include <linux/kernel.h>
19 #include <linux/scatterlist.h>
20 #include <asm/cpu_device_id.h>
21 #include <asm/fpu/api.h>
22 
23 struct padlock_sha_desc {
24 	struct shash_desc fallback;
25 };
26 
/*
 * Per-transform context of the fallback-backed algorithms: the fallback
 * transform allocated in padlock_init_tfm() and freed in padlock_exit_tfm().
 */
struct padlock_sha_ctx {
	struct crypto_shash *fallback;	/* fallback shash transform */
};
30 
31 static int padlock_sha_init(struct shash_desc *desc)
32 {
33 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
34 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
35 
36 	dctx->fallback.tfm = ctx->fallback;
37 	return crypto_shash_init(&dctx->fallback);
38 }
39 
40 static int padlock_sha_update(struct shash_desc *desc,
41 			      const u8 *data, unsigned int length)
42 {
43 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
44 
45 	return crypto_shash_update(&dctx->fallback, data, length);
46 }
47 
48 static int padlock_sha_export(struct shash_desc *desc, void *out)
49 {
50 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
51 
52 	return crypto_shash_export(&dctx->fallback, out);
53 }
54 
55 static int padlock_sha_import(struct shash_desc *desc, const void *in)
56 {
57 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
58 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
59 
60 	dctx->fallback.tfm = ctx->fallback;
61 	return crypto_shash_import(&dctx->fallback, in);
62 }
63 
/*
 * Copy @count 32-bit words from @src to @dst, byte-swapping each word
 * with swab32() on the way.
 */
static inline void padlock_output_block(uint32_t *src,
					uint32_t *dst, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++)
		dst[i] = swab32(src[i]);
}
70 
71 static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
72 			      unsigned int count, u8 *out)
73 {
74 	/* We can't store directly to *out as it may be unaligned. */
75 	/* BTW Don't reduce the buffer size below 128 Bytes!
76 	 *     PadLock microcode needs it that big. */
77 	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
78 		((aligned(STACK_ALIGN)));
79 	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
80 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
81 	struct sha1_state state;
82 	unsigned int space;
83 	unsigned int leftover;
84 	int err;
85 
86 	err = crypto_shash_export(&dctx->fallback, &state);
87 	if (err)
88 		goto out;
89 
90 	if (state.count + count > ULONG_MAX)
91 		return crypto_shash_finup(&dctx->fallback, in, count, out);
92 
93 	leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
94 	space =  SHA1_BLOCK_SIZE - leftover;
95 	if (space) {
96 		if (count > space) {
97 			err = crypto_shash_update(&dctx->fallback, in, space) ?:
98 			      crypto_shash_export(&dctx->fallback, &state);
99 			if (err)
100 				goto out;
101 			count -= space;
102 			in += space;
103 		} else {
104 			memcpy(state.buffer + leftover, in, count);
105 			in = state.buffer;
106 			count += leftover;
107 			state.count &= ~(SHA1_BLOCK_SIZE - 1);
108 		}
109 	}
110 
111 	memcpy(result, &state.state, SHA1_DIGEST_SIZE);
112 
113 	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
114 		      : \
115 		      : "c"((unsigned long)state.count + count), \
116 			"a"((unsigned long)state.count), \
117 			"S"(in), "D"(result));
118 
119 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
120 
121 out:
122 	return err;
123 }
124 
125 static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
126 {
127 	u8 buf[4];
128 
129 	return padlock_sha1_finup(desc, buf, 0, out);
130 }
131 
132 static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
133 				unsigned int count, u8 *out)
134 {
135 	/* We can't store directly to *out as it may be unaligned. */
136 	/* BTW Don't reduce the buffer size below 128 Bytes!
137 	 *     PadLock microcode needs it that big. */
138 	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
139 		((aligned(STACK_ALIGN)));
140 	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
141 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
142 	struct sha256_state state;
143 	unsigned int space;
144 	unsigned int leftover;
145 	int err;
146 
147 	err = crypto_shash_export(&dctx->fallback, &state);
148 	if (err)
149 		goto out;
150 
151 	if (state.count + count > ULONG_MAX)
152 		return crypto_shash_finup(&dctx->fallback, in, count, out);
153 
154 	leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
155 	space =  SHA256_BLOCK_SIZE - leftover;
156 	if (space) {
157 		if (count > space) {
158 			err = crypto_shash_update(&dctx->fallback, in, space) ?:
159 			      crypto_shash_export(&dctx->fallback, &state);
160 			if (err)
161 				goto out;
162 			count -= space;
163 			in += space;
164 		} else {
165 			memcpy(state.buf + leftover, in, count);
166 			in = state.buf;
167 			count += leftover;
168 			state.count &= ~(SHA1_BLOCK_SIZE - 1);
169 		}
170 	}
171 
172 	memcpy(result, &state.state, SHA256_DIGEST_SIZE);
173 
174 	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
175 		      : \
176 		      : "c"((unsigned long)state.count + count), \
177 			"a"((unsigned long)state.count), \
178 			"S"(in), "D"(result));
179 
180 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
181 
182 out:
183 	return err;
184 }
185 
186 static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
187 {
188 	u8 buf[4];
189 
190 	return padlock_sha256_finup(desc, buf, 0, out);
191 }
192 
193 static int padlock_init_tfm(struct crypto_shash *hash)
194 {
195 	const char *fallback_driver_name = crypto_shash_alg_name(hash);
196 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
197 	struct crypto_shash *fallback_tfm;
198 
199 	/* Allocate a fallback and abort if it failed. */
200 	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
201 					  CRYPTO_ALG_NEED_FALLBACK);
202 	if (IS_ERR(fallback_tfm)) {
203 		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
204 		       fallback_driver_name);
205 		return PTR_ERR(fallback_tfm);
206 	}
207 
208 	ctx->fallback = fallback_tfm;
209 	hash->descsize += crypto_shash_descsize(fallback_tfm);
210 	return 0;
211 }
212 
213 static void padlock_exit_tfm(struct crypto_shash *hash)
214 {
215 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
216 
217 	crypto_free_shash(ctx->fallback);
218 }
219 
220 static struct shash_alg sha1_alg = {
221 	.digestsize	=	SHA1_DIGEST_SIZE,
222 	.init   	= 	padlock_sha_init,
223 	.update 	=	padlock_sha_update,
224 	.finup  	=	padlock_sha1_finup,
225 	.final  	=	padlock_sha1_final,
226 	.export		=	padlock_sha_export,
227 	.import		=	padlock_sha_import,
228 	.init_tfm	=	padlock_init_tfm,
229 	.exit_tfm	=	padlock_exit_tfm,
230 	.descsize	=	sizeof(struct padlock_sha_desc),
231 	.statesize	=	sizeof(struct sha1_state),
232 	.base		=	{
233 		.cra_name		=	"sha1",
234 		.cra_driver_name	=	"sha1-padlock",
235 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
236 		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
237 		.cra_blocksize		=	SHA1_BLOCK_SIZE,
238 		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
239 		.cra_module		=	THIS_MODULE,
240 	}
241 };
242 
243 static struct shash_alg sha256_alg = {
244 	.digestsize	=	SHA256_DIGEST_SIZE,
245 	.init   	= 	padlock_sha_init,
246 	.update 	=	padlock_sha_update,
247 	.finup  	=	padlock_sha256_finup,
248 	.final  	=	padlock_sha256_final,
249 	.export		=	padlock_sha_export,
250 	.import		=	padlock_sha_import,
251 	.init_tfm	=	padlock_init_tfm,
252 	.exit_tfm	=	padlock_exit_tfm,
253 	.descsize	=	sizeof(struct padlock_sha_desc),
254 	.statesize	=	sizeof(struct sha256_state),
255 	.base		=	{
256 		.cra_name		=	"sha256",
257 		.cra_driver_name	=	"sha256-padlock",
258 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
259 		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
260 		.cra_blocksize		=	SHA256_BLOCK_SIZE,
261 		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
262 		.cra_module		=	THIS_MODULE,
263 	}
264 };
265 
/*
 * Two additional shash_alg instances for the hardware-implemented
 * multi-part hashing supported by the VIA Nano processor.
 */
268 static int padlock_sha1_init_nano(struct shash_desc *desc)
269 {
270 	struct sha1_state *sctx = shash_desc_ctx(desc);
271 
272 	*sctx = (struct sha1_state){
273 		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
274 	};
275 
276 	return 0;
277 }
278 
279 static int padlock_sha1_update_nano(struct shash_desc *desc,
280 			const u8 *data,	unsigned int len)
281 {
282 	struct sha1_state *sctx = shash_desc_ctx(desc);
283 	unsigned int partial, done;
284 	const u8 *src;
285 	/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
286 	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
287 		((aligned(STACK_ALIGN)));
288 	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
289 
290 	partial = sctx->count & 0x3f;
291 	sctx->count += len;
292 	done = 0;
293 	src = data;
294 	memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);
295 
296 	if ((partial + len) >= SHA1_BLOCK_SIZE) {
297 
298 		/* Append the bytes in state's buffer to a block to handle */
299 		if (partial) {
300 			done = -partial;
301 			memcpy(sctx->buffer + partial, data,
302 				done + SHA1_BLOCK_SIZE);
303 			src = sctx->buffer;
304 			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
305 			: "+S"(src), "+D"(dst) \
306 			: "a"((long)-1), "c"((unsigned long)1));
307 			done += SHA1_BLOCK_SIZE;
308 			src = data + done;
309 		}
310 
311 		/* Process the left bytes from the input data */
312 		if (len - done >= SHA1_BLOCK_SIZE) {
313 			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
314 			: "+S"(src), "+D"(dst)
315 			: "a"((long)-1),
316 			"c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
317 			done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
318 			src = data + done;
319 		}
320 		partial = 0;
321 	}
322 	memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
323 	memcpy(sctx->buffer + partial, src, len - done);
324 
325 	return 0;
326 }
327 
328 static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
329 {
330 	struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
331 	unsigned int partial, padlen;
332 	__be64 bits;
333 	static const u8 padding[64] = { 0x80, };
334 
335 	bits = cpu_to_be64(state->count << 3);
336 
337 	/* Pad out to 56 mod 64 */
338 	partial = state->count & 0x3f;
339 	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
340 	padlock_sha1_update_nano(desc, padding, padlen);
341 
342 	/* Append length field bytes */
343 	padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));
344 
345 	/* Swap to output */
346 	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);
347 
348 	return 0;
349 }
350 
351 static int padlock_sha256_init_nano(struct shash_desc *desc)
352 {
353 	struct sha256_state *sctx = shash_desc_ctx(desc);
354 
355 	*sctx = (struct sha256_state){
356 		.state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, \
357 				SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7},
358 	};
359 
360 	return 0;
361 }
362 
363 static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
364 			  unsigned int len)
365 {
366 	struct sha256_state *sctx = shash_desc_ctx(desc);
367 	unsigned int partial, done;
368 	const u8 *src;
369 	/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
370 	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
371 		((aligned(STACK_ALIGN)));
372 	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
373 
374 	partial = sctx->count & 0x3f;
375 	sctx->count += len;
376 	done = 0;
377 	src = data;
378 	memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);
379 
380 	if ((partial + len) >= SHA256_BLOCK_SIZE) {
381 
382 		/* Append the bytes in state's buffer to a block to handle */
383 		if (partial) {
384 			done = -partial;
385 			memcpy(sctx->buf + partial, data,
386 				done + SHA256_BLOCK_SIZE);
387 			src = sctx->buf;
388 			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
389 			: "+S"(src), "+D"(dst)
390 			: "a"((long)-1), "c"((unsigned long)1));
391 			done += SHA256_BLOCK_SIZE;
392 			src = data + done;
393 		}
394 
395 		/* Process the left bytes from input data*/
396 		if (len - done >= SHA256_BLOCK_SIZE) {
397 			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
398 			: "+S"(src), "+D"(dst)
399 			: "a"((long)-1),
400 			"c"((unsigned long)((len - done) / 64)));
401 			done += ((len - done) - (len - done) % 64);
402 			src = data + done;
403 		}
404 		partial = 0;
405 	}
406 	memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
407 	memcpy(sctx->buf + partial, src, len - done);
408 
409 	return 0;
410 }
411 
412 static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
413 {
414 	struct sha256_state *state =
415 		(struct sha256_state *)shash_desc_ctx(desc);
416 	unsigned int partial, padlen;
417 	__be64 bits;
418 	static const u8 padding[64] = { 0x80, };
419 
420 	bits = cpu_to_be64(state->count << 3);
421 
422 	/* Pad out to 56 mod 64 */
423 	partial = state->count & 0x3f;
424 	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
425 	padlock_sha256_update_nano(desc, padding, padlen);
426 
427 	/* Append length field bytes */
428 	padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));
429 
430 	/* Swap to output */
431 	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);
432 
433 	return 0;
434 }
435 
436 static int padlock_sha_export_nano(struct shash_desc *desc,
437 				void *out)
438 {
439 	int statesize = crypto_shash_statesize(desc->tfm);
440 	void *sctx = shash_desc_ctx(desc);
441 
442 	memcpy(out, sctx, statesize);
443 	return 0;
444 }
445 
446 static int padlock_sha_import_nano(struct shash_desc *desc,
447 				const void *in)
448 {
449 	int statesize = crypto_shash_statesize(desc->tfm);
450 	void *sctx = shash_desc_ctx(desc);
451 
452 	memcpy(sctx, in, statesize);
453 	return 0;
454 }
455 
456 static struct shash_alg sha1_alg_nano = {
457 	.digestsize	=	SHA1_DIGEST_SIZE,
458 	.init		=	padlock_sha1_init_nano,
459 	.update		=	padlock_sha1_update_nano,
460 	.final		=	padlock_sha1_final_nano,
461 	.export		=	padlock_sha_export_nano,
462 	.import		=	padlock_sha_import_nano,
463 	.descsize	=	sizeof(struct sha1_state),
464 	.statesize	=	sizeof(struct sha1_state),
465 	.base		=	{
466 		.cra_name		=	"sha1",
467 		.cra_driver_name	=	"sha1-padlock-nano",
468 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
469 		.cra_blocksize		=	SHA1_BLOCK_SIZE,
470 		.cra_module		=	THIS_MODULE,
471 	}
472 };
473 
474 static struct shash_alg sha256_alg_nano = {
475 	.digestsize	=	SHA256_DIGEST_SIZE,
476 	.init		=	padlock_sha256_init_nano,
477 	.update		=	padlock_sha256_update_nano,
478 	.final		=	padlock_sha256_final_nano,
479 	.export		=	padlock_sha_export_nano,
480 	.import		=	padlock_sha_import_nano,
481 	.descsize	=	sizeof(struct sha256_state),
482 	.statesize	=	sizeof(struct sha256_state),
483 	.base		=	{
484 		.cra_name		=	"sha256",
485 		.cra_driver_name	=	"sha256-padlock-nano",
486 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
487 		.cra_blocksize		=	SHA256_BLOCK_SIZE,
488 		.cra_module		=	THIS_MODULE,
489 	}
490 };
491 
492 static const struct x86_cpu_id padlock_sha_ids[] = {
493 	X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL),
494 	{}
495 };
496 MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);
497 
498 static int __init padlock_init(void)
499 {
500 	int rc = -ENODEV;
501 	struct cpuinfo_x86 *c = &cpu_data(0);
502 	struct shash_alg *sha1;
503 	struct shash_alg *sha256;
504 
505 	if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
506 		return -ENODEV;
507 
508 	/* Register the newly added algorithm module if on *
509 	* VIA Nano processor, or else just do as before */
510 	if (c->x86_model < 0x0f) {
511 		sha1 = &sha1_alg;
512 		sha256 = &sha256_alg;
513 	} else {
514 		sha1 = &sha1_alg_nano;
515 		sha256 = &sha256_alg_nano;
516 	}
517 
518 	rc = crypto_register_shash(sha1);
519 	if (rc)
520 		goto out;
521 
522 	rc = crypto_register_shash(sha256);
523 	if (rc)
524 		goto out_unreg1;
525 
526 	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");
527 
528 	return 0;
529 
530 out_unreg1:
531 	crypto_unregister_shash(sha1);
532 
533 out:
534 	printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
535 	return rc;
536 }
537 
538 static void __exit padlock_fini(void)
539 {
540 	struct cpuinfo_x86 *c = &cpu_data(0);
541 
542 	if (c->x86_model >= 0x0f) {
543 		crypto_unregister_shash(&sha1_alg_nano);
544 		crypto_unregister_shash(&sha256_alg_nano);
545 	} else {
546 		crypto_unregister_shash(&sha1_alg);
547 		crypto_unregister_shash(&sha256_alg);
548 	}
549 }
550 
551 module_init(padlock_init);
552 module_exit(padlock_fini);
553 
554 MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
555 MODULE_LICENSE("GPL");
556 MODULE_AUTHOR("Michal Ludvig");
557 
558 MODULE_ALIAS_CRYPTO("sha1-all");
559 MODULE_ALIAS_CRYPTO("sha256-all");
560 MODULE_ALIAS_CRYPTO("sha1-padlock");
561 MODULE_ALIAS_CRYPTO("sha256-padlock");
562