/*
 * Cryptographic API.
 *
 * Support for VIA PadLock hardware crypto engine.
 *
 * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 */

#include <crypto/internal/hash.h>
#include <crypto/padlock.h>
#include <crypto/sha.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>

struct padlock_sha_desc {
	struct shash_desc fallback;
};

struct padlock_sha_ctx {
	struct crypto_shash *fallback;
};

static int padlock_sha_init(struct shash_desc *desc)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);

	dctx->fallback.tfm = ctx->fallback;
	return crypto_shash_init(&dctx->fallback);
}

static int padlock_sha_update(struct shash_desc *desc,
			      const u8 *data, unsigned int length)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);

	return crypto_shash_update(&dctx->fallback, data, length);
}

static int padlock_sha_export(struct shash_desc *desc, void *out)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);

	return crypto_shash_export(&dctx->fallback, out);
}

static int padlock_sha_import(struct shash_desc *desc, const void *in)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);

	dctx->fallback.tfm = ctx->fallback;
	return crypto_shash_import(&dctx->fallback, in);
}

static inline void padlock_output_block(uint32_t *src,
					uint32_t *dst, size_t count)
{
	while (count--)
		*dst++ = swab32(*src++);
}
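
/*
 * The finup helpers below drive the PadLock Hash Engine directly through
 * the opcode sequences 0xf3,0x0f,0xa6,0xc8 ("rep xsha1") and
 * 0xf3,0x0f,0xa6,0xd0 ("rep xsha256"): %esi points at the input, %edi at
 * the aligned state/digest buffer, and, as used here, %eax/%ecx carry the
 * byte counts the microcode uses to finish the hash, including the final
 * length padding.
 */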

static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
			      unsigned int count, u8 *out)
{
	/* We can't store directly to *out as it may be unaligned. */
	/* Don't reduce the buffer size below 128 bytes;
	 * the PadLock microcode needs it that big. */
	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct sha1_state state;
	unsigned int space;
	unsigned int leftover;
	int err;

	err = crypto_shash_export(&dctx->fallback, &state);
	if (err)
		goto out;

	if (state.count + count > ULONG_MAX)
		return crypto_shash_finup(&dctx->fallback, in, count, out);

	leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
	space = SHA1_BLOCK_SIZE - leftover;
	if (space) {
		if (count > space) {
			err = crypto_shash_update(&dctx->fallback, in, space) ?:
			      crypto_shash_export(&dctx->fallback, &state);
			if (err)
				goto out;
			count -= space;
			in += space;
		} else {
			memcpy(state.buffer + leftover, in, count);
			in = state.buffer;
			count += leftover;
			state.count &= ~(SHA1_BLOCK_SIZE - 1);
		}
	}

	memcpy(result, &state.state, SHA1_DIGEST_SIZE);

	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
		      :
		      : "c"((unsigned long)state.count + count),
			"a"((unsigned long)state.count),
			"S"(in), "D"(result));

	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);

out:
	return err;
}

static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
{
	u8 buf[4];

	return padlock_sha1_finup(desc, buf, 0, out);
}

static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
				unsigned int count, u8 *out)
{
	/* We can't store directly to *out as it may be unaligned. */
	/* Don't reduce the buffer size below 128 bytes;
	 * the PadLock microcode needs it that big. */
	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct sha256_state state;
	unsigned int space;
	unsigned int leftover;
	int err;

	err = crypto_shash_export(&dctx->fallback, &state);
	if (err)
		goto out;

	if (state.count + count > ULONG_MAX)
		return crypto_shash_finup(&dctx->fallback, in, count, out);

	leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
	space = SHA256_BLOCK_SIZE - leftover;
	if (space) {
		if (count > space) {
			err = crypto_shash_update(&dctx->fallback, in, space) ?:
			      crypto_shash_export(&dctx->fallback, &state);
			if (err)
				goto out;
			count -= space;
			in += space;
		} else {
			memcpy(state.buf + leftover, in, count);
			in = state.buf;
			count += leftover;
			state.count &= ~(SHA256_BLOCK_SIZE - 1);
		}
	}

	memcpy(result, &state.state, SHA256_DIGEST_SIZE);

	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
		      :
		      : "c"((unsigned long)state.count + count),
			"a"((unsigned long)state.count),
			"S"(in), "D"(result));

	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);

out:
	return err;
}

static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
{
	u8 buf[4];

	return padlock_sha256_finup(desc, buf, 0, out);
}
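
/*
 * Allocate the generic software fallback for a new tfm.  descsize is grown
 * by the fallback's descriptor size so that the fallback state embedded in
 * struct padlock_sha_desc fits in a single shash_desc allocation.
 */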

static int padlock_cra_init(struct crypto_tfm *tfm)
{
	struct crypto_shash *hash = __crypto_shash_cast(tfm);
	const char *fallback_driver_name = crypto_tfm_alg_name(tfm);
	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
	struct crypto_shash *fallback_tfm;
	int err = -ENOMEM;

	/* Allocate a fallback and abort if it failed. */
	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
					  CRYPTO_ALG_NEED_FALLBACK);
	if (IS_ERR(fallback_tfm)) {
		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
		       fallback_driver_name);
		err = PTR_ERR(fallback_tfm);
		goto out;
	}

	ctx->fallback = fallback_tfm;
	hash->descsize += crypto_shash_descsize(fallback_tfm);
	return 0;

out:
	return err;
}

static void padlock_cra_exit(struct crypto_tfm *tfm)
{
	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);

	crypto_free_shash(ctx->fallback);
}

static struct shash_alg sha1_alg = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init		=	padlock_sha_init,
	.update		=	padlock_sha_update,
	.finup		=	padlock_sha1_finup,
	.final		=	padlock_sha1_final,
	.export		=	padlock_sha_export,
	.import		=	padlock_sha_import,
	.descsize	=	sizeof(struct padlock_sha_desc),
	.statesize	=	sizeof(struct sha1_state),
	.base		=	{
		.cra_name		=	"sha1",
		.cra_driver_name	=	"sha1-padlock",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
		.cra_blocksize		=	SHA1_BLOCK_SIZE,
		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
		.cra_module		=	THIS_MODULE,
		.cra_init		=	padlock_cra_init,
		.cra_exit		=	padlock_cra_exit,
	}
};

static struct shash_alg sha256_alg = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	padlock_sha_init,
	.update		=	padlock_sha_update,
	.finup		=	padlock_sha256_finup,
	.final		=	padlock_sha256_final,
	.export		=	padlock_sha_export,
	.import		=	padlock_sha_import,
	.descsize	=	sizeof(struct padlock_sha_desc),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name		=	"sha256",
		.cra_driver_name	=	"sha256-padlock",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
		.cra_blocksize		=	SHA256_BLOCK_SIZE,
		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
		.cra_module		=	THIS_MODULE,
		.cra_init		=	padlock_cra_init,
		.cra_exit		=	padlock_cra_exit,
	}
};

/* Add two shash_alg instances for the hardware-implemented multi-part
 * hash supported by the VIA Nano processor. */
static int padlock_sha1_init_nano(struct shash_desc *desc)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	*sctx = (struct sha1_state){
		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
	};

	return 0;
}
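
/*
 * The Nano update paths below use the hash engine in multi-block mode:
 * with %eax set to -1 it hashes %ecx complete blocks from %esi into the
 * state at %edi without appending any padding, so the length padding is
 * added by hand in the *_final_nano() routines.
 */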

static int padlock_sha1_update_nano(struct shash_desc *desc,
				    const u8 *data, unsigned int len)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int partial, done;
	const u8 *src;
	/* The PHE requires the output buffer to be 128 bytes long
	 * and 16-byte aligned. */
	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);

	partial = sctx->count & 0x3f;
	sctx->count += len;
	done = 0;
	src = data;
	memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);

	if ((partial + len) >= SHA1_BLOCK_SIZE) {

		/* Complete the partially buffered block with input
		 * bytes and hash it. */
		if (partial) {
			done = -partial;
			memcpy(sctx->buffer + partial, data,
			       done + SHA1_BLOCK_SIZE);
			src = sctx->buffer;
			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
			: "+S"(src), "+D"(dst)
			: "a"((long)-1), "c"((unsigned long)1));
			done += SHA1_BLOCK_SIZE;
			src = data + done;
		}

		/* Process the remaining full blocks of the input data */
		if (len - done >= SHA1_BLOCK_SIZE) {
			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
			: "+S"(src), "+D"(dst)
			: "a"((long)-1),
			"c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
			done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
			src = data + done;
		}
		partial = 0;
	}
	memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
	memcpy(sctx->buffer + partial, src, len - done);

	return 0;
}

static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
{
	struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
	unsigned int partial, padlen;
	__be64 bits;
	static const u8 padding[64] = { 0x80, };

	bits = cpu_to_be64(state->count << 3);

	/* Pad out to 56 mod 64 */
	partial = state->count & 0x3f;
	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
	padlock_sha1_update_nano(desc, padding, padlen);

	/* Append length field bytes */
	padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));

	/* Swap to output */
	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);

	return 0;
}

static int padlock_sha256_init_nano(struct shash_desc *desc)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	*sctx = (struct sha256_state){
		.state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
			   SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7 },
	};

	return 0;
}

static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
				      unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int partial, done;
	const u8 *src;
	/* The PHE requires the output buffer to be 128 bytes long
	 * and 16-byte aligned. */
	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);

	partial = sctx->count & 0x3f;
	sctx->count += len;
	done = 0;
	src = data;
	memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);

	if ((partial + len) >= SHA256_BLOCK_SIZE) {

		/* Complete the partially buffered block with input
		 * bytes and hash it. */
		if (partial) {
			done = -partial;
			memcpy(sctx->buf + partial, data,
			       done + SHA256_BLOCK_SIZE);
			src = sctx->buf;
			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
			: "+S"(src), "+D"(dst)
			: "a"((long)-1), "c"((unsigned long)1));
			done += SHA256_BLOCK_SIZE;
			src = data + done;
		}

		/* Process the remaining full blocks of the input data */
		if (len - done >= SHA256_BLOCK_SIZE) {
			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
			: "+S"(src), "+D"(dst)
			: "a"((long)-1),
			"c"((unsigned long)((len - done) / SHA256_BLOCK_SIZE)));
			done += ((len - done) - (len - done) % SHA256_BLOCK_SIZE);
			src = data + done;
		}
		partial = 0;
	}
	memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
	memcpy(sctx->buf + partial, src, len - done);

	return 0;
}

static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *state =
		(struct sha256_state *)shash_desc_ctx(desc);
	unsigned int partial, padlen;
	__be64 bits;
	static const u8 padding[64] = { 0x80, };

	bits = cpu_to_be64(state->count << 3);

	/* Pad out to 56 mod 64 */
	partial = state->count & 0x3f;
	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
	padlock_sha256_update_nano(desc, padding, padlen);

	/* Append length field bytes */
	padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));

	/* Swap to output */
	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);

	return 0;
}
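
/*
 * The Nano variants keep a plain sha1_state/sha256_state as their
 * descriptor, so export/import reduce to a memcpy() of that state.
 */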

static int padlock_sha_export_nano(struct shash_desc *desc,
				   void *out)
{
	int statesize = crypto_shash_statesize(desc->tfm);
	void *sctx = shash_desc_ctx(desc);

	memcpy(out, sctx, statesize);
	return 0;
}

static int padlock_sha_import_nano(struct shash_desc *desc,
				   const void *in)
{
	int statesize = crypto_shash_statesize(desc->tfm);
	void *sctx = shash_desc_ctx(desc);

	memcpy(sctx, in, statesize);
	return 0;
}

static struct shash_alg sha1_alg_nano = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init		=	padlock_sha1_init_nano,
	.update		=	padlock_sha1_update_nano,
	.final		=	padlock_sha1_final_nano,
	.export		=	padlock_sha_export_nano,
	.import		=	padlock_sha_import_nano,
	.descsize	=	sizeof(struct sha1_state),
	.statesize	=	sizeof(struct sha1_state),
	.base		=	{
		.cra_name		=	"sha1",
		.cra_driver_name	=	"sha1-padlock-nano",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_blocksize		=	SHA1_BLOCK_SIZE,
		.cra_module		=	THIS_MODULE,
	}
};

static struct shash_alg sha256_alg_nano = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	padlock_sha256_init_nano,
	.update		=	padlock_sha256_update_nano,
	.final		=	padlock_sha256_final_nano,
	.export		=	padlock_sha_export_nano,
	.import		=	padlock_sha_import_nano,
	.descsize	=	sizeof(struct sha256_state),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name		=	"sha256",
		.cra_driver_name	=	"sha256-padlock-nano",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_blocksize		=	SHA256_BLOCK_SIZE,
		.cra_module		=	THIS_MODULE,
	}
};
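
/*
 * The PadLock SHA units are only used when the CPU advertises the PadLock
 * Hash Engine (PHE) and it is enabled (PHE_EN); see padlock_init() below.
 */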

static const struct x86_cpu_id padlock_sha_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_PHE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);

static int __init padlock_init(void)
{
	int rc = -ENODEV;
	struct cpuinfo_x86 *c = &cpu_data(0);
	struct shash_alg *sha1;
	struct shash_alg *sha256;

	if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
		return -ENODEV;

	/* Register the Nano-specific algorithms on VIA Nano processors
	 * (model >= 0x0f); otherwise register the original implementations. */
	if (c->x86_model < 0x0f) {
		sha1 = &sha1_alg;
		sha256 = &sha256_alg;
	} else {
		sha1 = &sha1_alg_nano;
		sha256 = &sha256_alg_nano;
	}

	rc = crypto_register_shash(sha1);
	if (rc)
		goto out;

	rc = crypto_register_shash(sha256);
	if (rc)
		goto out_unreg1;

	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");

	return 0;

out_unreg1:
	crypto_unregister_shash(sha1);

out:
	printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
	return rc;
}

static void __exit padlock_fini(void)
{
	struct cpuinfo_x86 *c = &cpu_data(0);

	if (c->x86_model >= 0x0f) {
		crypto_unregister_shash(&sha1_alg_nano);
		crypto_unregister_shash(&sha256_alg_nano);
	} else {
		crypto_unregister_shash(&sha1_alg);
		crypto_unregister_shash(&sha256_alg);
	}
}

module_init(padlock_init);
module_exit(padlock_fini);

MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Michal Ludvig");

MODULE_ALIAS_CRYPTO("sha1-all");
MODULE_ALIAS_CRYPTO("sha256-all");
MODULE_ALIAS_CRYPTO("sha1-padlock");
MODULE_ALIAS_CRYPTO("sha256-padlock");