/*
 * Cryptographic API.
 *
 * Support for VIA PadLock hardware crypto engine.
 *
 * Copyright (c) 2006 Michal Ludvig <michal@logix.cz>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 */

#include <crypto/internal/hash.h>
#include <crypto/padlock.h>
#include <crypto/sha.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>

struct padlock_sha_desc {
	struct shash_desc fallback;
};

struct padlock_sha_ctx {
	struct crypto_shash *fallback;
};

static int padlock_sha_init(struct shash_desc *desc)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);

	dctx->fallback.tfm = ctx->fallback;
	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
	return crypto_shash_init(&dctx->fallback);
}

static int padlock_sha_update(struct shash_desc *desc,
			      const u8 *data, unsigned int length)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);

	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
	return crypto_shash_update(&dctx->fallback, data, length);
}

static int padlock_sha_export(struct shash_desc *desc, void *out)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);

	return crypto_shash_export(&dctx->fallback, out);
}

static int padlock_sha_import(struct shash_desc *desc, const void *in)
{
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);

	dctx->fallback.tfm = ctx->fallback;
	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
	return crypto_shash_import(&dctx->fallback, in);
}

static inline void padlock_output_block(uint32_t *src,
					uint32_t *dst, size_t count)
{
	while (count--)
		*dst++ = swab32(*src++);
}
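
/*
 * Only the final step is offloaded to the hardware.  Intermediate data is
 * fed to the software fallback, whose exported state supplies the partial
 * digest and the running byte count.  The "rep xsha1"/"rep xsha256"
 * instructions below are then issued with EDI pointing at that partial
 * digest, ESI at the remaining input, ECX holding the total message length
 * and EAX the number of bytes already hashed, letting the engine consume
 * the tail and append the final padding itself.
 */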
static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
			      unsigned int count, u8 *out)
{
	/* We can't store directly to *out as it may be unaligned. */
	/* Don't reduce the buffer size below 128 bytes;
	 * the PadLock microcode needs it that big. */
	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct sha1_state state;
	unsigned int space;
	unsigned int leftover;
	int ts_state;
	int err;

	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
	err = crypto_shash_export(&dctx->fallback, &state);
	if (err)
		goto out;

	if (state.count + count > ULONG_MAX)
		return crypto_shash_finup(&dctx->fallback, in, count, out);

	leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
	space = SHA1_BLOCK_SIZE - leftover;
	if (space) {
		if (count > space) {
			err = crypto_shash_update(&dctx->fallback, in, space) ?:
			      crypto_shash_export(&dctx->fallback, &state);
			if (err)
				goto out;
			count -= space;
			in += space;
		} else {
			memcpy(state.buffer + leftover, in, count);
			in = state.buffer;
			count += leftover;
			state.count &= ~(SHA1_BLOCK_SIZE - 1);
		}
	}

	memcpy(result, &state.state, SHA1_DIGEST_SIZE);

	/* Prevent taking the spurious DNA fault with padlock. */
	ts_state = irq_ts_save();
	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
		      :
		      : "c"((unsigned long)state.count + count),
			"a"((unsigned long)state.count),
			"S"(in), "D"(result));
	irq_ts_restore(ts_state);

	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);

out:
	return err;
}

static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
{
	u8 buf[4];

	return padlock_sha1_finup(desc, buf, 0, out);
}
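
/*
 * The SHA-256 paths below mirror the SHA-1 ones; only the block/digest
 * sizes, the xsha256 opcode (0xf3 0x0f 0xa6 0xd0) and the number of
 * output words differ.
 */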
static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
				unsigned int count, u8 *out)
{
	/* We can't store directly to *out as it may be unaligned. */
	/* Don't reduce the buffer size below 128 bytes;
	 * the PadLock microcode needs it that big. */
	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct sha256_state state;
	unsigned int space;
	unsigned int leftover;
	int ts_state;
	int err;

	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
	err = crypto_shash_export(&dctx->fallback, &state);
	if (err)
		goto out;

	if (state.count + count > ULONG_MAX)
		return crypto_shash_finup(&dctx->fallback, in, count, out);

	leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
	space = SHA256_BLOCK_SIZE - leftover;
	if (space) {
		if (count > space) {
			err = crypto_shash_update(&dctx->fallback, in, space) ?:
			      crypto_shash_export(&dctx->fallback, &state);
			if (err)
				goto out;
			count -= space;
			in += space;
		} else {
			memcpy(state.buf + leftover, in, count);
			in = state.buf;
			count += leftover;
			state.count &= ~(SHA256_BLOCK_SIZE - 1);
		}
	}

	memcpy(result, &state.state, SHA256_DIGEST_SIZE);

	/* Prevent taking the spurious DNA fault with padlock. */
	ts_state = irq_ts_save();
	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
		      :
		      : "c"((unsigned long)state.count + count),
			"a"((unsigned long)state.count),
			"S"(in), "D"(result));
	irq_ts_restore(ts_state);

	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);

out:
	return err;
}

static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
{
	u8 buf[4];

	return padlock_sha256_finup(desc, buf, 0, out);
}

static int padlock_cra_init(struct crypto_tfm *tfm)
{
	struct crypto_shash *hash = __crypto_shash_cast(tfm);
	const char *fallback_driver_name = crypto_tfm_alg_name(tfm);
	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
	struct crypto_shash *fallback_tfm;
	int err = -ENOMEM;

	/* Allocate a fallback and abort if it failed. */
	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
					  CRYPTO_ALG_NEED_FALLBACK);
	if (IS_ERR(fallback_tfm)) {
		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
		       fallback_driver_name);
		err = PTR_ERR(fallback_tfm);
		goto out;
	}

	ctx->fallback = fallback_tfm;
	hash->descsize += crypto_shash_descsize(fallback_tfm);
	return 0;

out:
	return err;
}

static void padlock_cra_exit(struct crypto_tfm *tfm)
{
	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);

	crypto_free_shash(ctx->fallback);
}

static struct shash_alg sha1_alg = {
	.digestsize = SHA1_DIGEST_SIZE,
	.init = padlock_sha_init,
	.update = padlock_sha_update,
	.finup = padlock_sha1_finup,
	.final = padlock_sha1_final,
	.export = padlock_sha_export,
	.import = padlock_sha_import,
	.descsize = sizeof(struct padlock_sha_desc),
	.statesize = sizeof(struct sha1_state),
	.base = {
		.cra_name = "sha1",
		.cra_driver_name = "sha1-padlock",
		.cra_priority = PADLOCK_CRA_PRIORITY,
		.cra_flags = CRYPTO_ALG_TYPE_SHASH |
			     CRYPTO_ALG_NEED_FALLBACK,
		.cra_blocksize = SHA1_BLOCK_SIZE,
		.cra_ctxsize = sizeof(struct padlock_sha_ctx),
		.cra_module = THIS_MODULE,
		.cra_init = padlock_cra_init,
		.cra_exit = padlock_cra_exit,
	}
};

static struct shash_alg sha256_alg = {
	.digestsize = SHA256_DIGEST_SIZE,
	.init = padlock_sha_init,
	.update = padlock_sha_update,
	.finup = padlock_sha256_finup,
	.final = padlock_sha256_final,
	.export = padlock_sha_export,
	.import = padlock_sha_import,
	.descsize = sizeof(struct padlock_sha_desc),
	.statesize = sizeof(struct sha256_state),
	.base = {
		.cra_name = "sha256",
		.cra_driver_name = "sha256-padlock",
		.cra_priority = PADLOCK_CRA_PRIORITY,
		.cra_flags = CRYPTO_ALG_TYPE_SHASH |
			     CRYPTO_ALG_NEED_FALLBACK,
		.cra_blocksize = SHA256_BLOCK_SIZE,
		.cra_ctxsize = sizeof(struct padlock_sha_ctx),
		.cra_module = THIS_MODULE,
		.cra_init = padlock_cra_init,
		.cra_exit = padlock_cra_exit,
	}
};

/* Add two shash_alg instances for the hardware-implemented multi-part
 * hashing supported by the VIA Nano processor. */
static int padlock_sha1_init_nano(struct shash_desc *desc)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	*sctx = (struct sha1_state){
		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
	};

	return 0;
}
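
/*
 * On the Nano the PHE can update a hash incrementally: invoked with
 * EAX = -1 and ECX = <number of 64-byte blocks>, it advances the
 * intermediate digest at EDI without appending any final padding, so no
 * software fallback is needed.  The update helpers below keep at most one
 * partial block buffered in the sha1/sha256 state and hand only complete
 * blocks to the engine; padding is added in software by the *_final_nano()
 * routines further down.
 */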
static int padlock_sha1_update_nano(struct shash_desc *desc,
				    const u8 *data, unsigned int len)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int partial, done;
	const u8 *src;
	/* The PHE requires the output buffer to be 128 bytes long
	 * and 16-byte aligned. */
	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	int ts_state;

	partial = sctx->count & 0x3f;
	sctx->count += len;
	done = 0;
	src = data;
	memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);

	if ((partial + len) >= SHA1_BLOCK_SIZE) {

		/* Append the bytes in state's buffer to a block to handle */
		if (partial) {
			done = -partial;
			memcpy(sctx->buffer + partial, data,
			       done + SHA1_BLOCK_SIZE);
			src = sctx->buffer;
			ts_state = irq_ts_save();
			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
				      : "+S"(src), "+D"(dst)
				      : "a"((long)-1), "c"((unsigned long)1));
			irq_ts_restore(ts_state);
			done += SHA1_BLOCK_SIZE;
			src = data + done;
		}

		/* Process the remaining bytes from the input data */
		if (len - done >= SHA1_BLOCK_SIZE) {
			ts_state = irq_ts_save();
			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
				      : "+S"(src), "+D"(dst)
				      : "a"((long)-1),
					"c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
			irq_ts_restore(ts_state);
			done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
			src = data + done;
		}
		partial = 0;
	}
	memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
	memcpy(sctx->buffer + partial, src, len - done);

	return 0;
}
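
/*
 * Because the engine is only ever asked for intermediate updates above,
 * finalization is done in software: append the 0x80/zero padding so the
 * length field falls in the last 8 bytes of a block, feed the 64-bit
 * big-endian bit count through one more update, then byte-swap the
 * digest words into the caller's buffer.
 */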
static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
{
	struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
	unsigned int partial, padlen;
	__be64 bits;
	static const u8 padding[64] = { 0x80, };

	bits = cpu_to_be64(state->count << 3);

	/* Pad out to 56 mod 64 */
	partial = state->count & 0x3f;
	padlen = (partial < 56) ? (56 - partial) : ((64 + 56) - partial);
	padlock_sha1_update_nano(desc, padding, padlen);

	/* Append length field bytes */
	padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));

	/* Swap to output */
	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);

	return 0;
}

static int padlock_sha256_init_nano(struct shash_desc *desc)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	*sctx = (struct sha256_state){
		.state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
			   SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7 },
	};

	return 0;
}

static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
				      unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int partial, done;
	const u8 *src;
	/* The PHE requires the output buffer to be 128 bytes long
	 * and 16-byte aligned. */
	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	int ts_state;

	partial = sctx->count & 0x3f;
	sctx->count += len;
	done = 0;
	src = data;
	memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);

	if ((partial + len) >= SHA256_BLOCK_SIZE) {

		/* Append the bytes in state's buffer to a block to handle */
		if (partial) {
			done = -partial;
			memcpy(sctx->buf + partial, data,
			       done + SHA256_BLOCK_SIZE);
			src = sctx->buf;
			ts_state = irq_ts_save();
			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
				      : "+S"(src), "+D"(dst)
				      : "a"((long)-1), "c"((unsigned long)1));
			irq_ts_restore(ts_state);
			done += SHA256_BLOCK_SIZE;
			src = data + done;
		}

		/* Process the remaining bytes from the input data */
		if (len - done >= SHA256_BLOCK_SIZE) {
			ts_state = irq_ts_save();
			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
				      : "+S"(src), "+D"(dst)
				      : "a"((long)-1),
					"c"((unsigned long)((len - done) / SHA256_BLOCK_SIZE)));
			irq_ts_restore(ts_state);
			done += ((len - done) - (len - done) % SHA256_BLOCK_SIZE);
			src = data + done;
		}
		partial = 0;
	}
	memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
	memcpy(sctx->buf + partial, src, len - done);

	return 0;
}

static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *state =
		(struct sha256_state *)shash_desc_ctx(desc);
	unsigned int partial, padlen;
	__be64 bits;
	static const u8 padding[64] = { 0x80, };

	bits = cpu_to_be64(state->count << 3);

	/* Pad out to 56 mod 64 */
	partial = state->count & 0x3f;
	padlen = (partial < 56) ? (56 - partial) : ((64 + 56) - partial);
	padlock_sha256_update_nano(desc, padding, padlen);

	/* Append length field bytes */
	padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));

	/* Swap to output */
	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);

	return 0;
}
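
/*
 * For the Nano variants the descriptor context is the plain
 * sha1_state/sha256_state itself (descsize == statesize), so export and
 * import are a straight memcpy rather than a call into a fallback tfm.
 */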
static int padlock_sha_export_nano(struct shash_desc *desc,
				   void *out)
{
	int statesize = crypto_shash_statesize(desc->tfm);
	void *sctx = shash_desc_ctx(desc);

	memcpy(out, sctx, statesize);
	return 0;
}

static int padlock_sha_import_nano(struct shash_desc *desc,
				   const void *in)
{
	int statesize = crypto_shash_statesize(desc->tfm);
	void *sctx = shash_desc_ctx(desc);

	memcpy(sctx, in, statesize);
	return 0;
}

static struct shash_alg sha1_alg_nano = {
	.digestsize = SHA1_DIGEST_SIZE,
	.init = padlock_sha1_init_nano,
	.update = padlock_sha1_update_nano,
	.final = padlock_sha1_final_nano,
	.export = padlock_sha_export_nano,
	.import = padlock_sha_import_nano,
	.descsize = sizeof(struct sha1_state),
	.statesize = sizeof(struct sha1_state),
	.base = {
		.cra_name = "sha1",
		.cra_driver_name = "sha1-padlock-nano",
		.cra_priority = PADLOCK_CRA_PRIORITY,
		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize = SHA1_BLOCK_SIZE,
		.cra_module = THIS_MODULE,
	}
};

static struct shash_alg sha256_alg_nano = {
	.digestsize = SHA256_DIGEST_SIZE,
	.init = padlock_sha256_init_nano,
	.update = padlock_sha256_update_nano,
	.final = padlock_sha256_final_nano,
	.export = padlock_sha_export_nano,
	.import = padlock_sha_import_nano,
	.descsize = sizeof(struct sha256_state),
	.statesize = sizeof(struct sha256_state),
	.base = {
		.cra_name = "sha256",
		.cra_driver_name = "sha256-padlock-nano",
		.cra_priority = PADLOCK_CRA_PRIORITY,
		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize = SHA256_BLOCK_SIZE,
		.cra_module = THIS_MODULE,
	}
};

static struct x86_cpu_id padlock_sha_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_PHE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);
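
/*
 * Processors with x86_model below 0x0f get the fallback-assisted
 * algorithms, whose hardware path can only finalize a hash; the VIA Nano
 * (model >= 0x0f) gets the *_nano variants, which hash multi-part input
 * directly in hardware and need no fallback.
 */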
static int __init padlock_init(void)
{
	int rc = -ENODEV;
	struct cpuinfo_x86 *c = &cpu_data(0);
	struct shash_alg *sha1;
	struct shash_alg *sha256;

	if (!x86_match_cpu(padlock_sha_ids) || !cpu_has_phe_enabled)
		return -ENODEV;

	/* Register the Nano-specific algorithms on a VIA Nano processor;
	 * otherwise register the original ones. */
	if (c->x86_model < 0x0f) {
		sha1 = &sha1_alg;
		sha256 = &sha256_alg;
	} else {
		sha1 = &sha1_alg_nano;
		sha256 = &sha256_alg_nano;
	}

	rc = crypto_register_shash(sha1);
	if (rc)
		goto out;

	rc = crypto_register_shash(sha256);
	if (rc)
		goto out_unreg1;

	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");

	return 0;

out_unreg1:
	crypto_unregister_shash(sha1);

out:
	printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
	return rc;
}

static void __exit padlock_fini(void)
{
	struct cpuinfo_x86 *c = &cpu_data(0);

	if (c->x86_model >= 0x0f) {
		crypto_unregister_shash(&sha1_alg_nano);
		crypto_unregister_shash(&sha256_alg_nano);
	} else {
		crypto_unregister_shash(&sha1_alg);
		crypto_unregister_shash(&sha256_alg);
	}
}

module_init(padlock_init);
module_exit(padlock_fini);

MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Michal Ludvig");

MODULE_ALIAS_CRYPTO("sha1-all");
MODULE_ALIAS_CRYPTO("sha256-all");
MODULE_ALIAS_CRYPTO("sha1-padlock");
MODULE_ALIAS_CRYPTO("sha256-padlock");