// SPDX-License-Identifier: GPL-2.0-only
/*
 * Cryptographic API.
 *
 * Support for VIA PadLock hardware crypto engine.
 *
 * Copyright (c) 2004 Michal Ludvig <michal@logix.cz>
 *
 */

#include <crypto/algapi.h>
#include <crypto/aes.h>
#include <crypto/padlock.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
#include <asm/byteorder.h>
#include <asm/processor.h>
#include <asm/fpu/api.h>

/*
 * Number of data blocks actually fetched for each xcrypt insn.
 * Processors with prefetch errata will fetch extra blocks.
 */
static unsigned int ecb_fetch_blocks = 2;
#define MAX_ECB_FETCH_BLOCKS (8)
#define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE)

static unsigned int cbc_fetch_blocks = 1;
#define MAX_CBC_FETCH_BLOCKS (4)
#define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE)

/* Control word. */
struct cword {
	unsigned int __attribute__ ((__packed__))
		rounds:4,
		algo:3,
		keygen:1,
		interm:1,
		encdec:1,
		ksize:2;
} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));

/* Whenever making any changes to the following
 * structure *make sure* you keep E, d_data
 * and cword aligned on 16-byte boundaries and
 * that the hardware can access 16 * 16 bytes of E and d_data
 * (only the first 15 * 16 bytes matter but the HW reads
 * more).
 */
struct aes_ctx {
	u32 E[AES_MAX_KEYLENGTH_U32]
		__attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
	u32 d_data[AES_MAX_KEYLENGTH_U32]
		__attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
	struct {
		struct cword encrypt;
		struct cword decrypt;
	} cword;
	u32 *D;
};

static DEFINE_PER_CPU(struct cword *, paes_last_cword);

/* Tells whether the ACE is capable of generating
   the extended key for a given key_len. */
static inline int
aes_hw_extkey_available(uint8_t key_len)
{
	/* TODO: We should check the actual CPU model/stepping
	   as it's possible that the capability will be
	   added in the next CPU revisions. */
	if (key_len == 16)
		return 1;
	return 0;
}

static inline struct aes_ctx *aes_ctx_common(void *ctx)
{
	unsigned long addr = (unsigned long)ctx;
	unsigned long align = PADLOCK_ALIGNMENT;

	if (align <= crypto_tfm_ctx_alignment())
		align = 1;
	return (struct aes_ctx *)ALIGN(addr, align);
}

static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
{
	return aes_ctx_common(crypto_tfm_ctx(tfm));
}

static inline struct aes_ctx *blk_aes_ctx(struct crypto_blkcipher *tfm)
{
	return aes_ctx_common(crypto_blkcipher_ctx(tfm));
}

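/*
 * Key setup: load the key material and prepare both control words for
 * this tfm.  With a 128-bit key the plain key is handed to the engine,
 * which expands it itself (ctx->D simply aliases ctx->E).  For 192/256-bit
 * keys the schedule is expanded in software via aes_expandkey() and the
 * keygen bit is set in both control words.  Finally, any per-CPU
 * paes_last_cword entries pointing at this context are invalidated.
 */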
static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
		       unsigned int key_len)
{
	struct aes_ctx *ctx = aes_ctx(tfm);
	const __le32 *key = (const __le32 *)in_key;
	u32 *flags = &tfm->crt_flags;
	struct crypto_aes_ctx gen_aes;
	int cpu;

	if (key_len % 8) {
		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	/*
	 * If the hardware is capable of generating the extended key
	 * itself we must supply the plain key for both encryption
	 * and decryption.
	 */
	ctx->D = ctx->E;

	ctx->E[0] = le32_to_cpu(key[0]);
	ctx->E[1] = le32_to_cpu(key[1]);
	ctx->E[2] = le32_to_cpu(key[2]);
	ctx->E[3] = le32_to_cpu(key[3]);

	/* Prepare control words. */
	memset(&ctx->cword, 0, sizeof(ctx->cword));

	ctx->cword.decrypt.encdec = 1;
	ctx->cword.encrypt.rounds = 10 + (key_len - 16) / 4;
	ctx->cword.decrypt.rounds = ctx->cword.encrypt.rounds;
	ctx->cword.encrypt.ksize = (key_len - 16) / 8;
	ctx->cword.decrypt.ksize = ctx->cword.encrypt.ksize;

	/* Don't generate extended keys if the hardware can do it. */
	if (aes_hw_extkey_available(key_len))
		goto ok;

	ctx->D = ctx->d_data;
	ctx->cword.encrypt.keygen = 1;
	ctx->cword.decrypt.keygen = 1;

	if (aes_expandkey(&gen_aes, in_key, key_len)) {
		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	memcpy(ctx->E, gen_aes.key_enc, AES_MAX_KEYLENGTH);
	memcpy(ctx->D, gen_aes.key_dec, AES_MAX_KEYLENGTH);

ok:
	for_each_online_cpu(cpu)
		if (&ctx->cword.encrypt == per_cpu(paes_last_cword, cpu) ||
		    &ctx->cword.decrypt == per_cpu(paes_last_cword, cpu))
			per_cpu(paes_last_cword, cpu) = NULL;

	return 0;
}

/* ====== Encryption/decryption routines ====== */

/* These are the real calls to PadLock. */
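/*
 * The xcrypt instructions keep the most recently loaded key and control
 * word cached in the engine; paes_last_cword tracks which control word
 * was used last on each CPU.  When a different control word is about to
 * be used, reloading EFLAGS (pushf/popf) forces the engine to re-read
 * the control word and key on the next xcrypt instruction.
 */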
static inline void padlock_reset_key(struct cword *cword)
{
	int cpu = raw_smp_processor_id();

	if (cword != per_cpu(paes_last_cword, cpu))
#ifndef CONFIG_X86_64
		asm volatile ("pushfl; popfl");
#else
		asm volatile ("pushfq; popfq");
#endif
}

static inline void padlock_store_cword(struct cword *cword)
{
	per_cpu(paes_last_cword, raw_smp_processor_id()) = cword;
}

/*
 * While the padlock instructions don't use FP/SSE registers, they
 * generate a spurious DNA fault when CR0.TS is '1'. Fortunately,
 * the kernel doesn't use CR0.TS.
 */

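/*
 * Register interface of the xcrypt instructions, mirroring the asm
 * constraints below: ESI = source, EDI = destination, EBX = key,
 * EDX = control word, ECX = block count.  The CBC variant additionally
 * takes a pointer to the IV in EAX and leaves a pointer to the IV for
 * the next chunk there, which rep_xcrypt_cbc() returns to its caller.
 */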
static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key,
				  struct cword *control_word, int count)
{
	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
		      : "+S"(input), "+D"(output)
		      : "d"(control_word), "b"(key), "c"(count));
}

static inline u8 *rep_xcrypt_cbc(const u8 *input, u8 *output, void *key,
				 u8 *iv, struct cword *control_word, int count)
{
	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
		      : "+S" (input), "+D" (output), "+a" (iv)
		      : "d" (control_word), "b" (key), "c" (count));
	return iv;
}

static void ecb_crypt_copy(const u8 *in, u8 *out, u32 *key,
			   struct cword *cword, int count)
{
	/*
	 * Padlock prefetches extra data so we must provide mapped input buffers.
	 * Assume there are at least 16 bytes of stack already in use.
	 */
	u8 buf[AES_BLOCK_SIZE * (MAX_ECB_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);

	memcpy(tmp, in, count * AES_BLOCK_SIZE);
	rep_xcrypt_ecb(tmp, out, key, cword, count);
}

static u8 *cbc_crypt_copy(const u8 *in, u8 *out, u32 *key,
			  u8 *iv, struct cword *cword, int count)
{
	/*
	 * Padlock prefetches extra data so we must provide mapped input buffers.
	 * Assume there are at least 16 bytes of stack already in use.
	 */
	u8 buf[AES_BLOCK_SIZE * (MAX_CBC_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);

	memcpy(tmp, in, count * AES_BLOCK_SIZE);
	return rep_xcrypt_cbc(tmp, out, key, iv, cword, count);
}

static inline void ecb_crypt(const u8 *in, u8 *out, u32 *key,
			     struct cword *cword, int count)
{
	/* Padlock in ECB mode fetches at least ecb_fetch_bytes of data.
	 * We could avoid some copying here but it's probably not worth it.
	 */
	if (unlikely(offset_in_page(in) + ecb_fetch_bytes > PAGE_SIZE)) {
		ecb_crypt_copy(in, out, key, cword, count);
		return;
	}

	rep_xcrypt_ecb(in, out, key, cword, count);
}

static inline u8 *cbc_crypt(const u8 *in, u8 *out, u32 *key,
			    u8 *iv, struct cword *cword, int count)
{
	/* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */
	if (unlikely(offset_in_page(in) + cbc_fetch_bytes > PAGE_SIZE))
		return cbc_crypt_copy(in, out, key, iv, cword, count);

	return rep_xcrypt_cbc(in, out, key, iv, cword, count);
}

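/*
 * Bulk helpers used by the blkcipher paths.  Requests smaller than the
 * fetch size are routed through ecb_crypt()/cbc_crypt(), which handle the
 * end-of-page copy.  Larger requests are split into a remainder of
 * count % fetch_blocks followed by a multiple of the fetch size, so that
 * the extra blocks fetched per xcrypt (see the fetch-block comments at
 * the top of the file) should always lie within the caller's source data.
 */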
static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
				      void *control_word, u32 count)
{
	u32 initial = count & (ecb_fetch_blocks - 1);

	if (count < ecb_fetch_blocks) {
		ecb_crypt(input, output, key, control_word, count);
		return;
	}

	count -= initial;

	if (initial)
		asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
			      : "+S"(input), "+D"(output)
			      : "d"(control_word), "b"(key), "c"(initial));

	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
		      : "+S"(input), "+D"(output)
		      : "d"(control_word), "b"(key), "c"(count));
}

static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
				     u8 *iv, void *control_word, u32 count)
{
	u32 initial = count & (cbc_fetch_blocks - 1);

	if (count < cbc_fetch_blocks)
		return cbc_crypt(input, output, key, iv, control_word, count);

	count -= initial;

	if (initial)
		asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
			      : "+S" (input), "+D" (output), "+a" (iv)
			      : "d" (control_word), "b" (key), "c" (initial));

	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
		      : "+S" (input), "+D" (output), "+a" (iv)
		      : "d" (control_word), "b" (key), "c" (count));
	return iv;
}

static void padlock_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct aes_ctx *ctx = aes_ctx(tfm);

	padlock_reset_key(&ctx->cword.encrypt);
	ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
	padlock_store_cword(&ctx->cword.encrypt);
}

static void padlock_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct aes_ctx *ctx = aes_ctx(tfm);

	padlock_reset_key(&ctx->cword.encrypt);
	ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
	padlock_store_cword(&ctx->cword.encrypt);
}

static struct crypto_alg aes_alg = {
	.cra_name		= "aes",
	.cra_driver_name	= "aes-padlock",
	.cra_priority		= PADLOCK_CRA_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct aes_ctx),
	.cra_alignmask		= PADLOCK_ALIGNMENT - 1,
	.cra_module		= THIS_MODULE,
	.cra_u			= {
		.cipher = {
			.cia_min_keysize	= AES_MIN_KEY_SIZE,
			.cia_max_keysize	= AES_MAX_KEY_SIZE,
			.cia_setkey		= aes_set_key,
			.cia_encrypt		= padlock_aes_encrypt,
			.cia_decrypt		= padlock_aes_decrypt,
		}
	}
};

static int ecb_aes_encrypt(struct blkcipher_desc *desc,
			   struct scatterlist *dst, struct scatterlist *src,
			   unsigned int nbytes)
{
	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	padlock_reset_key(&ctx->cword.encrypt);

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
				   ctx->E, &ctx->cword.encrypt,
				   nbytes / AES_BLOCK_SIZE);
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	padlock_store_cword(&ctx->cword.encrypt);

	return err;
}

static int ecb_aes_decrypt(struct blkcipher_desc *desc,
			   struct scatterlist *dst, struct scatterlist *src,
			   unsigned int nbytes)
{
	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	padlock_reset_key(&ctx->cword.decrypt);

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
				   ctx->D, &ctx->cword.decrypt,
				   nbytes / AES_BLOCK_SIZE);
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	padlock_store_cword(&ctx->cword.encrypt);

	return err;
}

static struct crypto_alg ecb_aes_alg = {
	.cra_name		= "ecb(aes)",
	.cra_driver_name	= "ecb-aes-padlock",
	.cra_priority		= PADLOCK_COMPOSITE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct aes_ctx),
	.cra_alignmask		= PADLOCK_ALIGNMENT - 1,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u			= {
		.blkcipher = {
			.min_keysize	= AES_MIN_KEY_SIZE,
			.max_keysize	= AES_MAX_KEY_SIZE,
			.setkey		= aes_set_key,
			.encrypt	= ecb_aes_encrypt,
			.decrypt	= ecb_aes_decrypt,
		}
	}
};

static int cbc_aes_encrypt(struct blkcipher_desc *desc,
			   struct scatterlist *dst, struct scatterlist *src,
			   unsigned int nbytes)
{
	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	padlock_reset_key(&ctx->cword.encrypt);

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr,
					    walk.dst.virt.addr, ctx->E,
					    walk.iv, &ctx->cword.encrypt,
					    nbytes / AES_BLOCK_SIZE);
		memcpy(walk.iv, iv, AES_BLOCK_SIZE);
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	padlock_store_cword(&ctx->cword.decrypt);

	return err;
}

static int cbc_aes_decrypt(struct blkcipher_desc *desc,
			   struct scatterlist *dst, struct scatterlist *src,
			   unsigned int nbytes)
{
	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	padlock_reset_key(&ctx->cword.encrypt);

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr,
				   ctx->D, walk.iv, &ctx->cword.decrypt,
				   nbytes / AES_BLOCK_SIZE);
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	padlock_store_cword(&ctx->cword.encrypt);

	return err;
}

static struct crypto_alg cbc_aes_alg = {
	.cra_name		= "cbc(aes)",
	.cra_driver_name	= "cbc-aes-padlock",
	.cra_priority		= PADLOCK_COMPOSITE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct aes_ctx),
	.cra_alignmask		= PADLOCK_ALIGNMENT - 1,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u			= {
		.blkcipher = {
			.min_keysize	= AES_MIN_KEY_SIZE,
			.max_keysize	= AES_MAX_KEY_SIZE,
			.ivsize		= AES_BLOCK_SIZE,
			.setkey		= aes_set_key,
			.encrypt	= cbc_aes_encrypt,
			.decrypt	= cbc_aes_decrypt,
		}
	}
};

static const struct x86_cpu_id padlock_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_XCRYPT),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, padlock_cpu_id);

static int __init padlock_init(void)
{
	int ret;
	struct cpuinfo_x86 *c = &cpu_data(0);

	if (!x86_match_cpu(padlock_cpu_id))
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_XCRYPT_EN)) {
		printk(KERN_NOTICE PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
		return -ENODEV;
	}

	if ((ret = crypto_register_alg(&aes_alg)))
		goto aes_err;

	if ((ret = crypto_register_alg(&ecb_aes_alg)))
		goto ecb_aes_err;

	if ((ret = crypto_register_alg(&cbc_aes_alg)))
		goto cbc_aes_err;

	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");

	if (c->x86 == 6 && c->x86_model == 15 && c->x86_stepping == 2) {
		ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
		cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
		printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
	}

out:
	return ret;

cbc_aes_err:
	crypto_unregister_alg(&ecb_aes_alg);
ecb_aes_err:
	crypto_unregister_alg(&aes_alg);
aes_err:
	printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n");
	goto out;
}

static void __exit padlock_fini(void)
{
	crypto_unregister_alg(&cbc_aes_alg);
	crypto_unregister_alg(&ecb_aes_alg);
	crypto_unregister_alg(&aes_alg);
}

module_init(padlock_init);
module_exit(padlock_fini);

MODULE_DESCRIPTION("VIA PadLock AES algorithm support");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Michal Ludvig");

MODULE_ALIAS_CRYPTO("aes");