// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Glue code for AES implementation for SPE instructions (PPC)
 *
 * Based on the generic implementation. The assembler module takes care
 * of the SPE registers so it can run from interrupt context.
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <crypto/aes.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/crypto.h>
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <crypto/gf128mul.h>
#include <crypto/scatterwalk.h>

/*
 * MAX_BYTES defines the number of bytes that are allowed to be processed
 * between preempt_disable() and preempt_enable(). e500 cores can issue two
 * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
 * bit unit (SU2). One of these can be a memory access that is executed via
 * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
 * 16 byte block, or 25 cycles per byte. Thus 768 bytes of input data
 * will need an estimated maximum of 20,000 cycles, headroom for cache misses
 * included. Even with the low end model clocked at 667 MHz this equals a
 * critical time window of less than 30us. The value has been chosen to
 * process a 512 byte disk block in one run or a large 1400 byte IPsec
 * network packet in two runs.
 */
#define MAX_BYTES 768

struct ppc_aes_ctx {
	u32 key_enc[AES_MAX_KEYLENGTH_U32];
	u32 key_dec[AES_MAX_KEYLENGTH_U32];
	u32 rounds;
};

struct ppc_xts_ctx {
	u32 key_enc[AES_MAX_KEYLENGTH_U32];
	u32 key_dec[AES_MAX_KEYLENGTH_U32];
	u32 key_twk[AES_MAX_KEYLENGTH_U32];
	u32 rounds;
};

extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			    u32 bytes);
extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
			    u32 bytes);
extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			    u32 bytes, u8 *iv);
extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
			    u32 bytes, u8 *iv);
extern void ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			  u32 bytes, u8 *iv);
extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
			    u32 bytes, u8 *iv, u32 *key_twk);
extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
			    u32 bytes, u8 *iv, u32 *key_twk);

extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);

extern void ppc_generate_decrypt_key(u32 *key_dec, u32 *key_enc,
				     unsigned int key_len);

static void spe_begin(void)
{
	/* disable preemption and save the user's SPE registers if required */
	preempt_disable();
	enable_kernel_spe();
}

static void spe_end(void)
{
	disable_kernel_spe();
	/* reenable preemption */
	preempt_enable();
}

static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
			  unsigned int key_len)
{
	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	if (key_len != AES_KEYSIZE_128 &&
	    key_len != AES_KEYSIZE_192 &&
	    key_len != AES_KEYSIZE_256) {
		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	switch (key_len) {
	case AES_KEYSIZE_128:
		ctx->rounds = 4;
		ppc_expand_key_128(ctx->key_enc, in_key);
		break;
	case AES_KEYSIZE_192:
		ctx->rounds = 5;
		ppc_expand_key_192(ctx->key_enc, in_key);
		break;
	case AES_KEYSIZE_256:
		ctx->rounds = 6;
		ppc_expand_key_256(ctx->key_enc, in_key);
		break;
	}

	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);

	return 0;
}

static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm,
				   const u8 *in_key, unsigned int key_len)
{
	return ppc_aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
}

static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			  unsigned int key_len)
{
	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err;

	err = xts_verify_key(tfm, in_key, key_len);
	if (err)
		return err;

	key_len >>= 1;

	if (key_len != AES_KEYSIZE_128 &&
	    key_len != AES_KEYSIZE_192 &&
	    key_len != AES_KEYSIZE_256) {
		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}

	switch (key_len) {
	case AES_KEYSIZE_128:
		ctx->rounds = 4;
		ppc_expand_key_128(ctx->key_enc, in_key);
		ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
		break;
	case AES_KEYSIZE_192:
		ctx->rounds = 5;
		ppc_expand_key_192(ctx->key_enc, in_key);
		ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
		break;
	case AES_KEYSIZE_256:
		ctx->rounds = 6;
		ppc_expand_key_256(ctx->key_enc, in_key);
		ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
		break;
	}

	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);

	return 0;
}

static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	spe_begin();
	ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
	spe_end();
}

static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	spe_begin();
	ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
	spe_end();
}

static int ppc_ecb_crypt(struct skcipher_request *req, bool enc)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) != 0) {
		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
		nbytes = round_down(nbytes, AES_BLOCK_SIZE);

		spe_begin();
		if (enc)
			ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
					ctx->key_enc, ctx->rounds, nbytes);
		else
			ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
					ctx->key_dec, ctx->rounds, nbytes);
		spe_end();

		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

static int ppc_ecb_encrypt(struct skcipher_request *req)
{
	return ppc_ecb_crypt(req, true);
}

static int ppc_ecb_decrypt(struct skcipher_request *req)
{
	return ppc_ecb_crypt(req, false);
}

static int ppc_cbc_crypt(struct skcipher_request *req, bool enc)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) != 0) {
		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
		nbytes = round_down(nbytes, AES_BLOCK_SIZE);

		spe_begin();
		if (enc)
			ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
					ctx->key_enc, ctx->rounds, nbytes,
					walk.iv);
		else
			ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
					ctx->key_dec, ctx->rounds, nbytes,
					walk.iv);
		spe_end();

		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

static int ppc_cbc_encrypt(struct skcipher_request *req)
{
	return ppc_cbc_crypt(req, true);
}

static int ppc_cbc_decrypt(struct skcipher_request *req)
{
	return ppc_cbc_crypt(req, false);
}

static int ppc_ctr_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) != 0) {
		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
		/* only the final chunk may contain a partial block */
		if (nbytes < walk.total)
			nbytes = round_down(nbytes, AES_BLOCK_SIZE);

		spe_begin();
		ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
			      ctx->key_enc, ctx->rounds, nbytes, walk.iv);
		spe_end();

		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

static int ppc_xts_crypt(struct skcipher_request *req, bool enc)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;
	u32 *twk;

	err = skcipher_walk_virt(&walk, req, false);
	twk = ctx->key_twk;

	while ((nbytes = walk.nbytes) != 0) {
		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
		nbytes = round_down(nbytes, AES_BLOCK_SIZE);

		spe_begin();
		if (enc)
			ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
					ctx->key_enc, ctx->rounds, nbytes,
					walk.iv, twk);
		else
			ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
					ctx->key_dec, ctx->rounds, nbytes,
					walk.iv, twk);
		spe_end();

		/*
		 * The tweak key is only passed for the first chunk; from then
		 * on walk.iv carries the running tweak between calls.
		 */
		twk = NULL;
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

static int ppc_xts_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int tail = req->cryptlen % AES_BLOCK_SIZE;
	int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
	struct skcipher_request subreq;
	u8 b[2][AES_BLOCK_SIZE];
	int err;

	if (req->cryptlen < AES_BLOCK_SIZE)
		return -EINVAL;

	if (tail) {
		subreq = *req;
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   req->cryptlen - tail, req->iv);
		req = &subreq;
	}

	err = ppc_xts_crypt(req, true);
	if (err || !tail)
		return err;

	scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE, 0);
	memcpy(b[1], b[0], tail);
	scatterwalk_map_and_copy(b[0], req->src, offset + AES_BLOCK_SIZE, tail, 0);

	spe_begin();
	ppc_encrypt_xts(b[0], b[0], ctx->key_enc, ctx->rounds, AES_BLOCK_SIZE,
			req->iv, NULL);
	spe_end();

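	/*
	 * b[0] and b[1] are adjacent in memory, so this single copy writes
	 * back the re-encrypted final full block followed by the stolen
	 * ciphertext tail that was saved in b[1].
	 */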
	scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);

	return 0;
}

static int ppc_xts_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int tail = req->cryptlen % AES_BLOCK_SIZE;
	int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
	struct skcipher_request subreq;
	u8 b[3][AES_BLOCK_SIZE];
	le128 twk;
	int err;

	if (req->cryptlen < AES_BLOCK_SIZE)
		return -EINVAL;

	if (tail) {
		subreq = *req;
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   offset, req->iv);
		req = &subreq;
	}

	err = ppc_xts_crypt(req, false);
	if (err || !tail)
		return err;

	/*
	 * Read the last full ciphertext block into b[1]; the partial tail
	 * spills over into the adjacent b[2].
	 */
	scatterwalk_map_and_copy(b[1], req->src, offset, AES_BLOCK_SIZE + tail, 0);

	spe_begin();
	if (!offset)
		ppc_encrypt_ecb(req->iv, req->iv, ctx->key_twk, ctx->rounds,
				AES_BLOCK_SIZE);

	gf128mul_x_ble(&twk, (le128 *)req->iv);

	ppc_decrypt_xts(b[1], b[1], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
			(u8 *)&twk, NULL);
	memcpy(b[0], b[2], tail);
	memcpy(b[0] + tail, b[1] + tail, AES_BLOCK_SIZE - tail);
	ppc_decrypt_xts(b[0], b[0], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
			req->iv, NULL);
	spe_end();

	scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);

	return 0;
}

/*
 * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
 * because the e500 platform can handle unaligned reads/writes very
 * efficiently. This improves IPsec throughput by another few percent.
 * Additionally we assume that the AES context is always aligned to at least
 * 8 bytes because it is created with kmalloc() in the crypto infrastructure.
 */

static struct crypto_alg aes_cipher_alg = {
	.cra_name		= "aes",
	.cra_driver_name	= "aes-ppc-spe",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct ppc_aes_ctx),
	.cra_alignmask		= 0,
	.cra_module		= THIS_MODULE,
	.cra_u			= {
		.cipher = {
			.cia_min_keysize	= AES_MIN_KEY_SIZE,
			.cia_max_keysize	= AES_MAX_KEY_SIZE,
			.cia_setkey		= ppc_aes_setkey,
			.cia_encrypt		= ppc_aes_encrypt,
			.cia_decrypt		= ppc_aes_decrypt
		}
	}
};

static struct skcipher_alg aes_skcipher_algs[] = {
	{
		.base.cra_name		= "ecb(aes)",
		.base.cra_driver_name	= "ecb-ppc-spe",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= AES_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct ppc_aes_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= AES_MIN_KEY_SIZE,
		.max_keysize		= AES_MAX_KEY_SIZE,
		.setkey			= ppc_aes_setkey_skcipher,
		.encrypt		= ppc_ecb_encrypt,
		.decrypt		= ppc_ecb_decrypt,
	}, {
		.base.cra_name		= "cbc(aes)",
		.base.cra_driver_name	= "cbc-ppc-spe",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= AES_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct ppc_aes_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= AES_MIN_KEY_SIZE,
		.max_keysize		= AES_MAX_KEY_SIZE,
		.ivsize			= AES_BLOCK_SIZE,
		.setkey			= ppc_aes_setkey_skcipher,
		.encrypt		= ppc_cbc_encrypt,
		.decrypt		= ppc_cbc_decrypt,
	}, {
		.base.cra_name		= "ctr(aes)",
		.base.cra_driver_name	= "ctr-ppc-spe",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct ppc_aes_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= AES_MIN_KEY_SIZE,
		.max_keysize		= AES_MAX_KEY_SIZE,
		.ivsize			= AES_BLOCK_SIZE,
		.setkey			= ppc_aes_setkey_skcipher,
		.encrypt		= ppc_ctr_crypt,
		.decrypt		= ppc_ctr_crypt,
		.chunksize		= AES_BLOCK_SIZE,
	}, {
		.base.cra_name		= "xts(aes)",
		.base.cra_driver_name	= "xts-ppc-spe",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= AES_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct ppc_xts_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= AES_MIN_KEY_SIZE * 2,
		.max_keysize		= AES_MAX_KEY_SIZE * 2,
		.ivsize			= AES_BLOCK_SIZE,
		.setkey			= ppc_xts_setkey,
		.encrypt		= ppc_xts_encrypt,
		.decrypt		= ppc_xts_decrypt,
	}
};

static int __init ppc_aes_mod_init(void)
{
	int err;

	err = crypto_register_alg(&aes_cipher_alg);
	if (err)
		return err;

	err = crypto_register_skciphers(aes_skcipher_algs,
					ARRAY_SIZE(aes_skcipher_algs));
	if (err)
		crypto_unregister_alg(&aes_cipher_alg);
	return err;
}

static void __exit ppc_aes_mod_fini(void)
{
	crypto_unregister_alg(&aes_cipher_alg);
	crypto_unregister_skciphers(aes_skcipher_algs,
				    ARRAY_SIZE(aes_skcipher_algs));
}

module_init(ppc_aes_mod_init);
module_exit(ppc_aes_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");

MODULE_ALIAS_CRYPTO("aes");
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
MODULE_ALIAS_CRYPTO("aes-ppc-spe");