/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>

#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst,
				      const u8 *src, bool xor);
asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);

static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst,
				      const u8 *src)
{
	__cast5_enc_blk_16way(ctx, dst, src, false);
}

static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
					  const u8 *src)
{
	__cast5_enc_blk_16way(ctx, dst, src, true);
}

static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
				      const u8 *src)
{
	cast5_dec_blk_16way(ctx, dst, src);
}

static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}
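
/*
 * Note: cast5_fpu_begin() enables the FPU only once at least
 * CAST5_PARALLEL_BLOCKS blocks are left in the walk, so small requests
 * never pay the kernel_fpu_begin()/kernel_fpu_end() round trip and are
 * handled entirely by the scalar __cast5_encrypt()/__cast5_decrypt()
 * leftover paths below.
 */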
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	int err;

	err = blkcipher_walk_virt(desc, walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			do {
				if (enc)
					cast5_enc_blk_xway(ctx, wdst, wsrc);
				else
					cast5_dec_blk_xway(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		/* Handle leftovers */
		do {
			if (enc)
				__cast5_encrypt(ctx, wdst, wsrc);
			else
				__cast5_decrypt(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}
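
/*
 * Unlike CBC encryption, CBC decryption can use the 16-way parallel
 * code: every plaintext block depends only on two ciphertext blocks.
 * The blocks are processed from last to first so that in-place
 * operation (dst == src) works; ivs[] keeps copies of the ciphertext
 * blocks that the batched decrypt would otherwise overwrite before
 * they are needed for chaining.
 */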
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
	u64 last_iv;
	int i;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
				ivs[i] = src[i];

			cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);

			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
				*(dst + (i + 1)) ^= *(ivs + i);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
	__be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
	int i;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			/* create ctrblks for parallel encrypt */
			for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) {
				if (dst != src)
					dst[i] = src[i];

				ctrblocks[i] = cpu_to_be64(ctrblk++);
			}

			cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
					       (u8 *)ctrblocks);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		if (dst != src)
			*dst = *src;

		ctrblocks[0] = cpu_to_be64(ctrblk++);

		__cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
		*dst ^= ctrblocks[0];

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
	return nbytes;
}
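
/*
 * CTR runs as a stream cipher (cra_blocksize is 1 below), so requests
 * need not be a multiple of the block size.  blkcipher_walk_virt_block()
 * yields chunks of at least CAST5_BLOCK_SIZE for the main loop; any
 * trailing partial block remains in walk.nbytes and is finished by
 * ctr_crypt_final() with one extra keystream block.
 */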
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

static struct crypto_alg cast5_algs[6] = { {
	.cra_name		= "__ecb-cast5-avx",
	.cra_driver_name	= "__driver-ecb-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "__cbc-cast5-avx",
	.cra_driver_name	= "__driver-cbc-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "__ctr-cast5-avx",
	.cra_driver_name	= "__driver-ctr-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
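	/*
	 * The "__"-prefixed blkciphers above are internal helpers with
	 * priority 0 and are not meant to be selected directly.  The
	 * async ablkcipher entries below expose them through the
	 * ablk_helper wrappers, which defer to cryptd whenever the FPU
	 * is not usable in the current context.
	 */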
	.cra_name		= "ecb(cast5)",
	.cra_driver_name	= "ecb-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(cast5)",
	.cra_driver_name	= "cbc-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			/*
			 * CBC encryption is serial and uses only the
			 * scalar cipher (no FPU), so it is safe to call
			 * synchronously in any context.
			 */
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(cast5)",
	.cra_driver_name	= "ctr-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			/* CTR mode is its own inverse */
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
} };

static int __init cast5_init(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS("cast5");