/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/glue_helper.h>

#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}

static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	err = blkcipher_walk_virt(desc, walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			/*
			 * Reselect the 16-way routine on every walk step:
			 * the leftover path below switches fn to the
			 * one-block routine.
			 */
			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

static struct crypto_alg cast5_algs[6] = { {
	.cra_name		= "__ecb-cast5-avx",
	.cra_driver_name	= "__driver-ecb-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "__cbc-cast5-avx",
	.cra_driver_name	= "__driver-cbc-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "__ctr-cast5-avx",
	.cra_driver_name	= "__driver-ctr-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	.cra_name		= "ecb(cast5)",
	.cra_driver_name	= "ecb-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(cast5)",
	.cra_driver_name	= "cbc-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(cast5)",
	.cra_driver_name	= "ctr-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			/* CTR decryption is the same keystream XOR. */
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
} };

static int __init cast5_init(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS("cast5");
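
/*
 * Illustrative sketch, not part of this module: one way a kernel-side user
 * of the same era could reach the accelerated "ctr(cast5)" transform that
 * this glue code registers, via the legacy ablkcipher API.  The function
 * name cast5_avx_ctr_example(), the all-zero key/IV and the buffer sizes
 * are made up for the example; completion handling for the asynchronous
 * return codes (-EINPROGRESS/-EBUSY) is omitted, and real callers should
 * pass heap rather than stack memory through the scatterlist.
 */
#if 0
#include <linux/scatterlist.h>

static int cast5_avx_ctr_example(void)
{
	struct crypto_ablkcipher *tfm;
	struct ablkcipher_request *req;
	struct scatterlist sg;
	u8 key[16] = { 0 };			/* 128-bit CAST5 key */
	u8 iv[CAST5_BLOCK_SIZE] = { 0 };	/* 64-bit counter block */
	u8 buf[64] = { 0 };			/* in-place plain/ciphertext */
	int err;

	/* Resolves to ctr-cast5-avx when this module is loaded and usable. */
	tfm = crypto_alloc_ablkcipher("ctr(cast5)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_ablkcipher_setkey(tfm, key, sizeof(key));
	if (err)
		goto out_free_tfm;

	req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, sizeof(buf));
	ablkcipher_request_set_callback(req, 0, NULL, NULL);
	ablkcipher_request_set_crypt(req, &sg, &sg, sizeof(buf), iv);

	/* Encrypts buf in place; CTR decryption would use the same call. */
	err = crypto_ablkcipher_encrypt(req);

	ablkcipher_request_free(req);
out_free_tfm:
	crypto_free_ablkcipher(tfm);
	return err;
}
#endif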