/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *	<Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/glue_helper.h>

#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}
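/*
 * Shared ECB helper for both directions: full sixteen-block batches are
 * processed by the AVX assembler routines while enough data remains; any
 * remaining blocks fall back to the generic C implementation.
 */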
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;

	err = blkcipher_walk_virt(desc, walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}
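/*
 * CBC decryption runs from the last block towards the first so that
 * in-place operation does not overwrite the ciphertext blocks still
 * needed as chaining values; the final ciphertext block is saved as
 * the IV for the next walk segment.
 */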
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}
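/*
 * Handle the trailing partial block in CTR mode: encrypt the counter
 * block into a keystream buffer and XOR the remaining bytes with it.
 */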
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

static struct crypto_alg cast5_algs[6] = { {
	.cra_name = "__ecb-cast5-avx",
	.cra_driver_name = "__driver-ecb-cast5-avx",
	.cra_priority = 0,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
		     CRYPTO_ALG_INTERNAL,
	.cra_blocksize = CAST5_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct cast5_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.setkey = cast5_setkey,
			.encrypt = ecb_encrypt,
			.decrypt = ecb_decrypt,
		},
	},
}, {
	.cra_name = "__cbc-cast5-avx",
	.cra_driver_name = "__driver-cbc-cast5-avx",
	.cra_priority = 0,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
		     CRYPTO_ALG_INTERNAL,
	.cra_blocksize = CAST5_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct cast5_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.setkey = cast5_setkey,
			.encrypt = cbc_encrypt,
			.decrypt = cbc_decrypt,
		},
	},
}, {
	.cra_name = "__ctr-cast5-avx",
	.cra_driver_name = "__driver-ctr-cast5-avx",
	.cra_priority = 0,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
		     CRYPTO_ALG_INTERNAL,
	.cra_blocksize = 1,
	.cra_ctxsize = sizeof(struct cast5_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.ivsize = CAST5_BLOCK_SIZE,
			.setkey = cast5_setkey,
			.encrypt = ctr_crypt,
			.decrypt = ctr_crypt,
		},
	},
}, {
	.cra_name = "ecb(cast5)",
	.cra_driver_name = "ecb-cast5-avx",
	.cra_priority = 200,
	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize = CAST5_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct async_helper_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_ablkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_init = ablk_init,
	.cra_exit = ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.setkey = ablk_set_key,
			.encrypt = ablk_encrypt,
			.decrypt = ablk_decrypt,
		},
	},
}, {
	.cra_name = "cbc(cast5)",
	.cra_driver_name = "cbc-cast5-avx",
	.cra_priority = 200,
	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize = CAST5_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct async_helper_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_ablkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_init = ablk_init,
	.cra_exit = ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.ivsize = CAST5_BLOCK_SIZE,
			.setkey = ablk_set_key,
			.encrypt = __ablk_encrypt,
			.decrypt = ablk_decrypt,
		},
	},
}, {
	.cra_name = "ctr(cast5)",
	.cra_driver_name = "ctr-cast5-avx",
	.cra_priority = 200,
	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize = 1,
	.cra_ctxsize = sizeof(struct async_helper_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_ablkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_init = ablk_init,
	.cra_exit = ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize = CAST5_MIN_KEY_SIZE,
			.max_keysize = CAST5_MAX_KEY_SIZE,
			.ivsize = CAST5_BLOCK_SIZE,
			.setkey = ablk_set_key,
			.encrypt = ablk_encrypt,
			.decrypt = ablk_encrypt,
			.geniv = "chainiv",
		},
	},
} };
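/*
 * Registration requires AVX plus OSXSAVE, and the OS must have enabled
 * saving of both SSE and YMM state in XCR0.
 */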
static int __init cast5_init(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");