/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/fpu/api.h>
#include <asm/crypto/glue_helper.h>

#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
                                __be64 *iv);

static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
        return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
                              NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
        return glue_fpu_end(fpu_enabled);
}

static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
                     bool enc)
{
        bool fpu_enabled = false;
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes;
        void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
        int err;

        err = blkcipher_walk_virt(desc, walk);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk->nbytes)) {
                u8 *wsrc = walk->src.virt.addr;
                u8 *wdst = walk->dst.virt.addr;

                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

                /* Process multi-block batch */
                if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                        fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
                        do {
                                fn(ctx, wdst, wsrc);

                                wsrc += bsize * CAST5_PARALLEL_BLOCKS;
                                wdst += bsize * CAST5_PARALLEL_BLOCKS;
                                nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                        } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                        if (nbytes < bsize)
                                goto done;
                }

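                /*
                 * Fewer than CAST5_PARALLEL_BLOCKS blocks remain, so fall
                 * back to the generic single-block CAST5 routines for the
                 * tail of this chunk.
                 */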
                fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

                /* Handle leftovers */
                do {
                        fn(ctx, wdst, wsrc);

                        wsrc += bsize;
                        wdst += bsize;
                        nbytes -= bsize;
                } while (nbytes >= bsize);

done:
                err = blkcipher_walk_done(desc, walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_crypt(desc, &walk, false);
}

static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
                                  struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 *iv = (u64 *)walk->iv;

        do {
                *dst = *src ^ *iv;
                __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
                iv = dst;

                src += 1;
                dst += 1;
                nbytes -= bsize;
        } while (nbytes >= bsize);

        *(u64 *)walk->iv = *iv;
        return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);

        while ((nbytes = walk.nbytes)) {
                nbytes = __cbc_encrypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        return err;
}

static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
                                  struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 last_iv;

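        /*
         * Decrypt the chunk back to front: CBC needs only the previous
         * ciphertext block as chaining value, so walking in reverse lets the
         * 16-way routine process whole batches while the ciphertext is still
         * available.  The last ciphertext block is saved in last_iv and
         * becomes the IV for the next chunk.
         */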
        /* Start of the last block. */
        src += nbytes / bsize - 1;
        dst += nbytes / bsize - 1;

        last_iv = *src;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
                        src -= CAST5_PARALLEL_BLOCKS - 1;
                        dst -= CAST5_PARALLEL_BLOCKS - 1;

                        cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

                        nbytes -= bsize;
                        if (nbytes < bsize)
                                goto done;

                        *dst ^= *(src - 1);
                        src -= 1;
                        dst -= 1;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
        }

        /* Handle leftovers */
        for (;;) {
                __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

                nbytes -= bsize;
                if (nbytes < bsize)
                        break;

                *dst ^= *(src - 1);
                src -= 1;
                dst -= 1;
        }

done:
        *dst ^= *(u64 *)walk->iv;
        *(u64 *)walk->iv = last_iv;

        return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk.nbytes)) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
                nbytes = __cbc_decrypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

static void ctr_crypt_final(struct blkcipher_desc *desc,
                            struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        u8 *ctrblk = walk->iv;
        u8 keystream[CAST5_BLOCK_SIZE];
        u8 *src = walk->src.virt.addr;
        u8 *dst = walk->dst.virt.addr;
        unsigned int nbytes = walk->nbytes;

        __cast5_encrypt(ctx, keystream, ctrblk);
        crypto_xor_cpy(dst, keystream, src, nbytes);

        crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
                                struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
                                        (__be64 *)walk->iv);

                        src += CAST5_PARALLEL_BLOCKS;
                        dst += CAST5_PARALLEL_BLOCKS;
                        nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                if (nbytes < bsize)
                        goto done;
        }

        /* Handle leftovers */
        do {
                u64 ctrblk;

                if (dst != src)
                        *dst = *src;

                ctrblk = *(u64 *)walk->iv;
                be64_add_cpu((__be64 *)walk->iv, 1);

                __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
                *dst ^= ctrblk;

                src += 1;
                dst += 1;
                nbytes -= bsize;
        } while (nbytes >= bsize);

done:
        return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                     struct scatterlist *src, unsigned int nbytes)
{
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

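        /*
         * Handle all full blocks here; a trailing partial block, if any, is
         * encrypted afterwards by ctr_crypt_final() without the 16-way path.
         */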
        while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
                nbytes = __ctr_crypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);

        if (walk.nbytes) {
                ctr_crypt_final(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, 0);
        }

        return err;
}
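
/*
 * Algorithm registrations.  The first three entries are the internal
 * "__*-cast5-avx" blkciphers (CRYPTO_ALG_INTERNAL, priority 0), intended to
 * be used only through the wrappers below.  The last three entries are the
 * user-visible async algorithms ("ecb(cast5)", "cbc(cast5)", "ctr(cast5)",
 * priority 200): the ablk_helper runs the internal implementation directly
 * when the FPU is usable and defers to cryptd otherwise.
 */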

static struct crypto_alg cast5_algs[6] = { {
        .cra_name = "__ecb-cast5-avx",
        .cra_driver_name = "__driver-ecb-cast5-avx",
        .cra_priority = 0,
        .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
                     CRYPTO_ALG_INTERNAL,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct cast5_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_blkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .setkey = cast5_setkey,
                        .encrypt = ecb_encrypt,
                        .decrypt = ecb_decrypt,
                },
        },
}, {
        .cra_name = "__cbc-cast5-avx",
        .cra_driver_name = "__driver-cbc-cast5-avx",
        .cra_priority = 0,
        .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
                     CRYPTO_ALG_INTERNAL,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct cast5_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_blkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .setkey = cast5_setkey,
                        .encrypt = cbc_encrypt,
                        .decrypt = cbc_decrypt,
                },
        },
}, {
        .cra_name = "__ctr-cast5-avx",
        .cra_driver_name = "__driver-ctr-cast5-avx",
        .cra_priority = 0,
        .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
                     CRYPTO_ALG_INTERNAL,
        .cra_blocksize = 1,
        .cra_ctxsize = sizeof(struct cast5_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_blkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .ivsize = CAST5_BLOCK_SIZE,
                        .setkey = cast5_setkey,
                        .encrypt = ctr_crypt,
                        .decrypt = ctr_crypt,
                },
        },
}, {
        .cra_name = "ecb(cast5)",
        .cra_driver_name = "ecb-cast5-avx",
        .cra_priority = 200,
        .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct async_helper_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_ablkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_init = ablk_init,
        .cra_exit = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .setkey = ablk_set_key,
                        .encrypt = ablk_encrypt,
                        .decrypt = ablk_decrypt,
                },
        },
}, {
        .cra_name = "cbc(cast5)",
        .cra_driver_name = "cbc-cast5-avx",
        .cra_priority = 200,
        .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct async_helper_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_ablkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_init = ablk_init,
        .cra_exit = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .ivsize = CAST5_BLOCK_SIZE,
                        .setkey = ablk_set_key,
                        .encrypt = __ablk_encrypt,
                        .decrypt = ablk_decrypt,
                },
        },
}, {
        .cra_name = "ctr(cast5)",
        .cra_driver_name = "ctr-cast5-avx",
        .cra_priority = 200,
        .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize = 1,
        .cra_ctxsize = sizeof(struct async_helper_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_ablkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_init = ablk_init,
        .cra_exit = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .ivsize = CAST5_BLOCK_SIZE,
                        .setkey = ablk_set_key,
                        .encrypt = ablk_encrypt,
                        .decrypt = ablk_encrypt,
                        .geniv = "chainiv",
                },
        },
} };

static int __init cast5_init(void)
{
        const char *feature_name;

        if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
                               &feature_name)) {
                pr_info("CPU feature '%s' is not supported.\n", feature_name);
                return -ENODEV;
        }

        return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
        crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");
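
/*
 * Illustrative sketch (not part of this module): a kernel user would obtain
 * the accelerated cipher through the regular crypto API by name, here via
 * the legacy ablkcipher interface of this era.  The key, keylen, buf and
 * buflen variables are hypothetical, and error handling and async completion
 * handling are omitted.
 *
 *	struct crypto_ablkcipher *tfm;
 *	struct ablkcipher_request *req;
 *	struct scatterlist sg;
 *	u8 iv[CAST5_BLOCK_SIZE] = { 0 };
 *
 *	tfm = crypto_alloc_ablkcipher("cbc(cast5)", 0, 0);
 *	crypto_ablkcipher_setkey(tfm, key, keylen);
 *	req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
 *	sg_init_one(&sg, buf, buflen);
 *	ablkcipher_request_set_crypt(req, &sg, &sg, buflen, iv);
 *	crypto_ablkcipher_encrypt(req);
 *	ablkcipher_request_free(req);
 *	crypto_free_ablkcipher(tfm);
 */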