/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/fpu/api.h>
#include <asm/crypto/glue_helper.h>

#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
                                __be64 *iv);

static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
        return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
                              NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
        return glue_fpu_end(fpu_enabled);
}

static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
                     bool enc)
{
        bool fpu_enabled = false;
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes;
        void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
        int err;

        fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;

        err = blkcipher_walk_virt(desc, walk);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk->nbytes)) {
                u8 *wsrc = walk->src.virt.addr;
                u8 *wdst = walk->dst.virt.addr;

                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

                /* Process multi-block batch */
                if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                        do {
                                fn(ctx, wdst, wsrc);

                                wsrc += bsize * CAST5_PARALLEL_BLOCKS;
                                wdst += bsize * CAST5_PARALLEL_BLOCKS;
                                nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                        } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                        if (nbytes < bsize)
                                goto done;
                }
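                /*
                 * Fewer than CAST5_PARALLEL_BLOCKS blocks remain in this
                 * walk chunk: switch to the generic one-block C routines
                 * for the tail.
                 */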
                fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

                /* Handle leftovers */
                do {
                        fn(ctx, wdst, wsrc);

                        wsrc += bsize;
                        wdst += bsize;
                        nbytes -= bsize;
                } while (nbytes >= bsize);

done:
                err = blkcipher_walk_done(desc, walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_crypt(desc, &walk, false);
}

static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
                                  struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 *iv = (u64 *)walk->iv;

        do {
                *dst = *src ^ *iv;
                __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
                iv = dst;

                src += 1;
                dst += 1;
                nbytes -= bsize;
        } while (nbytes >= bsize);

        *(u64 *)walk->iv = *iv;
        return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);

        while ((nbytes = walk.nbytes)) {
                nbytes = __cbc_encrypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        return err;
}

static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
                                  struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 last_iv;
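        /*
         * CBC decryption runs back to front so it can work in place: each
         * plaintext block is XORed with the ciphertext block preceding it,
         * and the original last ciphertext block is saved in last_iv to
         * become the chaining value for the next walk chunk.
         */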
        /* Start of the last block. */
        src += nbytes / bsize - 1;
        dst += nbytes / bsize - 1;

        last_iv = *src;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
                        src -= CAST5_PARALLEL_BLOCKS - 1;
                        dst -= CAST5_PARALLEL_BLOCKS - 1;

                        cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

                        nbytes -= bsize;
                        if (nbytes < bsize)
                                goto done;

                        *dst ^= *(src - 1);
                        src -= 1;
                        dst -= 1;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
        }

        /* Handle leftovers */
        for (;;) {
                __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

                nbytes -= bsize;
                if (nbytes < bsize)
                        break;

                *dst ^= *(src - 1);
                src -= 1;
                dst -= 1;
        }

done:
        *dst ^= *(u64 *)walk->iv;
        *(u64 *)walk->iv = last_iv;

        return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk.nbytes)) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
                nbytes = __cbc_decrypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

static void ctr_crypt_final(struct blkcipher_desc *desc,
                            struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        u8 *ctrblk = walk->iv;
        u8 keystream[CAST5_BLOCK_SIZE];
        u8 *src = walk->src.virt.addr;
        u8 *dst = walk->dst.virt.addr;
        unsigned int nbytes = walk->nbytes;

        __cast5_encrypt(ctx, keystream, ctrblk);
        crypto_xor(keystream, src, nbytes);
        memcpy(dst, keystream, nbytes);

        crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
                                struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
                                        (__be64 *)walk->iv);

                        src += CAST5_PARALLEL_BLOCKS;
                        dst += CAST5_PARALLEL_BLOCKS;
                        nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                if (nbytes < bsize)
                        goto done;
        }

        /* Handle leftovers */
        do {
                u64 ctrblk;

                if (dst != src)
                        *dst = *src;

                ctrblk = *(u64 *)walk->iv;
                be64_add_cpu((__be64 *)walk->iv, 1);

                __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
                *dst ^= ctrblk;

                src += 1;
                dst += 1;
                nbytes -= bsize;
        } while (nbytes >= bsize);

done:
        return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                     struct scatterlist *src, unsigned int nbytes)
{
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
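        /*
         * Walking with blkcipher_walk_virt_block() is expected to return
         * whole CAST5_BLOCK_SIZE blocks to the loop below; a short final
         * block (nbytes < CAST5_BLOCK_SIZE) is instead XORed with a single
         * keystream block in ctr_crypt_final() after the loop.
         */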
        while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
                nbytes = __ctr_crypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);

        if (walk.nbytes) {
                ctr_crypt_final(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, 0);
        }

        return err;
}
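/*
 * Algorithm registrations: the first three entries are the internal
 * synchronous blkcipher implementations (CRYPTO_ALG_INTERNAL, priority 0),
 * not meant to be selected directly; the last three expose them as
 * asynchronous ablkcipher algorithms through the ablk_helper/cryptd glue,
 * so requests arriving in a context where the FPU cannot be used are
 * deferred to a cryptd worker.
 */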
static struct crypto_alg cast5_algs[6] = { {
        .cra_name               = "__ecb-cast5-avx",
        .cra_driver_name        = "__driver-ecb-cast5-avx",
        .cra_priority           = 0,
        .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
                                  CRYPTO_ALG_INTERNAL,
        .cra_blocksize          = CAST5_BLOCK_SIZE,
        .cra_ctxsize            = sizeof(struct cast5_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_blkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .setkey         = cast5_setkey,
                        .encrypt        = ecb_encrypt,
                        .decrypt        = ecb_decrypt,
                },
        },
}, {
        .cra_name               = "__cbc-cast5-avx",
        .cra_driver_name        = "__driver-cbc-cast5-avx",
        .cra_priority           = 0,
        .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
                                  CRYPTO_ALG_INTERNAL,
        .cra_blocksize          = CAST5_BLOCK_SIZE,
        .cra_ctxsize            = sizeof(struct cast5_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_blkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .setkey         = cast5_setkey,
                        .encrypt        = cbc_encrypt,
                        .decrypt        = cbc_decrypt,
                },
        },
}, {
        .cra_name               = "__ctr-cast5-avx",
        .cra_driver_name        = "__driver-ctr-cast5-avx",
        .cra_priority           = 0,
        .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
                                  CRYPTO_ALG_INTERNAL,
        .cra_blocksize          = 1,
        .cra_ctxsize            = sizeof(struct cast5_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_blkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .ivsize         = CAST5_BLOCK_SIZE,
                        .setkey         = cast5_setkey,
                        .encrypt        = ctr_crypt,
                        .decrypt        = ctr_crypt,
                },
        },
}, {
        .cra_name               = "ecb(cast5)",
        .cra_driver_name        = "ecb-cast5-avx",
        .cra_priority           = 200,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize          = CAST5_BLOCK_SIZE,
        .cra_ctxsize            = sizeof(struct async_helper_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_ablkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_init               = ablk_init,
        .cra_exit               = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .setkey         = ablk_set_key,
                        .encrypt        = ablk_encrypt,
                        .decrypt        = ablk_decrypt,
                },
        },
}, {
        .cra_name               = "cbc(cast5)",
        .cra_driver_name        = "cbc-cast5-avx",
        .cra_priority           = 200,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize          = CAST5_BLOCK_SIZE,
        .cra_ctxsize            = sizeof(struct async_helper_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_ablkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_init               = ablk_init,
        .cra_exit               = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .ivsize         = CAST5_BLOCK_SIZE,
                        .setkey         = ablk_set_key,
                        .encrypt        = __ablk_encrypt,
                        .decrypt        = ablk_decrypt,
                },
        },
}, {
        .cra_name               = "ctr(cast5)",
        .cra_driver_name        = "ctr-cast5-avx",
        .cra_priority           = 200,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize          = 1,
        .cra_ctxsize            = sizeof(struct async_helper_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_ablkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_init               = ablk_init,
        .cra_exit               = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .ivsize         = CAST5_BLOCK_SIZE,
                        .setkey         = ablk_set_key,
                        .encrypt        = ablk_encrypt,
                        .decrypt        = ablk_encrypt,
                        .geniv          = "chainiv",
                },
        },
} };

static int __init cast5_init(void)
{
        const char *feature_name;

        if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
                               &feature_name)) {
                pr_info("CPU feature '%s' is not supported.\n", feature_name);
                return -ENODEV;
        }

        return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
        crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");