/*
 * Glue Code for assembler optimized version of Blowfish
 *
 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
 *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
 * CTR part based on code (crypto/ctr.c) by:
 *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <asm/processor.h>
#include <crypto/blowfish.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>

/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
				   bool xor);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);

/* 4-way parallel cipher functions */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
					const u8 *src, bool xor);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
				      const u8 *src);

static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
	__blowfish_enc_blk(ctx, dst, src, false);
}

static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
					const u8 *src)
{
	__blowfish_enc_blk(ctx, dst, src, true);
}

static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
					 const u8 *src)
{
	__blowfish_enc_blk_4way(ctx, dst, src, false);
}

static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
					     const u8 *src)
{
	__blowfish_enc_blk_4way(ctx, dst, src, true);
}

static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}

static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}

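/*
 * The ECB, CBC decryption and CTR paths below share one pattern: while at
 * least four blocks remain in the current walk chunk, they are fed to the
 * 4-way parallel assembler routine in one call, and the tail is finished
 * one block at a time.  CBC encryption is the exception: each block
 * chains into the next, so it has no 4-way path.
 */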
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     void (*fn)(struct bf_ctx *, u8 *, const u8 *),
		     void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
{
	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = BF_BLOCK_SIZE;
	unsigned int nbytes;
	int err;

	err = blkcipher_walk_virt(desc, walk);

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		/* Process four block batch */
		if (nbytes >= bsize * 4) {
			do {
				fn_4way(ctx, wdst, wsrc);

				wsrc += bsize * 4;
				wdst += bsize * 4;
				nbytes -= bsize * 4;
			} while (nbytes >= bsize * 4);

			if (nbytes < bsize)
				goto done;
		}

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
}

static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = BF_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

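/*
 * Unlike CBC encryption, CBC decryption parallelizes: P[i] = D(C[i]) ^
 * C[i-1] depends only on ciphertext.  The helper below walks each chunk
 * from the last block towards the first so that, when src == dst, the
 * 4-way decrypt runs before the ciphertext blocks still needed as
 * chaining values are overwritten; the three words a batch would clobber
 * are stashed in ivs[].  last_iv carries the final ciphertext block over
 * as the IV for the next chunk.
 */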
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = BF_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 ivs[4 - 1];
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process four block batch */
	if (nbytes >= bsize * 4) {
		do {
			nbytes -= bsize * 4 - bsize;
			src -= 4 - 1;
			dst -= 4 - 1;

			ivs[0] = src[0];
			ivs[1] = src[1];
			ivs[2] = src[2];

			blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);

			dst[1] ^= ivs[0];
			dst[2] ^= ivs[1];
			dst[3] ^= ivs[2];

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * 4);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	for (;;) {
		blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

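/*
 * CTR mode: Blowfish has an 8-byte block, so the whole IV is treated as
 * one 64-bit big-endian counter.  Keystream blocks are produced by
 * encrypting successive counter values and are XORed into the data by
 * the _xor variants of the assembler routines.  Since "ctr(blowfish)"
 * is registered below with cra_blocksize = 1, a trailing partial block
 * is legal; ctr_crypt_final() covers it by XORing only nbytes of
 * keystream into the remainder.
 */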
static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
{
	u8 *ctrblk = walk->iv;
	u8 keystream[BF_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	blowfish_enc_blk(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, BF_BLOCK_SIZE);
}

static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int bsize = BF_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
	__be64 ctrblocks[4];

	/* Process four block batch */
	if (nbytes >= bsize * 4) {
		do {
			if (dst != src) {
				dst[0] = src[0];
				dst[1] = src[1];
				dst[2] = src[2];
				dst[3] = src[3];
			}

			/* create ctrblks for parallel encrypt */
			ctrblocks[0] = cpu_to_be64(ctrblk++);
			ctrblocks[1] = cpu_to_be64(ctrblk++);
			ctrblocks[2] = cpu_to_be64(ctrblk++);
			ctrblocks[3] = cpu_to_be64(ctrblk++);

			blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
						  (u8 *)ctrblocks);

			src += 4;
			dst += 4;
		} while ((nbytes -= bsize * 4) >= bsize * 4);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		if (dst != src)
			*dst = *src;

		ctrblocks[0] = cpu_to_be64(ctrblk++);

		blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);

		src += 1;
		dst += 1;
	} while ((nbytes -= bsize) >= bsize);

done:
	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
	return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);

	while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	if (walk.nbytes) {
		ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

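/*
 * Usage sketch (illustrative only, not part of this module): a kernel
 * user could reach the "cbc(blowfish)" implementation registered below
 * through the blkcipher API of this kernel generation.  The key, iv and
 * buf variables are hypothetical placeholders, and buflen must be a
 * multiple of BF_BLOCK_SIZE:
 *
 *	struct crypto_blkcipher *tfm;
 *	struct blkcipher_desc desc;
 *	struct scatterlist sg;
 *	int err;
 *
 *	tfm = crypto_alloc_blkcipher("cbc(blowfish)", 0, 0);
 *	if (IS_ERR(tfm))
 *		return PTR_ERR(tfm);
 *
 *	desc.tfm = tfm;
 *	desc.flags = 0;
 *	err = crypto_blkcipher_setkey(tfm, key, keylen);
 *	crypto_blkcipher_set_iv(tfm, iv, BF_BLOCK_SIZE);
 *
 *	sg_init_one(&sg, buf, buflen);
 *	err = crypto_blkcipher_encrypt(&desc, &sg, &sg, buflen);
 *
 *	crypto_free_blkcipher(tfm);
 */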
static struct crypto_alg bf_algs[4] = { {
	.cra_name		= "blowfish",
	.cra_driver_name	= "blowfish-asm",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= BF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(bf_algs[0].cra_list),
	.cra_u = {
		.cipher = {
			.cia_min_keysize	= BF_MIN_KEY_SIZE,
			.cia_max_keysize	= BF_MAX_KEY_SIZE,
			.cia_setkey		= blowfish_setkey,
			.cia_encrypt		= blowfish_encrypt,
			.cia_decrypt		= blowfish_decrypt,
		}
	}
}, {
	.cra_name		= "ecb(blowfish)",
	.cra_driver_name	= "ecb-blowfish-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= BF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(bf_algs[1].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= BF_MIN_KEY_SIZE,
			.max_keysize	= BF_MAX_KEY_SIZE,
			.setkey		= blowfish_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(blowfish)",
	.cra_driver_name	= "cbc-blowfish-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= BF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(bf_algs[2].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= BF_MIN_KEY_SIZE,
			.max_keysize	= BF_MAX_KEY_SIZE,
			.ivsize		= BF_BLOCK_SIZE,
			.setkey		= blowfish_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(blowfish)",
	.cra_driver_name	= "ctr-blowfish-asm",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct bf_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(bf_algs[3].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= BF_MIN_KEY_SIZE,
			.max_keysize	= BF_MAX_KEY_SIZE,
			.ivsize		= BF_BLOCK_SIZE,
			.setkey		= blowfish_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
} };

static bool is_blacklisted_cpu(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return false;

	if (boot_cpu_data.x86 == 0x0f) {
		/*
		 * On Pentium 4, blowfish-x86_64 is slower than the generic C
		 * implementation because it uses 64-bit rotates, which are
		 * really slow on P4.  Therefore blacklist P4s.
		 */
		return true;
	}

	return false;
}

static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");

static int __init init(void)
{
	if (!force && is_blacklisted_cpu()) {
		printk(KERN_INFO
			"blowfish-x86_64: performance on this CPU "
			"would be suboptimal: disabling "
			"blowfish-x86_64.\n");
		return -ENODEV;
	}

	return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
}

static void __exit fini(void)
{
	crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
}

module_init(init);
module_exit(fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");
MODULE_ALIAS("blowfish");
MODULE_ALIAS("blowfish-asm");