/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *	<Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 */

#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>

#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
				 unsigned int keylen)
{
	return cast5_setkey(&tfm->base, key, keylen);
}

static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
				   unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      walk, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}
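
/*
 * ECB helper shared by ecb_encrypt() and ecb_decrypt().  Whole batches of
 * CAST5_PARALLEL_BLOCKS blocks are handed to the 16-way AVX routines; any
 * remaining full blocks fall back to the scalar __cast5_encrypt() /
 * __cast5_decrypt() helpers.  glue_fpu_begin() only takes the FPU for
 * chunks large enough to use the vectorized path.
 */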
static int ecb_crypt(struct skcipher_request *req, bool enc)
{
	bool fpu_enabled = false;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u8 *wsrc = walk.src.virt.addr;
		u8 *wdst = walk.dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct skcipher_request *req)
{
	return ecb_crypt(req, true);
}

static int ecb_decrypt(struct skcipher_request *req)
{
	return ecb_crypt(req, false);
}

static int cbc_encrypt(struct skcipher_request *req)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u64 *src = (u64 *)walk.src.virt.addr;
		u64 *dst = (u64 *)walk.dst.virt.addr;
		u64 *iv = (u64 *)walk.iv;

		do {
			*dst = *src ^ *iv;
			__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
			iv = dst;
			src++;
			dst++;
			nbytes -= bsize;
		} while (nbytes >= bsize);

		*(u64 *)walk.iv = *iv;
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
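
/*
 * CBC decryption core.  Blocks are walked from the end of the chunk back
 * towards the beginning so that, for in-place (dst == src) requests, each
 * ciphertext block is still intact when it is XORed into the following
 * block's plaintext.  The last ciphertext block is saved in last_iv and
 * written back as the chaining value for the next walk step.
 */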
static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
				  struct skcipher_walk *walk)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __cbc_decrypt(ctx, &walk);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
{
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor_cpy(dst, keystream, src, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

static unsigned int __ctr_crypt(struct skcipher_walk *walk,
				struct cast5_ctx *ctx)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

static int ctr_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __ctr_crypt(&walk, ctx);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(&walk, ctx);
		err = skcipher_walk_done(&walk, 0);
	}

	return err;
}

static struct skcipher_alg cast5_algs[] = {
	{
		.base.cra_name		= "__ecb(cast5)",
		.base.cra_driver_name	= "__ecb-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ecb_encrypt,
		.decrypt		= ecb_decrypt,
	}, {
		.base.cra_name		= "__cbc(cast5)",
		.base.cra_driver_name	= "__cbc-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= cbc_encrypt,
		.decrypt		= cbc_decrypt,
	}, {
		.base.cra_name		= "__ctr(cast5)",
		.base.cra_driver_name	= "__ctr-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.chunksize		= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ctr_crypt,
		.decrypt		= ctr_crypt,
	}
};

static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];
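
/*
 * The "__"-prefixed CRYPTO_ALG_INTERNAL algorithms above are not usable by
 * callers directly.  cast5_init() registers SIMD wrappers for them (see
 * crypto/simd.c); the wrappers appear as "ecb(cast5)", "cbc(cast5)" and
 * "ctr(cast5)", call the AVX code when the FPU is usable in the current
 * context and defer to cryptd otherwise.
 */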
static int __init cast5_init(void)
{
	const char *feature_name;

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
				&feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return simd_register_skciphers_compat(cast5_algs,
					      ARRAY_SIZE(cast5_algs),
					      cast5_simd_algs);
}

static void __exit cast5_exit(void)
{
	simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
				  cast5_simd_algs);
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");