// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *	<Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 */

#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>

#define CAST5_PARALLEL_BLOCKS 16

asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
				 unsigned int keylen)
{
	return cast5_setkey(&tfm->base, key, keylen);
}

static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
				   unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      walk, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}
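
/*
 * ECB helper: each chunk returned by the skcipher walk is processed in
 * batches of CAST5_PARALLEL_BLOCKS (16) blocks with the AVX assembler
 * routines, and any remaining blocks fall back to the generic
 * __cast5_encrypt()/__cast5_decrypt().  cast5_fpu_begin() is expected to
 * enter an FPU section only once a chunk of at least 16 blocks is
 * available, so small requests avoid the kernel_fpu_begin()/end() cost.
 */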

static int ecb_crypt(struct skcipher_request *req, bool enc)
{
	bool fpu_enabled = false;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u8 *wsrc = walk.src.virt.addr;
		u8 *wdst = walk.dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct skcipher_request *req)
{
	return ecb_crypt(req, true);
}

static int ecb_decrypt(struct skcipher_request *req)
{
	return ecb_crypt(req, false);
}

static int cbc_encrypt(struct skcipher_request *req)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u64 *src = (u64 *)walk.src.virt.addr;
		u64 *dst = (u64 *)walk.dst.virt.addr;
		u64 *iv = (u64 *)walk.iv;

		do {
			*dst = *src ^ *iv;
			__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
			iv = dst;
			src++;
			dst++;
			nbytes -= bsize;
		} while (nbytes >= bsize);

		*(u64 *)walk.iv = *iv;
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
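
/*
 * CBC decryption runs back to front: it starts at the last block of the
 * chunk and works towards the first, so that in-place requests
 * (dst == src) still have the previous ciphertext block available for the
 * XOR step.  The last ciphertext block is saved up front and written back
 * to walk->iv as the IV for the next chunk.
 */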

static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
				  struct skcipher_walk *walk)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __cbc_decrypt(ctx, &walk);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
{
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor_cpy(dst, keystream, src, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}
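
/*
 * CTR mode: full blocks are handled here, 16 at a time by the AVX routine
 * (which is expected to advance the big-endian counter in walk->iv itself)
 * and then one at a time in C, incrementing the counter with
 * be64_add_cpu().  A trailing partial block, if any, is handled by
 * ctr_crypt_final() above.
 */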

static unsigned int __ctr_crypt(struct skcipher_walk *walk,
				struct cast5_ctx *ctx)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

static int ctr_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __ctr_crypt(&walk, ctx);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(&walk, ctx);
		err = skcipher_walk_done(&walk, 0);
	}

	return err;
}

static struct skcipher_alg cast5_algs[] = {
	{
		.base.cra_name		= "__ecb(cast5)",
		.base.cra_driver_name	= "__ecb-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ecb_encrypt,
		.decrypt		= ecb_decrypt,
	}, {
		.base.cra_name		= "__cbc(cast5)",
		.base.cra_driver_name	= "__cbc-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= cbc_encrypt,
		.decrypt		= cbc_decrypt,
	}, {
		.base.cra_name		= "__ctr(cast5)",
		.base.cra_driver_name	= "__ctr-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.chunksize		= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ctr_crypt,
		.decrypt		= ctr_crypt,
	}
};

static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];

static int __init cast5_init(void)
{
	const char *feature_name;

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
			       &feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return simd_register_skciphers_compat(cast5_algs,
					      ARRAY_SIZE(cast5_algs),
					      cast5_simd_algs);
}

static void __exit cast5_exit(void)
{
	simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
				  cast5_simd_algs);
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");
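
/*
 * Usage sketch: the algorithms above are registered as CRYPTO_ALG_INTERNAL
 * under the "__"-prefixed names and wrapped by the crypto_simd helpers at
 * registration time, so callers would typically request the plain names and
 * let the wrapper defer to cryptd when the FPU is not usable, roughly:
 *
 *	struct crypto_skcipher *tfm = crypto_alloc_skcipher("cbc(cast5)", 0, 0);
 */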