14d6d6a2cSJohannes Goetzfried /* 24d6d6a2cSJohannes Goetzfried * Glue Code for the AVX assembler implemention of the Cast5 Cipher 34d6d6a2cSJohannes Goetzfried * 44d6d6a2cSJohannes Goetzfried * Copyright (C) 2012 Johannes Goetzfried 54d6d6a2cSJohannes Goetzfried * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> 64d6d6a2cSJohannes Goetzfried * 74d6d6a2cSJohannes Goetzfried * This program is free software; you can redistribute it and/or modify 84d6d6a2cSJohannes Goetzfried * it under the terms of the GNU General Public License as published by 94d6d6a2cSJohannes Goetzfried * the Free Software Foundation; either version 2 of the License, or 104d6d6a2cSJohannes Goetzfried * (at your option) any later version. 114d6d6a2cSJohannes Goetzfried * 124d6d6a2cSJohannes Goetzfried * This program is distributed in the hope that it will be useful, 134d6d6a2cSJohannes Goetzfried * but WITHOUT ANY WARRANTY; without even the implied warranty of 144d6d6a2cSJohannes Goetzfried * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 154d6d6a2cSJohannes Goetzfried * GNU General Public License for more details. 164d6d6a2cSJohannes Goetzfried * 174d6d6a2cSJohannes Goetzfried * You should have received a copy of the GNU General Public License 184d6d6a2cSJohannes Goetzfried * along with this program; if not, write to the Free Software 194d6d6a2cSJohannes Goetzfried * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 204d6d6a2cSJohannes Goetzfried * USA 214d6d6a2cSJohannes Goetzfried * 224d6d6a2cSJohannes Goetzfried */ 234d6d6a2cSJohannes Goetzfried 244d6d6a2cSJohannes Goetzfried #include <linux/module.h> 254d6d6a2cSJohannes Goetzfried #include <linux/hardirq.h> 264d6d6a2cSJohannes Goetzfried #include <linux/types.h> 274d6d6a2cSJohannes Goetzfried #include <linux/crypto.h> 284d6d6a2cSJohannes Goetzfried #include <linux/err.h> 294d6d6a2cSJohannes Goetzfried #include <crypto/algapi.h> 304d6d6a2cSJohannes Goetzfried #include <crypto/cast5.h> 314d6d6a2cSJohannes Goetzfried #include <crypto/cryptd.h> 324d6d6a2cSJohannes Goetzfried #include <crypto/ctr.h> 334d6d6a2cSJohannes Goetzfried #include <asm/xcr.h> 344d6d6a2cSJohannes Goetzfried #include <asm/xsave.h> 354d6d6a2cSJohannes Goetzfried #include <asm/crypto/ablk_helper.h> 364d6d6a2cSJohannes Goetzfried #include <asm/crypto/glue_helper.h> 374d6d6a2cSJohannes Goetzfried 384d6d6a2cSJohannes Goetzfried #define CAST5_PARALLEL_BLOCKS 16 394d6d6a2cSJohannes Goetzfried 404d6d6a2cSJohannes Goetzfried asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst, 414d6d6a2cSJohannes Goetzfried const u8 *src, bool xor); 424d6d6a2cSJohannes Goetzfried asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst, 434d6d6a2cSJohannes Goetzfried const u8 *src); 444d6d6a2cSJohannes Goetzfried 454d6d6a2cSJohannes Goetzfried static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst, 464d6d6a2cSJohannes Goetzfried const u8 *src) 474d6d6a2cSJohannes Goetzfried { 484d6d6a2cSJohannes Goetzfried __cast5_enc_blk_16way(ctx, dst, src, false); 494d6d6a2cSJohannes Goetzfried } 504d6d6a2cSJohannes Goetzfried 514d6d6a2cSJohannes Goetzfried static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst, 524d6d6a2cSJohannes Goetzfried const u8 *src) 534d6d6a2cSJohannes Goetzfried { 544d6d6a2cSJohannes Goetzfried __cast5_enc_blk_16way(ctx, dst, src, true); 554d6d6a2cSJohannes Goetzfried } 564d6d6a2cSJohannes Goetzfried 574d6d6a2cSJohannes Goetzfried static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst, 584d6d6a2cSJohannes Goetzfried const u8 *src) 594d6d6a2cSJohannes Goetzfried { 604d6d6a2cSJohannes Goetzfried cast5_dec_blk_16way(ctx, dst, src); 614d6d6a2cSJohannes Goetzfried } 624d6d6a2cSJohannes Goetzfried 634d6d6a2cSJohannes Goetzfried 644d6d6a2cSJohannes Goetzfried static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes) 654d6d6a2cSJohannes Goetzfried { 664d6d6a2cSJohannes Goetzfried return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, 674d6d6a2cSJohannes Goetzfried NULL, fpu_enabled, nbytes); 684d6d6a2cSJohannes Goetzfried } 694d6d6a2cSJohannes Goetzfried 704d6d6a2cSJohannes Goetzfried static inline void cast5_fpu_end(bool fpu_enabled) 714d6d6a2cSJohannes Goetzfried { 724d6d6a2cSJohannes Goetzfried return glue_fpu_end(fpu_enabled); 734d6d6a2cSJohannes Goetzfried } 744d6d6a2cSJohannes Goetzfried 754d6d6a2cSJohannes Goetzfried static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, 764d6d6a2cSJohannes Goetzfried bool enc) 774d6d6a2cSJohannes Goetzfried { 784d6d6a2cSJohannes Goetzfried bool fpu_enabled = false; 794d6d6a2cSJohannes Goetzfried struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 804d6d6a2cSJohannes Goetzfried const unsigned int bsize = CAST5_BLOCK_SIZE; 814d6d6a2cSJohannes Goetzfried unsigned int nbytes; 824d6d6a2cSJohannes Goetzfried int err; 834d6d6a2cSJohannes Goetzfried 844d6d6a2cSJohannes Goetzfried err = blkcipher_walk_virt(desc, walk); 854d6d6a2cSJohannes Goetzfried desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 864d6d6a2cSJohannes Goetzfried 874d6d6a2cSJohannes Goetzfried while ((nbytes = walk->nbytes)) { 884d6d6a2cSJohannes Goetzfried u8 *wsrc = walk->src.virt.addr; 894d6d6a2cSJohannes Goetzfried u8 *wdst = walk->dst.virt.addr; 904d6d6a2cSJohannes Goetzfried 914d6d6a2cSJohannes Goetzfried fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); 924d6d6a2cSJohannes Goetzfried 934d6d6a2cSJohannes Goetzfried /* Process multi-block batch */ 944d6d6a2cSJohannes Goetzfried if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { 954d6d6a2cSJohannes Goetzfried do { 964d6d6a2cSJohannes Goetzfried if (enc) 974d6d6a2cSJohannes Goetzfried cast5_enc_blk_xway(ctx, wdst, wsrc); 984d6d6a2cSJohannes Goetzfried else 994d6d6a2cSJohannes Goetzfried cast5_dec_blk_xway(ctx, wdst, wsrc); 1004d6d6a2cSJohannes Goetzfried 1014d6d6a2cSJohannes Goetzfried wsrc += bsize * CAST5_PARALLEL_BLOCKS; 1024d6d6a2cSJohannes Goetzfried wdst += bsize * CAST5_PARALLEL_BLOCKS; 1034d6d6a2cSJohannes Goetzfried nbytes -= bsize * CAST5_PARALLEL_BLOCKS; 1044d6d6a2cSJohannes Goetzfried } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); 1054d6d6a2cSJohannes Goetzfried 1064d6d6a2cSJohannes Goetzfried if (nbytes < bsize) 1074d6d6a2cSJohannes Goetzfried goto done; 1084d6d6a2cSJohannes Goetzfried } 1094d6d6a2cSJohannes Goetzfried 1104d6d6a2cSJohannes Goetzfried /* Handle leftovers */ 1114d6d6a2cSJohannes Goetzfried do { 1124d6d6a2cSJohannes Goetzfried if (enc) 1134d6d6a2cSJohannes Goetzfried __cast5_encrypt(ctx, wdst, wsrc); 1144d6d6a2cSJohannes Goetzfried else 1154d6d6a2cSJohannes Goetzfried __cast5_decrypt(ctx, wdst, wsrc); 1164d6d6a2cSJohannes Goetzfried 1174d6d6a2cSJohannes Goetzfried wsrc += bsize; 1184d6d6a2cSJohannes Goetzfried wdst += bsize; 1194d6d6a2cSJohannes Goetzfried nbytes -= bsize; 1204d6d6a2cSJohannes Goetzfried } while (nbytes >= bsize); 1214d6d6a2cSJohannes Goetzfried 1224d6d6a2cSJohannes Goetzfried done: 1234d6d6a2cSJohannes Goetzfried err = blkcipher_walk_done(desc, walk, nbytes); 1244d6d6a2cSJohannes Goetzfried } 1254d6d6a2cSJohannes Goetzfried 1264d6d6a2cSJohannes Goetzfried cast5_fpu_end(fpu_enabled); 1274d6d6a2cSJohannes Goetzfried return err; 1284d6d6a2cSJohannes Goetzfried } 1294d6d6a2cSJohannes Goetzfried 1304d6d6a2cSJohannes Goetzfried static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 1314d6d6a2cSJohannes Goetzfried struct scatterlist *src, unsigned int nbytes) 1324d6d6a2cSJohannes Goetzfried { 1334d6d6a2cSJohannes Goetzfried struct blkcipher_walk walk; 1344d6d6a2cSJohannes Goetzfried 1354d6d6a2cSJohannes Goetzfried blkcipher_walk_init(&walk, dst, src, nbytes); 1364d6d6a2cSJohannes Goetzfried return ecb_crypt(desc, &walk, true); 1374d6d6a2cSJohannes Goetzfried } 1384d6d6a2cSJohannes Goetzfried 1394d6d6a2cSJohannes Goetzfried static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 1404d6d6a2cSJohannes Goetzfried struct scatterlist *src, unsigned int nbytes) 1414d6d6a2cSJohannes Goetzfried { 1424d6d6a2cSJohannes Goetzfried struct blkcipher_walk walk; 1434d6d6a2cSJohannes Goetzfried 1444d6d6a2cSJohannes Goetzfried blkcipher_walk_init(&walk, dst, src, nbytes); 1454d6d6a2cSJohannes Goetzfried return ecb_crypt(desc, &walk, false); 1464d6d6a2cSJohannes Goetzfried } 1474d6d6a2cSJohannes Goetzfried 1484d6d6a2cSJohannes Goetzfried static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, 1494d6d6a2cSJohannes Goetzfried struct blkcipher_walk *walk) 1504d6d6a2cSJohannes Goetzfried { 1514d6d6a2cSJohannes Goetzfried struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 1524d6d6a2cSJohannes Goetzfried const unsigned int bsize = CAST5_BLOCK_SIZE; 1534d6d6a2cSJohannes Goetzfried unsigned int nbytes = walk->nbytes; 1544d6d6a2cSJohannes Goetzfried u64 *src = (u64 *)walk->src.virt.addr; 1554d6d6a2cSJohannes Goetzfried u64 *dst = (u64 *)walk->dst.virt.addr; 1564d6d6a2cSJohannes Goetzfried u64 *iv = (u64 *)walk->iv; 1574d6d6a2cSJohannes Goetzfried 1584d6d6a2cSJohannes Goetzfried do { 1594d6d6a2cSJohannes Goetzfried *dst = *src ^ *iv; 1604d6d6a2cSJohannes Goetzfried __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); 1614d6d6a2cSJohannes Goetzfried iv = dst; 1624d6d6a2cSJohannes Goetzfried 1634d6d6a2cSJohannes Goetzfried src += 1; 1644d6d6a2cSJohannes Goetzfried dst += 1; 1654d6d6a2cSJohannes Goetzfried nbytes -= bsize; 1664d6d6a2cSJohannes Goetzfried } while (nbytes >= bsize); 1674d6d6a2cSJohannes Goetzfried 168200429ccSJussi Kivilinna *(u64 *)walk->iv = *iv; 1694d6d6a2cSJohannes Goetzfried return nbytes; 1704d6d6a2cSJohannes Goetzfried } 1714d6d6a2cSJohannes Goetzfried 1724d6d6a2cSJohannes Goetzfried static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 1734d6d6a2cSJohannes Goetzfried struct scatterlist *src, unsigned int nbytes) 1744d6d6a2cSJohannes Goetzfried { 1754d6d6a2cSJohannes Goetzfried struct blkcipher_walk walk; 1764d6d6a2cSJohannes Goetzfried int err; 1774d6d6a2cSJohannes Goetzfried 1784d6d6a2cSJohannes Goetzfried blkcipher_walk_init(&walk, dst, src, nbytes); 1794d6d6a2cSJohannes Goetzfried err = blkcipher_walk_virt(desc, &walk); 1804d6d6a2cSJohannes Goetzfried 1814d6d6a2cSJohannes Goetzfried while ((nbytes = walk.nbytes)) { 1824d6d6a2cSJohannes Goetzfried nbytes = __cbc_encrypt(desc, &walk); 1834d6d6a2cSJohannes Goetzfried err = blkcipher_walk_done(desc, &walk, nbytes); 1844d6d6a2cSJohannes Goetzfried } 1854d6d6a2cSJohannes Goetzfried 1864d6d6a2cSJohannes Goetzfried return err; 1874d6d6a2cSJohannes Goetzfried } 1884d6d6a2cSJohannes Goetzfried 1894d6d6a2cSJohannes Goetzfried static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, 1904d6d6a2cSJohannes Goetzfried struct blkcipher_walk *walk) 1914d6d6a2cSJohannes Goetzfried { 1924d6d6a2cSJohannes Goetzfried struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 1934d6d6a2cSJohannes Goetzfried const unsigned int bsize = CAST5_BLOCK_SIZE; 1944d6d6a2cSJohannes Goetzfried unsigned int nbytes = walk->nbytes; 1954d6d6a2cSJohannes Goetzfried u64 *src = (u64 *)walk->src.virt.addr; 1964d6d6a2cSJohannes Goetzfried u64 *dst = (u64 *)walk->dst.virt.addr; 1974d6d6a2cSJohannes Goetzfried u64 ivs[CAST5_PARALLEL_BLOCKS - 1]; 1984d6d6a2cSJohannes Goetzfried u64 last_iv; 1994d6d6a2cSJohannes Goetzfried int i; 2004d6d6a2cSJohannes Goetzfried 2014d6d6a2cSJohannes Goetzfried /* Start of the last block. */ 2024d6d6a2cSJohannes Goetzfried src += nbytes / bsize - 1; 2034d6d6a2cSJohannes Goetzfried dst += nbytes / bsize - 1; 2044d6d6a2cSJohannes Goetzfried 2054d6d6a2cSJohannes Goetzfried last_iv = *src; 2064d6d6a2cSJohannes Goetzfried 2074d6d6a2cSJohannes Goetzfried /* Process multi-block batch */ 2084d6d6a2cSJohannes Goetzfried if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { 2094d6d6a2cSJohannes Goetzfried do { 2104d6d6a2cSJohannes Goetzfried nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1); 2114d6d6a2cSJohannes Goetzfried src -= CAST5_PARALLEL_BLOCKS - 1; 2124d6d6a2cSJohannes Goetzfried dst -= CAST5_PARALLEL_BLOCKS - 1; 2134d6d6a2cSJohannes Goetzfried 2144d6d6a2cSJohannes Goetzfried for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++) 2154d6d6a2cSJohannes Goetzfried ivs[i] = src[i]; 2164d6d6a2cSJohannes Goetzfried 2174d6d6a2cSJohannes Goetzfried cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); 2184d6d6a2cSJohannes Goetzfried 2194d6d6a2cSJohannes Goetzfried for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++) 2204d6d6a2cSJohannes Goetzfried *(dst + (i + 1)) ^= *(ivs + i); 2214d6d6a2cSJohannes Goetzfried 2224d6d6a2cSJohannes Goetzfried nbytes -= bsize; 2234d6d6a2cSJohannes Goetzfried if (nbytes < bsize) 2244d6d6a2cSJohannes Goetzfried goto done; 2254d6d6a2cSJohannes Goetzfried 2264d6d6a2cSJohannes Goetzfried *dst ^= *(src - 1); 2274d6d6a2cSJohannes Goetzfried src -= 1; 2284d6d6a2cSJohannes Goetzfried dst -= 1; 2294d6d6a2cSJohannes Goetzfried } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); 2304d6d6a2cSJohannes Goetzfried 2314d6d6a2cSJohannes Goetzfried if (nbytes < bsize) 2324d6d6a2cSJohannes Goetzfried goto done; 2334d6d6a2cSJohannes Goetzfried } 2344d6d6a2cSJohannes Goetzfried 2354d6d6a2cSJohannes Goetzfried /* Handle leftovers */ 2364d6d6a2cSJohannes Goetzfried for (;;) { 2374d6d6a2cSJohannes Goetzfried __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src); 2384d6d6a2cSJohannes Goetzfried 2394d6d6a2cSJohannes Goetzfried nbytes -= bsize; 2404d6d6a2cSJohannes Goetzfried if (nbytes < bsize) 2414d6d6a2cSJohannes Goetzfried break; 2424d6d6a2cSJohannes Goetzfried 2434d6d6a2cSJohannes Goetzfried *dst ^= *(src - 1); 2444d6d6a2cSJohannes Goetzfried src -= 1; 2454d6d6a2cSJohannes Goetzfried dst -= 1; 2464d6d6a2cSJohannes Goetzfried } 2474d6d6a2cSJohannes Goetzfried 2484d6d6a2cSJohannes Goetzfried done: 2494d6d6a2cSJohannes Goetzfried *dst ^= *(u64 *)walk->iv; 2504d6d6a2cSJohannes Goetzfried *(u64 *)walk->iv = last_iv; 2514d6d6a2cSJohannes Goetzfried 2524d6d6a2cSJohannes Goetzfried return nbytes; 2534d6d6a2cSJohannes Goetzfried } 2544d6d6a2cSJohannes Goetzfried 2554d6d6a2cSJohannes Goetzfried static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 2564d6d6a2cSJohannes Goetzfried struct scatterlist *src, unsigned int nbytes) 2574d6d6a2cSJohannes Goetzfried { 2584d6d6a2cSJohannes Goetzfried bool fpu_enabled = false; 2594d6d6a2cSJohannes Goetzfried struct blkcipher_walk walk; 2604d6d6a2cSJohannes Goetzfried int err; 2614d6d6a2cSJohannes Goetzfried 2624d6d6a2cSJohannes Goetzfried blkcipher_walk_init(&walk, dst, src, nbytes); 2634d6d6a2cSJohannes Goetzfried err = blkcipher_walk_virt(desc, &walk); 2644d6d6a2cSJohannes Goetzfried desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 2654d6d6a2cSJohannes Goetzfried 2664d6d6a2cSJohannes Goetzfried while ((nbytes = walk.nbytes)) { 2674d6d6a2cSJohannes Goetzfried fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); 2684d6d6a2cSJohannes Goetzfried nbytes = __cbc_decrypt(desc, &walk); 2694d6d6a2cSJohannes Goetzfried err = blkcipher_walk_done(desc, &walk, nbytes); 2704d6d6a2cSJohannes Goetzfried } 2714d6d6a2cSJohannes Goetzfried 2724d6d6a2cSJohannes Goetzfried cast5_fpu_end(fpu_enabled); 2734d6d6a2cSJohannes Goetzfried return err; 2744d6d6a2cSJohannes Goetzfried } 2754d6d6a2cSJohannes Goetzfried 2764d6d6a2cSJohannes Goetzfried static void ctr_crypt_final(struct blkcipher_desc *desc, 2774d6d6a2cSJohannes Goetzfried struct blkcipher_walk *walk) 2784d6d6a2cSJohannes Goetzfried { 2794d6d6a2cSJohannes Goetzfried struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 2804d6d6a2cSJohannes Goetzfried u8 *ctrblk = walk->iv; 2814d6d6a2cSJohannes Goetzfried u8 keystream[CAST5_BLOCK_SIZE]; 2824d6d6a2cSJohannes Goetzfried u8 *src = walk->src.virt.addr; 2834d6d6a2cSJohannes Goetzfried u8 *dst = walk->dst.virt.addr; 2844d6d6a2cSJohannes Goetzfried unsigned int nbytes = walk->nbytes; 2854d6d6a2cSJohannes Goetzfried 2864d6d6a2cSJohannes Goetzfried __cast5_encrypt(ctx, keystream, ctrblk); 2874d6d6a2cSJohannes Goetzfried crypto_xor(keystream, src, nbytes); 2884d6d6a2cSJohannes Goetzfried memcpy(dst, keystream, nbytes); 2894d6d6a2cSJohannes Goetzfried 2904d6d6a2cSJohannes Goetzfried crypto_inc(ctrblk, CAST5_BLOCK_SIZE); 2914d6d6a2cSJohannes Goetzfried } 2924d6d6a2cSJohannes Goetzfried 2934d6d6a2cSJohannes Goetzfried static unsigned int __ctr_crypt(struct blkcipher_desc *desc, 2944d6d6a2cSJohannes Goetzfried struct blkcipher_walk *walk) 2954d6d6a2cSJohannes Goetzfried { 2964d6d6a2cSJohannes Goetzfried struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 2974d6d6a2cSJohannes Goetzfried const unsigned int bsize = CAST5_BLOCK_SIZE; 2984d6d6a2cSJohannes Goetzfried unsigned int nbytes = walk->nbytes; 2994d6d6a2cSJohannes Goetzfried u64 *src = (u64 *)walk->src.virt.addr; 3004d6d6a2cSJohannes Goetzfried u64 *dst = (u64 *)walk->dst.virt.addr; 3014d6d6a2cSJohannes Goetzfried u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); 3024d6d6a2cSJohannes Goetzfried __be64 ctrblocks[CAST5_PARALLEL_BLOCKS]; 3034d6d6a2cSJohannes Goetzfried int i; 3044d6d6a2cSJohannes Goetzfried 3054d6d6a2cSJohannes Goetzfried /* Process multi-block batch */ 3064d6d6a2cSJohannes Goetzfried if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { 3074d6d6a2cSJohannes Goetzfried do { 3084d6d6a2cSJohannes Goetzfried /* create ctrblks for parallel encrypt */ 3094d6d6a2cSJohannes Goetzfried for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) { 3104d6d6a2cSJohannes Goetzfried if (dst != src) 3114d6d6a2cSJohannes Goetzfried dst[i] = src[i]; 3124d6d6a2cSJohannes Goetzfried 3134d6d6a2cSJohannes Goetzfried ctrblocks[i] = cpu_to_be64(ctrblk++); 3144d6d6a2cSJohannes Goetzfried } 3154d6d6a2cSJohannes Goetzfried 3164d6d6a2cSJohannes Goetzfried cast5_enc_blk_xway_xor(ctx, (u8 *)dst, 3174d6d6a2cSJohannes Goetzfried (u8 *)ctrblocks); 3184d6d6a2cSJohannes Goetzfried 3194d6d6a2cSJohannes Goetzfried src += CAST5_PARALLEL_BLOCKS; 3204d6d6a2cSJohannes Goetzfried dst += CAST5_PARALLEL_BLOCKS; 3214d6d6a2cSJohannes Goetzfried nbytes -= bsize * CAST5_PARALLEL_BLOCKS; 3224d6d6a2cSJohannes Goetzfried } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); 3234d6d6a2cSJohannes Goetzfried 3244d6d6a2cSJohannes Goetzfried if (nbytes < bsize) 3254d6d6a2cSJohannes Goetzfried goto done; 3264d6d6a2cSJohannes Goetzfried } 3274d6d6a2cSJohannes Goetzfried 3284d6d6a2cSJohannes Goetzfried /* Handle leftovers */ 3294d6d6a2cSJohannes Goetzfried do { 3304d6d6a2cSJohannes Goetzfried if (dst != src) 3314d6d6a2cSJohannes Goetzfried *dst = *src; 3324d6d6a2cSJohannes Goetzfried 3334d6d6a2cSJohannes Goetzfried ctrblocks[0] = cpu_to_be64(ctrblk++); 3344d6d6a2cSJohannes Goetzfried 3354d6d6a2cSJohannes Goetzfried __cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); 3364d6d6a2cSJohannes Goetzfried *dst ^= ctrblocks[0]; 3374d6d6a2cSJohannes Goetzfried 3384d6d6a2cSJohannes Goetzfried src += 1; 3394d6d6a2cSJohannes Goetzfried dst += 1; 3404d6d6a2cSJohannes Goetzfried nbytes -= bsize; 3414d6d6a2cSJohannes Goetzfried } while (nbytes >= bsize); 3424d6d6a2cSJohannes Goetzfried 3434d6d6a2cSJohannes Goetzfried done: 3444d6d6a2cSJohannes Goetzfried *(__be64 *)walk->iv = cpu_to_be64(ctrblk); 3454d6d6a2cSJohannes Goetzfried return nbytes; 3464d6d6a2cSJohannes Goetzfried } 3474d6d6a2cSJohannes Goetzfried 3484d6d6a2cSJohannes Goetzfried static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, 3494d6d6a2cSJohannes Goetzfried struct scatterlist *src, unsigned int nbytes) 3504d6d6a2cSJohannes Goetzfried { 3514d6d6a2cSJohannes Goetzfried bool fpu_enabled = false; 3524d6d6a2cSJohannes Goetzfried struct blkcipher_walk walk; 3534d6d6a2cSJohannes Goetzfried int err; 3544d6d6a2cSJohannes Goetzfried 3554d6d6a2cSJohannes Goetzfried blkcipher_walk_init(&walk, dst, src, nbytes); 3564d6d6a2cSJohannes Goetzfried err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE); 3574d6d6a2cSJohannes Goetzfried desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 3584d6d6a2cSJohannes Goetzfried 3594d6d6a2cSJohannes Goetzfried while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { 3604d6d6a2cSJohannes Goetzfried fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); 3614d6d6a2cSJohannes Goetzfried nbytes = __ctr_crypt(desc, &walk); 3624d6d6a2cSJohannes Goetzfried err = blkcipher_walk_done(desc, &walk, nbytes); 3634d6d6a2cSJohannes Goetzfried } 3644d6d6a2cSJohannes Goetzfried 3654d6d6a2cSJohannes Goetzfried cast5_fpu_end(fpu_enabled); 3664d6d6a2cSJohannes Goetzfried 3674d6d6a2cSJohannes Goetzfried if (walk.nbytes) { 3684d6d6a2cSJohannes Goetzfried ctr_crypt_final(desc, &walk); 3694d6d6a2cSJohannes Goetzfried err = blkcipher_walk_done(desc, &walk, 0); 3704d6d6a2cSJohannes Goetzfried } 3714d6d6a2cSJohannes Goetzfried 3724d6d6a2cSJohannes Goetzfried return err; 3734d6d6a2cSJohannes Goetzfried } 3744d6d6a2cSJohannes Goetzfried 3754d6d6a2cSJohannes Goetzfried 3764d6d6a2cSJohannes Goetzfried static struct crypto_alg cast5_algs[6] = { { 3774d6d6a2cSJohannes Goetzfried .cra_name = "__ecb-cast5-avx", 3784d6d6a2cSJohannes Goetzfried .cra_driver_name = "__driver-ecb-cast5-avx", 3794d6d6a2cSJohannes Goetzfried .cra_priority = 0, 3804d6d6a2cSJohannes Goetzfried .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 3814d6d6a2cSJohannes Goetzfried .cra_blocksize = CAST5_BLOCK_SIZE, 3824d6d6a2cSJohannes Goetzfried .cra_ctxsize = sizeof(struct cast5_ctx), 3834d6d6a2cSJohannes Goetzfried .cra_alignmask = 0, 3844d6d6a2cSJohannes Goetzfried .cra_type = &crypto_blkcipher_type, 3854d6d6a2cSJohannes Goetzfried .cra_module = THIS_MODULE, 3864d6d6a2cSJohannes Goetzfried .cra_u = { 3874d6d6a2cSJohannes Goetzfried .blkcipher = { 3884d6d6a2cSJohannes Goetzfried .min_keysize = CAST5_MIN_KEY_SIZE, 3894d6d6a2cSJohannes Goetzfried .max_keysize = CAST5_MAX_KEY_SIZE, 3904d6d6a2cSJohannes Goetzfried .setkey = cast5_setkey, 3914d6d6a2cSJohannes Goetzfried .encrypt = ecb_encrypt, 3924d6d6a2cSJohannes Goetzfried .decrypt = ecb_decrypt, 3934d6d6a2cSJohannes Goetzfried }, 3944d6d6a2cSJohannes Goetzfried }, 3954d6d6a2cSJohannes Goetzfried }, { 3964d6d6a2cSJohannes Goetzfried .cra_name = "__cbc-cast5-avx", 3974d6d6a2cSJohannes Goetzfried .cra_driver_name = "__driver-cbc-cast5-avx", 3984d6d6a2cSJohannes Goetzfried .cra_priority = 0, 3994d6d6a2cSJohannes Goetzfried .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 4004d6d6a2cSJohannes Goetzfried .cra_blocksize = CAST5_BLOCK_SIZE, 4014d6d6a2cSJohannes Goetzfried .cra_ctxsize = sizeof(struct cast5_ctx), 4024d6d6a2cSJohannes Goetzfried .cra_alignmask = 0, 4034d6d6a2cSJohannes Goetzfried .cra_type = &crypto_blkcipher_type, 4044d6d6a2cSJohannes Goetzfried .cra_module = THIS_MODULE, 4054d6d6a2cSJohannes Goetzfried .cra_u = { 4064d6d6a2cSJohannes Goetzfried .blkcipher = { 4074d6d6a2cSJohannes Goetzfried .min_keysize = CAST5_MIN_KEY_SIZE, 4084d6d6a2cSJohannes Goetzfried .max_keysize = CAST5_MAX_KEY_SIZE, 4094d6d6a2cSJohannes Goetzfried .setkey = cast5_setkey, 4104d6d6a2cSJohannes Goetzfried .encrypt = cbc_encrypt, 4114d6d6a2cSJohannes Goetzfried .decrypt = cbc_decrypt, 4124d6d6a2cSJohannes Goetzfried }, 4134d6d6a2cSJohannes Goetzfried }, 4144d6d6a2cSJohannes Goetzfried }, { 4154d6d6a2cSJohannes Goetzfried .cra_name = "__ctr-cast5-avx", 4164d6d6a2cSJohannes Goetzfried .cra_driver_name = "__driver-ctr-cast5-avx", 4174d6d6a2cSJohannes Goetzfried .cra_priority = 0, 4184d6d6a2cSJohannes Goetzfried .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 4194d6d6a2cSJohannes Goetzfried .cra_blocksize = 1, 4204d6d6a2cSJohannes Goetzfried .cra_ctxsize = sizeof(struct cast5_ctx), 4214d6d6a2cSJohannes Goetzfried .cra_alignmask = 0, 4224d6d6a2cSJohannes Goetzfried .cra_type = &crypto_blkcipher_type, 4234d6d6a2cSJohannes Goetzfried .cra_module = THIS_MODULE, 4244d6d6a2cSJohannes Goetzfried .cra_u = { 4254d6d6a2cSJohannes Goetzfried .blkcipher = { 4264d6d6a2cSJohannes Goetzfried .min_keysize = CAST5_MIN_KEY_SIZE, 4274d6d6a2cSJohannes Goetzfried .max_keysize = CAST5_MAX_KEY_SIZE, 4284d6d6a2cSJohannes Goetzfried .ivsize = CAST5_BLOCK_SIZE, 4294d6d6a2cSJohannes Goetzfried .setkey = cast5_setkey, 4304d6d6a2cSJohannes Goetzfried .encrypt = ctr_crypt, 4314d6d6a2cSJohannes Goetzfried .decrypt = ctr_crypt, 4324d6d6a2cSJohannes Goetzfried }, 4334d6d6a2cSJohannes Goetzfried }, 4344d6d6a2cSJohannes Goetzfried }, { 4354d6d6a2cSJohannes Goetzfried .cra_name = "ecb(cast5)", 4364d6d6a2cSJohannes Goetzfried .cra_driver_name = "ecb-cast5-avx", 4374d6d6a2cSJohannes Goetzfried .cra_priority = 200, 4384d6d6a2cSJohannes Goetzfried .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 4394d6d6a2cSJohannes Goetzfried .cra_blocksize = CAST5_BLOCK_SIZE, 4404d6d6a2cSJohannes Goetzfried .cra_ctxsize = sizeof(struct async_helper_ctx), 4414d6d6a2cSJohannes Goetzfried .cra_alignmask = 0, 4424d6d6a2cSJohannes Goetzfried .cra_type = &crypto_ablkcipher_type, 4434d6d6a2cSJohannes Goetzfried .cra_module = THIS_MODULE, 4444d6d6a2cSJohannes Goetzfried .cra_init = ablk_init, 4454d6d6a2cSJohannes Goetzfried .cra_exit = ablk_exit, 4464d6d6a2cSJohannes Goetzfried .cra_u = { 4474d6d6a2cSJohannes Goetzfried .ablkcipher = { 4484d6d6a2cSJohannes Goetzfried .min_keysize = CAST5_MIN_KEY_SIZE, 4494d6d6a2cSJohannes Goetzfried .max_keysize = CAST5_MAX_KEY_SIZE, 4504d6d6a2cSJohannes Goetzfried .setkey = ablk_set_key, 4514d6d6a2cSJohannes Goetzfried .encrypt = ablk_encrypt, 4524d6d6a2cSJohannes Goetzfried .decrypt = ablk_decrypt, 4534d6d6a2cSJohannes Goetzfried }, 4544d6d6a2cSJohannes Goetzfried }, 4554d6d6a2cSJohannes Goetzfried }, { 4564d6d6a2cSJohannes Goetzfried .cra_name = "cbc(cast5)", 4574d6d6a2cSJohannes Goetzfried .cra_driver_name = "cbc-cast5-avx", 4584d6d6a2cSJohannes Goetzfried .cra_priority = 200, 4594d6d6a2cSJohannes Goetzfried .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 4604d6d6a2cSJohannes Goetzfried .cra_blocksize = CAST5_BLOCK_SIZE, 4614d6d6a2cSJohannes Goetzfried .cra_ctxsize = sizeof(struct async_helper_ctx), 4624d6d6a2cSJohannes Goetzfried .cra_alignmask = 0, 4634d6d6a2cSJohannes Goetzfried .cra_type = &crypto_ablkcipher_type, 4644d6d6a2cSJohannes Goetzfried .cra_module = THIS_MODULE, 4654d6d6a2cSJohannes Goetzfried .cra_init = ablk_init, 4664d6d6a2cSJohannes Goetzfried .cra_exit = ablk_exit, 4674d6d6a2cSJohannes Goetzfried .cra_u = { 4684d6d6a2cSJohannes Goetzfried .ablkcipher = { 4694d6d6a2cSJohannes Goetzfried .min_keysize = CAST5_MIN_KEY_SIZE, 4704d6d6a2cSJohannes Goetzfried .max_keysize = CAST5_MAX_KEY_SIZE, 4714d6d6a2cSJohannes Goetzfried .ivsize = CAST5_BLOCK_SIZE, 4724d6d6a2cSJohannes Goetzfried .setkey = ablk_set_key, 4734d6d6a2cSJohannes Goetzfried .encrypt = __ablk_encrypt, 4744d6d6a2cSJohannes Goetzfried .decrypt = ablk_decrypt, 4754d6d6a2cSJohannes Goetzfried }, 4764d6d6a2cSJohannes Goetzfried }, 4774d6d6a2cSJohannes Goetzfried }, { 4784d6d6a2cSJohannes Goetzfried .cra_name = "ctr(cast5)", 4794d6d6a2cSJohannes Goetzfried .cra_driver_name = "ctr-cast5-avx", 4804d6d6a2cSJohannes Goetzfried .cra_priority = 200, 4814d6d6a2cSJohannes Goetzfried .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 4824d6d6a2cSJohannes Goetzfried .cra_blocksize = 1, 4834d6d6a2cSJohannes Goetzfried .cra_ctxsize = sizeof(struct async_helper_ctx), 4844d6d6a2cSJohannes Goetzfried .cra_alignmask = 0, 4854d6d6a2cSJohannes Goetzfried .cra_type = &crypto_ablkcipher_type, 4864d6d6a2cSJohannes Goetzfried .cra_module = THIS_MODULE, 4874d6d6a2cSJohannes Goetzfried .cra_init = ablk_init, 4884d6d6a2cSJohannes Goetzfried .cra_exit = ablk_exit, 4894d6d6a2cSJohannes Goetzfried .cra_u = { 4904d6d6a2cSJohannes Goetzfried .ablkcipher = { 4914d6d6a2cSJohannes Goetzfried .min_keysize = CAST5_MIN_KEY_SIZE, 4924d6d6a2cSJohannes Goetzfried .max_keysize = CAST5_MAX_KEY_SIZE, 4934d6d6a2cSJohannes Goetzfried .ivsize = CAST5_BLOCK_SIZE, 4944d6d6a2cSJohannes Goetzfried .setkey = ablk_set_key, 4954d6d6a2cSJohannes Goetzfried .encrypt = ablk_encrypt, 4964d6d6a2cSJohannes Goetzfried .decrypt = ablk_encrypt, 4974d6d6a2cSJohannes Goetzfried .geniv = "chainiv", 4984d6d6a2cSJohannes Goetzfried }, 4994d6d6a2cSJohannes Goetzfried }, 5004d6d6a2cSJohannes Goetzfried } }; 5014d6d6a2cSJohannes Goetzfried 5024d6d6a2cSJohannes Goetzfried static int __init cast5_init(void) 5034d6d6a2cSJohannes Goetzfried { 5044d6d6a2cSJohannes Goetzfried u64 xcr0; 5054d6d6a2cSJohannes Goetzfried 5064d6d6a2cSJohannes Goetzfried if (!cpu_has_avx || !cpu_has_osxsave) { 5074d6d6a2cSJohannes Goetzfried pr_info("AVX instructions are not detected.\n"); 5084d6d6a2cSJohannes Goetzfried return -ENODEV; 5094d6d6a2cSJohannes Goetzfried } 5104d6d6a2cSJohannes Goetzfried 5114d6d6a2cSJohannes Goetzfried xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 5124d6d6a2cSJohannes Goetzfried if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { 5134d6d6a2cSJohannes Goetzfried pr_info("AVX detected but unusable.\n"); 5144d6d6a2cSJohannes Goetzfried return -ENODEV; 5154d6d6a2cSJohannes Goetzfried } 5164d6d6a2cSJohannes Goetzfried 5174d6d6a2cSJohannes Goetzfried return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); 5184d6d6a2cSJohannes Goetzfried } 5194d6d6a2cSJohannes Goetzfried 5204d6d6a2cSJohannes Goetzfried static void __exit cast5_exit(void) 5214d6d6a2cSJohannes Goetzfried { 5224d6d6a2cSJohannes Goetzfried crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); 5234d6d6a2cSJohannes Goetzfried } 5244d6d6a2cSJohannes Goetzfried 5254d6d6a2cSJohannes Goetzfried module_init(cast5_init); 5264d6d6a2cSJohannes Goetzfried module_exit(cast5_exit); 5274d6d6a2cSJohannes Goetzfried 5284d6d6a2cSJohannes Goetzfried MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized"); 5294d6d6a2cSJohannes Goetzfried MODULE_LICENSE("GPL"); 5304d6d6a2cSJohannes Goetzfried MODULE_ALIAS("cast5"); 531