xref: /openbmc/linux/arch/x86/crypto/cast5_avx_glue.c (revision d5d34d98)
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */
234d6d6a2cSJohannes Goetzfried 
244d6d6a2cSJohannes Goetzfried #include <linux/module.h>
254d6d6a2cSJohannes Goetzfried #include <linux/hardirq.h>
264d6d6a2cSJohannes Goetzfried #include <linux/types.h>
274d6d6a2cSJohannes Goetzfried #include <linux/crypto.h>
284d6d6a2cSJohannes Goetzfried #include <linux/err.h>
29801201aaSArd Biesheuvel #include <crypto/ablk_helper.h>
304d6d6a2cSJohannes Goetzfried #include <crypto/algapi.h>
314d6d6a2cSJohannes Goetzfried #include <crypto/cast5.h>
324d6d6a2cSJohannes Goetzfried #include <crypto/cryptd.h>
334d6d6a2cSJohannes Goetzfried #include <crypto/ctr.h>
34d5d34d98SIngo Molnar #include <asm/fpu/api.h>
354d6d6a2cSJohannes Goetzfried #include <asm/crypto/glue_helper.h>
364d6d6a2cSJohannes Goetzfried 
374d6d6a2cSJohannes Goetzfried #define CAST5_PARALLEL_BLOCKS 16
384d6d6a2cSJohannes Goetzfried 
39c12ab20bSJussi Kivilinna asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
404d6d6a2cSJohannes Goetzfried 				    const u8 *src);
41c12ab20bSJussi Kivilinna asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
42c12ab20bSJussi Kivilinna 				    const u8 *src);
43c12ab20bSJussi Kivilinna asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
44c12ab20bSJussi Kivilinna 				    const u8 *src);
45c12ab20bSJussi Kivilinna asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
46c12ab20bSJussi Kivilinna 				__be64 *iv);
474d6d6a2cSJohannes Goetzfried 
484d6d6a2cSJohannes Goetzfried static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
494d6d6a2cSJohannes Goetzfried {
504d6d6a2cSJohannes Goetzfried 	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
514d6d6a2cSJohannes Goetzfried 			      NULL, fpu_enabled, nbytes);
524d6d6a2cSJohannes Goetzfried }
534d6d6a2cSJohannes Goetzfried 
544d6d6a2cSJohannes Goetzfried static inline void cast5_fpu_end(bool fpu_enabled)
554d6d6a2cSJohannes Goetzfried {
564d6d6a2cSJohannes Goetzfried 	return glue_fpu_end(fpu_enabled);
574d6d6a2cSJohannes Goetzfried }
584d6d6a2cSJohannes Goetzfried 
594d6d6a2cSJohannes Goetzfried static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
604d6d6a2cSJohannes Goetzfried 		     bool enc)
614d6d6a2cSJohannes Goetzfried {
624d6d6a2cSJohannes Goetzfried 	bool fpu_enabled = false;
634d6d6a2cSJohannes Goetzfried 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
644d6d6a2cSJohannes Goetzfried 	const unsigned int bsize = CAST5_BLOCK_SIZE;
654d6d6a2cSJohannes Goetzfried 	unsigned int nbytes;
66c12ab20bSJussi Kivilinna 	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
674d6d6a2cSJohannes Goetzfried 	int err;
684d6d6a2cSJohannes Goetzfried 
69c12ab20bSJussi Kivilinna 	fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
70c12ab20bSJussi Kivilinna 
714d6d6a2cSJohannes Goetzfried 	err = blkcipher_walk_virt(desc, walk);
724d6d6a2cSJohannes Goetzfried 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
734d6d6a2cSJohannes Goetzfried 
744d6d6a2cSJohannes Goetzfried 	while ((nbytes = walk->nbytes)) {
754d6d6a2cSJohannes Goetzfried 		u8 *wsrc = walk->src.virt.addr;
764d6d6a2cSJohannes Goetzfried 		u8 *wdst = walk->dst.virt.addr;
774d6d6a2cSJohannes Goetzfried 
784d6d6a2cSJohannes Goetzfried 		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
794d6d6a2cSJohannes Goetzfried 
804d6d6a2cSJohannes Goetzfried 		/* Process multi-block batch */
814d6d6a2cSJohannes Goetzfried 		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
824d6d6a2cSJohannes Goetzfried 			do {
83c12ab20bSJussi Kivilinna 				fn(ctx, wdst, wsrc);
844d6d6a2cSJohannes Goetzfried 
854d6d6a2cSJohannes Goetzfried 				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
864d6d6a2cSJohannes Goetzfried 				wdst += bsize * CAST5_PARALLEL_BLOCKS;
874d6d6a2cSJohannes Goetzfried 				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
884d6d6a2cSJohannes Goetzfried 			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
894d6d6a2cSJohannes Goetzfried 
904d6d6a2cSJohannes Goetzfried 			if (nbytes < bsize)
914d6d6a2cSJohannes Goetzfried 				goto done;
924d6d6a2cSJohannes Goetzfried 		}
934d6d6a2cSJohannes Goetzfried 
94c12ab20bSJussi Kivilinna 		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
95c12ab20bSJussi Kivilinna 
964d6d6a2cSJohannes Goetzfried 		/* Handle leftovers */
974d6d6a2cSJohannes Goetzfried 		do {
98c12ab20bSJussi Kivilinna 			fn(ctx, wdst, wsrc);
994d6d6a2cSJohannes Goetzfried 
1004d6d6a2cSJohannes Goetzfried 			wsrc += bsize;
1014d6d6a2cSJohannes Goetzfried 			wdst += bsize;
1024d6d6a2cSJohannes Goetzfried 			nbytes -= bsize;
1034d6d6a2cSJohannes Goetzfried 		} while (nbytes >= bsize);
1044d6d6a2cSJohannes Goetzfried 
1054d6d6a2cSJohannes Goetzfried done:
1064d6d6a2cSJohannes Goetzfried 		err = blkcipher_walk_done(desc, walk, nbytes);
1074d6d6a2cSJohannes Goetzfried 	}
1084d6d6a2cSJohannes Goetzfried 
1094d6d6a2cSJohannes Goetzfried 	cast5_fpu_end(fpu_enabled);
1104d6d6a2cSJohannes Goetzfried 	return err;
1114d6d6a2cSJohannes Goetzfried }
1124d6d6a2cSJohannes Goetzfried 
1134d6d6a2cSJohannes Goetzfried static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1144d6d6a2cSJohannes Goetzfried 		       struct scatterlist *src, unsigned int nbytes)
1154d6d6a2cSJohannes Goetzfried {
1164d6d6a2cSJohannes Goetzfried 	struct blkcipher_walk walk;
1174d6d6a2cSJohannes Goetzfried 
1184d6d6a2cSJohannes Goetzfried 	blkcipher_walk_init(&walk, dst, src, nbytes);
1194d6d6a2cSJohannes Goetzfried 	return ecb_crypt(desc, &walk, true);
1204d6d6a2cSJohannes Goetzfried }
1214d6d6a2cSJohannes Goetzfried 
1224d6d6a2cSJohannes Goetzfried static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1234d6d6a2cSJohannes Goetzfried 		       struct scatterlist *src, unsigned int nbytes)
1244d6d6a2cSJohannes Goetzfried {
1254d6d6a2cSJohannes Goetzfried 	struct blkcipher_walk walk;
1264d6d6a2cSJohannes Goetzfried 
1274d6d6a2cSJohannes Goetzfried 	blkcipher_walk_init(&walk, dst, src, nbytes);
1284d6d6a2cSJohannes Goetzfried 	return ecb_crypt(desc, &walk, false);
1294d6d6a2cSJohannes Goetzfried }
1304d6d6a2cSJohannes Goetzfried 
1314d6d6a2cSJohannes Goetzfried static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
1324d6d6a2cSJohannes Goetzfried 				  struct blkcipher_walk *walk)
1334d6d6a2cSJohannes Goetzfried {
1344d6d6a2cSJohannes Goetzfried 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1354d6d6a2cSJohannes Goetzfried 	const unsigned int bsize = CAST5_BLOCK_SIZE;
1364d6d6a2cSJohannes Goetzfried 	unsigned int nbytes = walk->nbytes;
1374d6d6a2cSJohannes Goetzfried 	u64 *src = (u64 *)walk->src.virt.addr;
1384d6d6a2cSJohannes Goetzfried 	u64 *dst = (u64 *)walk->dst.virt.addr;
1394d6d6a2cSJohannes Goetzfried 	u64 *iv = (u64 *)walk->iv;
1404d6d6a2cSJohannes Goetzfried 
1414d6d6a2cSJohannes Goetzfried 	do {
1424d6d6a2cSJohannes Goetzfried 		*dst = *src ^ *iv;
1434d6d6a2cSJohannes Goetzfried 		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
1444d6d6a2cSJohannes Goetzfried 		iv = dst;
1454d6d6a2cSJohannes Goetzfried 
1464d6d6a2cSJohannes Goetzfried 		src += 1;
1474d6d6a2cSJohannes Goetzfried 		dst += 1;
1484d6d6a2cSJohannes Goetzfried 		nbytes -= bsize;
1494d6d6a2cSJohannes Goetzfried 	} while (nbytes >= bsize);
1504d6d6a2cSJohannes Goetzfried 
151200429ccSJussi Kivilinna 	*(u64 *)walk->iv = *iv;
1524d6d6a2cSJohannes Goetzfried 	return nbytes;
1534d6d6a2cSJohannes Goetzfried }
1544d6d6a2cSJohannes Goetzfried 
1554d6d6a2cSJohannes Goetzfried static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1564d6d6a2cSJohannes Goetzfried 		       struct scatterlist *src, unsigned int nbytes)
1574d6d6a2cSJohannes Goetzfried {
1584d6d6a2cSJohannes Goetzfried 	struct blkcipher_walk walk;
1594d6d6a2cSJohannes Goetzfried 	int err;
1604d6d6a2cSJohannes Goetzfried 
1614d6d6a2cSJohannes Goetzfried 	blkcipher_walk_init(&walk, dst, src, nbytes);
1624d6d6a2cSJohannes Goetzfried 	err = blkcipher_walk_virt(desc, &walk);
1634d6d6a2cSJohannes Goetzfried 
1644d6d6a2cSJohannes Goetzfried 	while ((nbytes = walk.nbytes)) {
1654d6d6a2cSJohannes Goetzfried 		nbytes = __cbc_encrypt(desc, &walk);
1664d6d6a2cSJohannes Goetzfried 		err = blkcipher_walk_done(desc, &walk, nbytes);
1674d6d6a2cSJohannes Goetzfried 	}
1684d6d6a2cSJohannes Goetzfried 
1694d6d6a2cSJohannes Goetzfried 	return err;
1704d6d6a2cSJohannes Goetzfried }
1714d6d6a2cSJohannes Goetzfried 
1724d6d6a2cSJohannes Goetzfried static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
1734d6d6a2cSJohannes Goetzfried 				  struct blkcipher_walk *walk)
1744d6d6a2cSJohannes Goetzfried {
1754d6d6a2cSJohannes Goetzfried 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1764d6d6a2cSJohannes Goetzfried 	const unsigned int bsize = CAST5_BLOCK_SIZE;
1774d6d6a2cSJohannes Goetzfried 	unsigned int nbytes = walk->nbytes;
1784d6d6a2cSJohannes Goetzfried 	u64 *src = (u64 *)walk->src.virt.addr;
1794d6d6a2cSJohannes Goetzfried 	u64 *dst = (u64 *)walk->dst.virt.addr;
1804d6d6a2cSJohannes Goetzfried 	u64 last_iv;
1814d6d6a2cSJohannes Goetzfried 
1824d6d6a2cSJohannes Goetzfried 	/* Start of the last block. */
1834d6d6a2cSJohannes Goetzfried 	src += nbytes / bsize - 1;
1844d6d6a2cSJohannes Goetzfried 	dst += nbytes / bsize - 1;
1854d6d6a2cSJohannes Goetzfried 
1864d6d6a2cSJohannes Goetzfried 	last_iv = *src;
1874d6d6a2cSJohannes Goetzfried 
1884d6d6a2cSJohannes Goetzfried 	/* Process multi-block batch */
1894d6d6a2cSJohannes Goetzfried 	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
1904d6d6a2cSJohannes Goetzfried 		do {
1914d6d6a2cSJohannes Goetzfried 			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
1924d6d6a2cSJohannes Goetzfried 			src -= CAST5_PARALLEL_BLOCKS - 1;
1934d6d6a2cSJohannes Goetzfried 			dst -= CAST5_PARALLEL_BLOCKS - 1;
1944d6d6a2cSJohannes Goetzfried 
195c12ab20bSJussi Kivilinna 			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
1964d6d6a2cSJohannes Goetzfried 
1974d6d6a2cSJohannes Goetzfried 			nbytes -= bsize;
1984d6d6a2cSJohannes Goetzfried 			if (nbytes < bsize)
1994d6d6a2cSJohannes Goetzfried 				goto done;
2004d6d6a2cSJohannes Goetzfried 
2014d6d6a2cSJohannes Goetzfried 			*dst ^= *(src - 1);
2024d6d6a2cSJohannes Goetzfried 			src -= 1;
2034d6d6a2cSJohannes Goetzfried 			dst -= 1;
2044d6d6a2cSJohannes Goetzfried 		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
2054d6d6a2cSJohannes Goetzfried 	}
2064d6d6a2cSJohannes Goetzfried 
2074d6d6a2cSJohannes Goetzfried 	/* Handle leftovers */
2084d6d6a2cSJohannes Goetzfried 	for (;;) {
2094d6d6a2cSJohannes Goetzfried 		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
2104d6d6a2cSJohannes Goetzfried 
2114d6d6a2cSJohannes Goetzfried 		nbytes -= bsize;
2124d6d6a2cSJohannes Goetzfried 		if (nbytes < bsize)
2134d6d6a2cSJohannes Goetzfried 			break;
2144d6d6a2cSJohannes Goetzfried 
2154d6d6a2cSJohannes Goetzfried 		*dst ^= *(src - 1);
2164d6d6a2cSJohannes Goetzfried 		src -= 1;
2174d6d6a2cSJohannes Goetzfried 		dst -= 1;
2184d6d6a2cSJohannes Goetzfried 	}
2194d6d6a2cSJohannes Goetzfried 
2204d6d6a2cSJohannes Goetzfried done:
2214d6d6a2cSJohannes Goetzfried 	*dst ^= *(u64 *)walk->iv;
2224d6d6a2cSJohannes Goetzfried 	*(u64 *)walk->iv = last_iv;
2234d6d6a2cSJohannes Goetzfried 
2244d6d6a2cSJohannes Goetzfried 	return nbytes;
2254d6d6a2cSJohannes Goetzfried }
2264d6d6a2cSJohannes Goetzfried 
2274d6d6a2cSJohannes Goetzfried static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
2284d6d6a2cSJohannes Goetzfried 		       struct scatterlist *src, unsigned int nbytes)
2294d6d6a2cSJohannes Goetzfried {
2304d6d6a2cSJohannes Goetzfried 	bool fpu_enabled = false;
2314d6d6a2cSJohannes Goetzfried 	struct blkcipher_walk walk;
2324d6d6a2cSJohannes Goetzfried 	int err;
2334d6d6a2cSJohannes Goetzfried 
2344d6d6a2cSJohannes Goetzfried 	blkcipher_walk_init(&walk, dst, src, nbytes);
2354d6d6a2cSJohannes Goetzfried 	err = blkcipher_walk_virt(desc, &walk);
2364d6d6a2cSJohannes Goetzfried 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
2374d6d6a2cSJohannes Goetzfried 
2384d6d6a2cSJohannes Goetzfried 	while ((nbytes = walk.nbytes)) {
2394d6d6a2cSJohannes Goetzfried 		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
2404d6d6a2cSJohannes Goetzfried 		nbytes = __cbc_decrypt(desc, &walk);
2414d6d6a2cSJohannes Goetzfried 		err = blkcipher_walk_done(desc, &walk, nbytes);
2424d6d6a2cSJohannes Goetzfried 	}
2434d6d6a2cSJohannes Goetzfried 
2444d6d6a2cSJohannes Goetzfried 	cast5_fpu_end(fpu_enabled);
2454d6d6a2cSJohannes Goetzfried 	return err;
2464d6d6a2cSJohannes Goetzfried }
2474d6d6a2cSJohannes Goetzfried 
2484d6d6a2cSJohannes Goetzfried static void ctr_crypt_final(struct blkcipher_desc *desc,
2494d6d6a2cSJohannes Goetzfried 			    struct blkcipher_walk *walk)
2504d6d6a2cSJohannes Goetzfried {
2514d6d6a2cSJohannes Goetzfried 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
2524d6d6a2cSJohannes Goetzfried 	u8 *ctrblk = walk->iv;
2534d6d6a2cSJohannes Goetzfried 	u8 keystream[CAST5_BLOCK_SIZE];
2544d6d6a2cSJohannes Goetzfried 	u8 *src = walk->src.virt.addr;
2554d6d6a2cSJohannes Goetzfried 	u8 *dst = walk->dst.virt.addr;
2564d6d6a2cSJohannes Goetzfried 	unsigned int nbytes = walk->nbytes;
2574d6d6a2cSJohannes Goetzfried 
2584d6d6a2cSJohannes Goetzfried 	__cast5_encrypt(ctx, keystream, ctrblk);
2594d6d6a2cSJohannes Goetzfried 	crypto_xor(keystream, src, nbytes);
2604d6d6a2cSJohannes Goetzfried 	memcpy(dst, keystream, nbytes);
2614d6d6a2cSJohannes Goetzfried 
2624d6d6a2cSJohannes Goetzfried 	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
2634d6d6a2cSJohannes Goetzfried }
2644d6d6a2cSJohannes Goetzfried 
2654d6d6a2cSJohannes Goetzfried static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
2664d6d6a2cSJohannes Goetzfried 				struct blkcipher_walk *walk)
2674d6d6a2cSJohannes Goetzfried {
2684d6d6a2cSJohannes Goetzfried 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
2694d6d6a2cSJohannes Goetzfried 	const unsigned int bsize = CAST5_BLOCK_SIZE;
2704d6d6a2cSJohannes Goetzfried 	unsigned int nbytes = walk->nbytes;
2714d6d6a2cSJohannes Goetzfried 	u64 *src = (u64 *)walk->src.virt.addr;
2724d6d6a2cSJohannes Goetzfried 	u64 *dst = (u64 *)walk->dst.virt.addr;
2734d6d6a2cSJohannes Goetzfried 
2744d6d6a2cSJohannes Goetzfried 	/* Process multi-block batch */
2754d6d6a2cSJohannes Goetzfried 	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
2764d6d6a2cSJohannes Goetzfried 		do {
277c12ab20bSJussi Kivilinna 			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
278c12ab20bSJussi Kivilinna 					(__be64 *)walk->iv);
2794d6d6a2cSJohannes Goetzfried 
2804d6d6a2cSJohannes Goetzfried 			src += CAST5_PARALLEL_BLOCKS;
2814d6d6a2cSJohannes Goetzfried 			dst += CAST5_PARALLEL_BLOCKS;
2824d6d6a2cSJohannes Goetzfried 			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
2834d6d6a2cSJohannes Goetzfried 		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
2844d6d6a2cSJohannes Goetzfried 
2854d6d6a2cSJohannes Goetzfried 		if (nbytes < bsize)
2864d6d6a2cSJohannes Goetzfried 			goto done;
2874d6d6a2cSJohannes Goetzfried 	}
2884d6d6a2cSJohannes Goetzfried 
2894d6d6a2cSJohannes Goetzfried 	/* Handle leftovers */
2904d6d6a2cSJohannes Goetzfried 	do {
291c12ab20bSJussi Kivilinna 		u64 ctrblk;
292c12ab20bSJussi Kivilinna 
2934d6d6a2cSJohannes Goetzfried 		if (dst != src)
2944d6d6a2cSJohannes Goetzfried 			*dst = *src;
2954d6d6a2cSJohannes Goetzfried 
296c12ab20bSJussi Kivilinna 		ctrblk = *(u64 *)walk->iv;
297c12ab20bSJussi Kivilinna 		be64_add_cpu((__be64 *)walk->iv, 1);
2984d6d6a2cSJohannes Goetzfried 
299c12ab20bSJussi Kivilinna 		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
300c12ab20bSJussi Kivilinna 		*dst ^= ctrblk;
3014d6d6a2cSJohannes Goetzfried 
3024d6d6a2cSJohannes Goetzfried 		src += 1;
3034d6d6a2cSJohannes Goetzfried 		dst += 1;
3044d6d6a2cSJohannes Goetzfried 		nbytes -= bsize;
3054d6d6a2cSJohannes Goetzfried 	} while (nbytes >= bsize);
3064d6d6a2cSJohannes Goetzfried 
3074d6d6a2cSJohannes Goetzfried done:
3084d6d6a2cSJohannes Goetzfried 	return nbytes;
3094d6d6a2cSJohannes Goetzfried }
3104d6d6a2cSJohannes Goetzfried 
3114d6d6a2cSJohannes Goetzfried static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
3124d6d6a2cSJohannes Goetzfried 		     struct scatterlist *src, unsigned int nbytes)
3134d6d6a2cSJohannes Goetzfried {
3144d6d6a2cSJohannes Goetzfried 	bool fpu_enabled = false;
3154d6d6a2cSJohannes Goetzfried 	struct blkcipher_walk walk;
3164d6d6a2cSJohannes Goetzfried 	int err;
3174d6d6a2cSJohannes Goetzfried 
3184d6d6a2cSJohannes Goetzfried 	blkcipher_walk_init(&walk, dst, src, nbytes);
3194d6d6a2cSJohannes Goetzfried 	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
3204d6d6a2cSJohannes Goetzfried 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
3214d6d6a2cSJohannes Goetzfried 
3224d6d6a2cSJohannes Goetzfried 	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
3234d6d6a2cSJohannes Goetzfried 		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
3244d6d6a2cSJohannes Goetzfried 		nbytes = __ctr_crypt(desc, &walk);
3254d6d6a2cSJohannes Goetzfried 		err = blkcipher_walk_done(desc, &walk, nbytes);
3264d6d6a2cSJohannes Goetzfried 	}
3274d6d6a2cSJohannes Goetzfried 
3284d6d6a2cSJohannes Goetzfried 	cast5_fpu_end(fpu_enabled);
3294d6d6a2cSJohannes Goetzfried 
3304d6d6a2cSJohannes Goetzfried 	if (walk.nbytes) {
3314d6d6a2cSJohannes Goetzfried 		ctr_crypt_final(desc, &walk);
3324d6d6a2cSJohannes Goetzfried 		err = blkcipher_walk_done(desc, &walk, 0);
3334d6d6a2cSJohannes Goetzfried 	}
3344d6d6a2cSJohannes Goetzfried 
3354d6d6a2cSJohannes Goetzfried 	return err;
3364d6d6a2cSJohannes Goetzfried }
3374d6d6a2cSJohannes Goetzfried 
3384d6d6a2cSJohannes Goetzfried 
3394d6d6a2cSJohannes Goetzfried static struct crypto_alg cast5_algs[6] = { {
3404d6d6a2cSJohannes Goetzfried 	.cra_name		= "__ecb-cast5-avx",
3414d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "__driver-ecb-cast5-avx",
3424d6d6a2cSJohannes Goetzfried 	.cra_priority		= 0,
343680574e8SStephan Mueller 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
344680574e8SStephan Mueller 				  CRYPTO_ALG_INTERNAL,
3454d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= CAST5_BLOCK_SIZE,
3464d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct cast5_ctx),
3474d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
3484d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_blkcipher_type,
3494d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
3504d6d6a2cSJohannes Goetzfried 	.cra_u = {
3514d6d6a2cSJohannes Goetzfried 		.blkcipher = {
3524d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
3534d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
3544d6d6a2cSJohannes Goetzfried 			.setkey		= cast5_setkey,
3554d6d6a2cSJohannes Goetzfried 			.encrypt	= ecb_encrypt,
3564d6d6a2cSJohannes Goetzfried 			.decrypt	= ecb_decrypt,
3574d6d6a2cSJohannes Goetzfried 		},
3584d6d6a2cSJohannes Goetzfried 	},
3594d6d6a2cSJohannes Goetzfried }, {
3604d6d6a2cSJohannes Goetzfried 	.cra_name		= "__cbc-cast5-avx",
3614d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "__driver-cbc-cast5-avx",
3624d6d6a2cSJohannes Goetzfried 	.cra_priority		= 0,
363680574e8SStephan Mueller 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
364680574e8SStephan Mueller 				  CRYPTO_ALG_INTERNAL,
3654d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= CAST5_BLOCK_SIZE,
3664d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct cast5_ctx),
3674d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
3684d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_blkcipher_type,
3694d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
3704d6d6a2cSJohannes Goetzfried 	.cra_u = {
3714d6d6a2cSJohannes Goetzfried 		.blkcipher = {
3724d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
3734d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
3744d6d6a2cSJohannes Goetzfried 			.setkey		= cast5_setkey,
3754d6d6a2cSJohannes Goetzfried 			.encrypt	= cbc_encrypt,
3764d6d6a2cSJohannes Goetzfried 			.decrypt	= cbc_decrypt,
3774d6d6a2cSJohannes Goetzfried 		},
3784d6d6a2cSJohannes Goetzfried 	},
3794d6d6a2cSJohannes Goetzfried }, {
3804d6d6a2cSJohannes Goetzfried 	.cra_name		= "__ctr-cast5-avx",
3814d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "__driver-ctr-cast5-avx",
3824d6d6a2cSJohannes Goetzfried 	.cra_priority		= 0,
383680574e8SStephan Mueller 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
384680574e8SStephan Mueller 				  CRYPTO_ALG_INTERNAL,
3854d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= 1,
3864d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct cast5_ctx),
3874d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
3884d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_blkcipher_type,
3894d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
3904d6d6a2cSJohannes Goetzfried 	.cra_u = {
3914d6d6a2cSJohannes Goetzfried 		.blkcipher = {
3924d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
3934d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
3944d6d6a2cSJohannes Goetzfried 			.ivsize		= CAST5_BLOCK_SIZE,
3954d6d6a2cSJohannes Goetzfried 			.setkey		= cast5_setkey,
3964d6d6a2cSJohannes Goetzfried 			.encrypt	= ctr_crypt,
3974d6d6a2cSJohannes Goetzfried 			.decrypt	= ctr_crypt,
3984d6d6a2cSJohannes Goetzfried 		},
3994d6d6a2cSJohannes Goetzfried 	},
4004d6d6a2cSJohannes Goetzfried }, {
4014d6d6a2cSJohannes Goetzfried 	.cra_name		= "ecb(cast5)",
4024d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "ecb-cast5-avx",
4034d6d6a2cSJohannes Goetzfried 	.cra_priority		= 200,
4044d6d6a2cSJohannes Goetzfried 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
4054d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= CAST5_BLOCK_SIZE,
4064d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct async_helper_ctx),
4074d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
4084d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_ablkcipher_type,
4094d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
4104d6d6a2cSJohannes Goetzfried 	.cra_init		= ablk_init,
4114d6d6a2cSJohannes Goetzfried 	.cra_exit		= ablk_exit,
4124d6d6a2cSJohannes Goetzfried 	.cra_u = {
4134d6d6a2cSJohannes Goetzfried 		.ablkcipher = {
4144d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
4154d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
4164d6d6a2cSJohannes Goetzfried 			.setkey		= ablk_set_key,
4174d6d6a2cSJohannes Goetzfried 			.encrypt	= ablk_encrypt,
4184d6d6a2cSJohannes Goetzfried 			.decrypt	= ablk_decrypt,
4194d6d6a2cSJohannes Goetzfried 		},
4204d6d6a2cSJohannes Goetzfried 	},
4214d6d6a2cSJohannes Goetzfried }, {
4224d6d6a2cSJohannes Goetzfried 	.cra_name		= "cbc(cast5)",
4234d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "cbc-cast5-avx",
4244d6d6a2cSJohannes Goetzfried 	.cra_priority		= 200,
4254d6d6a2cSJohannes Goetzfried 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
4264d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= CAST5_BLOCK_SIZE,
4274d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct async_helper_ctx),
4284d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
4294d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_ablkcipher_type,
4304d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
4314d6d6a2cSJohannes Goetzfried 	.cra_init		= ablk_init,
4324d6d6a2cSJohannes Goetzfried 	.cra_exit		= ablk_exit,
4334d6d6a2cSJohannes Goetzfried 	.cra_u = {
4344d6d6a2cSJohannes Goetzfried 		.ablkcipher = {
4354d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
4364d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
4374d6d6a2cSJohannes Goetzfried 			.ivsize		= CAST5_BLOCK_SIZE,
4384d6d6a2cSJohannes Goetzfried 			.setkey		= ablk_set_key,
4394d6d6a2cSJohannes Goetzfried 			.encrypt	= __ablk_encrypt,
4404d6d6a2cSJohannes Goetzfried 			.decrypt	= ablk_decrypt,
4414d6d6a2cSJohannes Goetzfried 		},
4424d6d6a2cSJohannes Goetzfried 	},
4434d6d6a2cSJohannes Goetzfried }, {
4444d6d6a2cSJohannes Goetzfried 	.cra_name		= "ctr(cast5)",
4454d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "ctr-cast5-avx",
4464d6d6a2cSJohannes Goetzfried 	.cra_priority		= 200,
4474d6d6a2cSJohannes Goetzfried 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
4484d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= 1,
4494d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct async_helper_ctx),
4504d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
4514d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_ablkcipher_type,
4524d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
4534d6d6a2cSJohannes Goetzfried 	.cra_init		= ablk_init,
4544d6d6a2cSJohannes Goetzfried 	.cra_exit		= ablk_exit,
4554d6d6a2cSJohannes Goetzfried 	.cra_u = {
4564d6d6a2cSJohannes Goetzfried 		.ablkcipher = {
4574d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
4584d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
4594d6d6a2cSJohannes Goetzfried 			.ivsize		= CAST5_BLOCK_SIZE,
4604d6d6a2cSJohannes Goetzfried 			.setkey		= ablk_set_key,
4614d6d6a2cSJohannes Goetzfried 			.encrypt	= ablk_encrypt,
4624d6d6a2cSJohannes Goetzfried 			.decrypt	= ablk_encrypt,
4634d6d6a2cSJohannes Goetzfried 			.geniv		= "chainiv",
4644d6d6a2cSJohannes Goetzfried 		},
4654d6d6a2cSJohannes Goetzfried 	},
4664d6d6a2cSJohannes Goetzfried } };
4674d6d6a2cSJohannes Goetzfried 
4684d6d6a2cSJohannes Goetzfried static int __init cast5_init(void)
4694d6d6a2cSJohannes Goetzfried {
470d5d34d98SIngo Molnar 	const char *feature_name;
4714d6d6a2cSJohannes Goetzfried 
472d5d34d98SIngo Molnar 	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
473d5d34d98SIngo Molnar 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
4744d6d6a2cSJohannes Goetzfried 		return -ENODEV;
4754d6d6a2cSJohannes Goetzfried 	}
4764d6d6a2cSJohannes Goetzfried 
4774d6d6a2cSJohannes Goetzfried 	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
4784d6d6a2cSJohannes Goetzfried }
4794d6d6a2cSJohannes Goetzfried 
4804d6d6a2cSJohannes Goetzfried static void __exit cast5_exit(void)
4814d6d6a2cSJohannes Goetzfried {
4824d6d6a2cSJohannes Goetzfried 	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
4834d6d6a2cSJohannes Goetzfried }
4844d6d6a2cSJohannes Goetzfried 
4854d6d6a2cSJohannes Goetzfried module_init(cast5_init);
4864d6d6a2cSJohannes Goetzfried module_exit(cast5_exit);
4874d6d6a2cSJohannes Goetzfried 
4884d6d6a2cSJohannes Goetzfried MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
4894d6d6a2cSJohannes Goetzfried MODULE_LICENSE("GPL");
4905d26a105SKees Cook MODULE_ALIAS_CRYPTO("cast5");
491