xref: /openbmc/linux/arch/x86/crypto/cast5_avx_glue.c (revision 200429cc)
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */
234d6d6a2cSJohannes Goetzfried 
244d6d6a2cSJohannes Goetzfried #include <linux/module.h>
254d6d6a2cSJohannes Goetzfried #include <linux/hardirq.h>
264d6d6a2cSJohannes Goetzfried #include <linux/types.h>
274d6d6a2cSJohannes Goetzfried #include <linux/crypto.h>
284d6d6a2cSJohannes Goetzfried #include <linux/err.h>
294d6d6a2cSJohannes Goetzfried #include <crypto/algapi.h>
304d6d6a2cSJohannes Goetzfried #include <crypto/cast5.h>
314d6d6a2cSJohannes Goetzfried #include <crypto/cryptd.h>
324d6d6a2cSJohannes Goetzfried #include <crypto/ctr.h>
334d6d6a2cSJohannes Goetzfried #include <asm/xcr.h>
344d6d6a2cSJohannes Goetzfried #include <asm/xsave.h>
354d6d6a2cSJohannes Goetzfried #include <asm/crypto/ablk_helper.h>
364d6d6a2cSJohannes Goetzfried #include <asm/crypto/glue_helper.h>
374d6d6a2cSJohannes Goetzfried 
384d6d6a2cSJohannes Goetzfried #define CAST5_PARALLEL_BLOCKS 16
394d6d6a2cSJohannes Goetzfried 
404d6d6a2cSJohannes Goetzfried asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst,
414d6d6a2cSJohannes Goetzfried 				      const u8 *src, bool xor);
424d6d6a2cSJohannes Goetzfried asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
434d6d6a2cSJohannes Goetzfried 				    const u8 *src);
444d6d6a2cSJohannes Goetzfried 
454d6d6a2cSJohannes Goetzfried static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst,
464d6d6a2cSJohannes Goetzfried 				      const u8 *src)
474d6d6a2cSJohannes Goetzfried {
484d6d6a2cSJohannes Goetzfried 	__cast5_enc_blk_16way(ctx, dst, src, false);
494d6d6a2cSJohannes Goetzfried }
504d6d6a2cSJohannes Goetzfried 
514d6d6a2cSJohannes Goetzfried static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
524d6d6a2cSJohannes Goetzfried 					  const u8 *src)
534d6d6a2cSJohannes Goetzfried {
544d6d6a2cSJohannes Goetzfried 	__cast5_enc_blk_16way(ctx, dst, src, true);
554d6d6a2cSJohannes Goetzfried }
564d6d6a2cSJohannes Goetzfried 
574d6d6a2cSJohannes Goetzfried static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
584d6d6a2cSJohannes Goetzfried 				      const u8 *src)
594d6d6a2cSJohannes Goetzfried {
604d6d6a2cSJohannes Goetzfried 	cast5_dec_blk_16way(ctx, dst, src);
614d6d6a2cSJohannes Goetzfried }
624d6d6a2cSJohannes Goetzfried 
634d6d6a2cSJohannes Goetzfried 
644d6d6a2cSJohannes Goetzfried static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
654d6d6a2cSJohannes Goetzfried {
664d6d6a2cSJohannes Goetzfried 	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
674d6d6a2cSJohannes Goetzfried 			      NULL, fpu_enabled, nbytes);
684d6d6a2cSJohannes Goetzfried }
694d6d6a2cSJohannes Goetzfried 
/*
 * Counterpart of cast5_fpu_begin(): hand the accumulated @fpu_enabled
 * state to glue_fpu_end().
 *
 * This function returns void, so the helper is called as a plain
 * statement; ISO C does not allow "return <expression>;" in a void
 * function.
 */
static inline void cast5_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
744d6d6a2cSJohannes Goetzfried 
754d6d6a2cSJohannes Goetzfried static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
764d6d6a2cSJohannes Goetzfried 		     bool enc)
774d6d6a2cSJohannes Goetzfried {
784d6d6a2cSJohannes Goetzfried 	bool fpu_enabled = false;
794d6d6a2cSJohannes Goetzfried 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
804d6d6a2cSJohannes Goetzfried 	const unsigned int bsize = CAST5_BLOCK_SIZE;
814d6d6a2cSJohannes Goetzfried 	unsigned int nbytes;
824d6d6a2cSJohannes Goetzfried 	int err;
834d6d6a2cSJohannes Goetzfried 
844d6d6a2cSJohannes Goetzfried 	err = blkcipher_walk_virt(desc, walk);
854d6d6a2cSJohannes Goetzfried 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
864d6d6a2cSJohannes Goetzfried 
874d6d6a2cSJohannes Goetzfried 	while ((nbytes = walk->nbytes)) {
884d6d6a2cSJohannes Goetzfried 		u8 *wsrc = walk->src.virt.addr;
894d6d6a2cSJohannes Goetzfried 		u8 *wdst = walk->dst.virt.addr;
904d6d6a2cSJohannes Goetzfried 
914d6d6a2cSJohannes Goetzfried 		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
924d6d6a2cSJohannes Goetzfried 
934d6d6a2cSJohannes Goetzfried 		/* Process multi-block batch */
944d6d6a2cSJohannes Goetzfried 		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
954d6d6a2cSJohannes Goetzfried 			do {
964d6d6a2cSJohannes Goetzfried 				if (enc)
974d6d6a2cSJohannes Goetzfried 					cast5_enc_blk_xway(ctx, wdst, wsrc);
984d6d6a2cSJohannes Goetzfried 				else
994d6d6a2cSJohannes Goetzfried 					cast5_dec_blk_xway(ctx, wdst, wsrc);
1004d6d6a2cSJohannes Goetzfried 
1014d6d6a2cSJohannes Goetzfried 				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
1024d6d6a2cSJohannes Goetzfried 				wdst += bsize * CAST5_PARALLEL_BLOCKS;
1034d6d6a2cSJohannes Goetzfried 				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
1044d6d6a2cSJohannes Goetzfried 			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
1054d6d6a2cSJohannes Goetzfried 
1064d6d6a2cSJohannes Goetzfried 			if (nbytes < bsize)
1074d6d6a2cSJohannes Goetzfried 				goto done;
1084d6d6a2cSJohannes Goetzfried 		}
1094d6d6a2cSJohannes Goetzfried 
1104d6d6a2cSJohannes Goetzfried 		/* Handle leftovers */
1114d6d6a2cSJohannes Goetzfried 		do {
1124d6d6a2cSJohannes Goetzfried 			if (enc)
1134d6d6a2cSJohannes Goetzfried 				__cast5_encrypt(ctx, wdst, wsrc);
1144d6d6a2cSJohannes Goetzfried 			else
1154d6d6a2cSJohannes Goetzfried 				__cast5_decrypt(ctx, wdst, wsrc);
1164d6d6a2cSJohannes Goetzfried 
1174d6d6a2cSJohannes Goetzfried 			wsrc += bsize;
1184d6d6a2cSJohannes Goetzfried 			wdst += bsize;
1194d6d6a2cSJohannes Goetzfried 			nbytes -= bsize;
1204d6d6a2cSJohannes Goetzfried 		} while (nbytes >= bsize);
1214d6d6a2cSJohannes Goetzfried 
1224d6d6a2cSJohannes Goetzfried done:
1234d6d6a2cSJohannes Goetzfried 		err = blkcipher_walk_done(desc, walk, nbytes);
1244d6d6a2cSJohannes Goetzfried 	}
1254d6d6a2cSJohannes Goetzfried 
1264d6d6a2cSJohannes Goetzfried 	cast5_fpu_end(fpu_enabled);
1274d6d6a2cSJohannes Goetzfried 	return err;
1284d6d6a2cSJohannes Goetzfried }
1294d6d6a2cSJohannes Goetzfried 
1304d6d6a2cSJohannes Goetzfried static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1314d6d6a2cSJohannes Goetzfried 		       struct scatterlist *src, unsigned int nbytes)
1324d6d6a2cSJohannes Goetzfried {
1334d6d6a2cSJohannes Goetzfried 	struct blkcipher_walk walk;
1344d6d6a2cSJohannes Goetzfried 
1354d6d6a2cSJohannes Goetzfried 	blkcipher_walk_init(&walk, dst, src, nbytes);
1364d6d6a2cSJohannes Goetzfried 	return ecb_crypt(desc, &walk, true);
1374d6d6a2cSJohannes Goetzfried }
1384d6d6a2cSJohannes Goetzfried 
1394d6d6a2cSJohannes Goetzfried static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1404d6d6a2cSJohannes Goetzfried 		       struct scatterlist *src, unsigned int nbytes)
1414d6d6a2cSJohannes Goetzfried {
1424d6d6a2cSJohannes Goetzfried 	struct blkcipher_walk walk;
1434d6d6a2cSJohannes Goetzfried 
1444d6d6a2cSJohannes Goetzfried 	blkcipher_walk_init(&walk, dst, src, nbytes);
1454d6d6a2cSJohannes Goetzfried 	return ecb_crypt(desc, &walk, false);
1464d6d6a2cSJohannes Goetzfried }
1474d6d6a2cSJohannes Goetzfried 
1484d6d6a2cSJohannes Goetzfried static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
1494d6d6a2cSJohannes Goetzfried 				  struct blkcipher_walk *walk)
1504d6d6a2cSJohannes Goetzfried {
1514d6d6a2cSJohannes Goetzfried 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1524d6d6a2cSJohannes Goetzfried 	const unsigned int bsize = CAST5_BLOCK_SIZE;
1534d6d6a2cSJohannes Goetzfried 	unsigned int nbytes = walk->nbytes;
1544d6d6a2cSJohannes Goetzfried 	u64 *src = (u64 *)walk->src.virt.addr;
1554d6d6a2cSJohannes Goetzfried 	u64 *dst = (u64 *)walk->dst.virt.addr;
1564d6d6a2cSJohannes Goetzfried 	u64 *iv = (u64 *)walk->iv;
1574d6d6a2cSJohannes Goetzfried 
1584d6d6a2cSJohannes Goetzfried 	do {
1594d6d6a2cSJohannes Goetzfried 		*dst = *src ^ *iv;
1604d6d6a2cSJohannes Goetzfried 		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
1614d6d6a2cSJohannes Goetzfried 		iv = dst;
1624d6d6a2cSJohannes Goetzfried 
1634d6d6a2cSJohannes Goetzfried 		src += 1;
1644d6d6a2cSJohannes Goetzfried 		dst += 1;
1654d6d6a2cSJohannes Goetzfried 		nbytes -= bsize;
1664d6d6a2cSJohannes Goetzfried 	} while (nbytes >= bsize);
1674d6d6a2cSJohannes Goetzfried 
168200429ccSJussi Kivilinna 	*(u64 *)walk->iv = *iv;
1694d6d6a2cSJohannes Goetzfried 	return nbytes;
1704d6d6a2cSJohannes Goetzfried }
1714d6d6a2cSJohannes Goetzfried 
1724d6d6a2cSJohannes Goetzfried static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1734d6d6a2cSJohannes Goetzfried 		       struct scatterlist *src, unsigned int nbytes)
1744d6d6a2cSJohannes Goetzfried {
1754d6d6a2cSJohannes Goetzfried 	struct blkcipher_walk walk;
1764d6d6a2cSJohannes Goetzfried 	int err;
1774d6d6a2cSJohannes Goetzfried 
1784d6d6a2cSJohannes Goetzfried 	blkcipher_walk_init(&walk, dst, src, nbytes);
1794d6d6a2cSJohannes Goetzfried 	err = blkcipher_walk_virt(desc, &walk);
1804d6d6a2cSJohannes Goetzfried 
1814d6d6a2cSJohannes Goetzfried 	while ((nbytes = walk.nbytes)) {
1824d6d6a2cSJohannes Goetzfried 		nbytes = __cbc_encrypt(desc, &walk);
1834d6d6a2cSJohannes Goetzfried 		err = blkcipher_walk_done(desc, &walk, nbytes);
1844d6d6a2cSJohannes Goetzfried 	}
1854d6d6a2cSJohannes Goetzfried 
1864d6d6a2cSJohannes Goetzfried 	return err;
1874d6d6a2cSJohannes Goetzfried }
1884d6d6a2cSJohannes Goetzfried 
1894d6d6a2cSJohannes Goetzfried static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
1904d6d6a2cSJohannes Goetzfried 				  struct blkcipher_walk *walk)
1914d6d6a2cSJohannes Goetzfried {
1924d6d6a2cSJohannes Goetzfried 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1934d6d6a2cSJohannes Goetzfried 	const unsigned int bsize = CAST5_BLOCK_SIZE;
1944d6d6a2cSJohannes Goetzfried 	unsigned int nbytes = walk->nbytes;
1954d6d6a2cSJohannes Goetzfried 	u64 *src = (u64 *)walk->src.virt.addr;
1964d6d6a2cSJohannes Goetzfried 	u64 *dst = (u64 *)walk->dst.virt.addr;
1974d6d6a2cSJohannes Goetzfried 	u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
1984d6d6a2cSJohannes Goetzfried 	u64 last_iv;
1994d6d6a2cSJohannes Goetzfried 	int i;
2004d6d6a2cSJohannes Goetzfried 
2014d6d6a2cSJohannes Goetzfried 	/* Start of the last block. */
2024d6d6a2cSJohannes Goetzfried 	src += nbytes / bsize - 1;
2034d6d6a2cSJohannes Goetzfried 	dst += nbytes / bsize - 1;
2044d6d6a2cSJohannes Goetzfried 
2054d6d6a2cSJohannes Goetzfried 	last_iv = *src;
2064d6d6a2cSJohannes Goetzfried 
2074d6d6a2cSJohannes Goetzfried 	/* Process multi-block batch */
2084d6d6a2cSJohannes Goetzfried 	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
2094d6d6a2cSJohannes Goetzfried 		do {
2104d6d6a2cSJohannes Goetzfried 			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
2114d6d6a2cSJohannes Goetzfried 			src -= CAST5_PARALLEL_BLOCKS - 1;
2124d6d6a2cSJohannes Goetzfried 			dst -= CAST5_PARALLEL_BLOCKS - 1;
2134d6d6a2cSJohannes Goetzfried 
2144d6d6a2cSJohannes Goetzfried 			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
2154d6d6a2cSJohannes Goetzfried 				ivs[i] = src[i];
2164d6d6a2cSJohannes Goetzfried 
2174d6d6a2cSJohannes Goetzfried 			cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
2184d6d6a2cSJohannes Goetzfried 
2194d6d6a2cSJohannes Goetzfried 			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
2204d6d6a2cSJohannes Goetzfried 				*(dst + (i + 1)) ^= *(ivs + i);
2214d6d6a2cSJohannes Goetzfried 
2224d6d6a2cSJohannes Goetzfried 			nbytes -= bsize;
2234d6d6a2cSJohannes Goetzfried 			if (nbytes < bsize)
2244d6d6a2cSJohannes Goetzfried 				goto done;
2254d6d6a2cSJohannes Goetzfried 
2264d6d6a2cSJohannes Goetzfried 			*dst ^= *(src - 1);
2274d6d6a2cSJohannes Goetzfried 			src -= 1;
2284d6d6a2cSJohannes Goetzfried 			dst -= 1;
2294d6d6a2cSJohannes Goetzfried 		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
2304d6d6a2cSJohannes Goetzfried 
2314d6d6a2cSJohannes Goetzfried 		if (nbytes < bsize)
2324d6d6a2cSJohannes Goetzfried 			goto done;
2334d6d6a2cSJohannes Goetzfried 	}
2344d6d6a2cSJohannes Goetzfried 
2354d6d6a2cSJohannes Goetzfried 	/* Handle leftovers */
2364d6d6a2cSJohannes Goetzfried 	for (;;) {
2374d6d6a2cSJohannes Goetzfried 		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
2384d6d6a2cSJohannes Goetzfried 
2394d6d6a2cSJohannes Goetzfried 		nbytes -= bsize;
2404d6d6a2cSJohannes Goetzfried 		if (nbytes < bsize)
2414d6d6a2cSJohannes Goetzfried 			break;
2424d6d6a2cSJohannes Goetzfried 
2434d6d6a2cSJohannes Goetzfried 		*dst ^= *(src - 1);
2444d6d6a2cSJohannes Goetzfried 		src -= 1;
2454d6d6a2cSJohannes Goetzfried 		dst -= 1;
2464d6d6a2cSJohannes Goetzfried 	}
2474d6d6a2cSJohannes Goetzfried 
2484d6d6a2cSJohannes Goetzfried done:
2494d6d6a2cSJohannes Goetzfried 	*dst ^= *(u64 *)walk->iv;
2504d6d6a2cSJohannes Goetzfried 	*(u64 *)walk->iv = last_iv;
2514d6d6a2cSJohannes Goetzfried 
2524d6d6a2cSJohannes Goetzfried 	return nbytes;
2534d6d6a2cSJohannes Goetzfried }
2544d6d6a2cSJohannes Goetzfried 
2554d6d6a2cSJohannes Goetzfried static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
2564d6d6a2cSJohannes Goetzfried 		       struct scatterlist *src, unsigned int nbytes)
2574d6d6a2cSJohannes Goetzfried {
2584d6d6a2cSJohannes Goetzfried 	bool fpu_enabled = false;
2594d6d6a2cSJohannes Goetzfried 	struct blkcipher_walk walk;
2604d6d6a2cSJohannes Goetzfried 	int err;
2614d6d6a2cSJohannes Goetzfried 
2624d6d6a2cSJohannes Goetzfried 	blkcipher_walk_init(&walk, dst, src, nbytes);
2634d6d6a2cSJohannes Goetzfried 	err = blkcipher_walk_virt(desc, &walk);
2644d6d6a2cSJohannes Goetzfried 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
2654d6d6a2cSJohannes Goetzfried 
2664d6d6a2cSJohannes Goetzfried 	while ((nbytes = walk.nbytes)) {
2674d6d6a2cSJohannes Goetzfried 		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
2684d6d6a2cSJohannes Goetzfried 		nbytes = __cbc_decrypt(desc, &walk);
2694d6d6a2cSJohannes Goetzfried 		err = blkcipher_walk_done(desc, &walk, nbytes);
2704d6d6a2cSJohannes Goetzfried 	}
2714d6d6a2cSJohannes Goetzfried 
2724d6d6a2cSJohannes Goetzfried 	cast5_fpu_end(fpu_enabled);
2734d6d6a2cSJohannes Goetzfried 	return err;
2744d6d6a2cSJohannes Goetzfried }
2754d6d6a2cSJohannes Goetzfried 
2764d6d6a2cSJohannes Goetzfried static void ctr_crypt_final(struct blkcipher_desc *desc,
2774d6d6a2cSJohannes Goetzfried 			    struct blkcipher_walk *walk)
2784d6d6a2cSJohannes Goetzfried {
2794d6d6a2cSJohannes Goetzfried 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
2804d6d6a2cSJohannes Goetzfried 	u8 *ctrblk = walk->iv;
2814d6d6a2cSJohannes Goetzfried 	u8 keystream[CAST5_BLOCK_SIZE];
2824d6d6a2cSJohannes Goetzfried 	u8 *src = walk->src.virt.addr;
2834d6d6a2cSJohannes Goetzfried 	u8 *dst = walk->dst.virt.addr;
2844d6d6a2cSJohannes Goetzfried 	unsigned int nbytes = walk->nbytes;
2854d6d6a2cSJohannes Goetzfried 
2864d6d6a2cSJohannes Goetzfried 	__cast5_encrypt(ctx, keystream, ctrblk);
2874d6d6a2cSJohannes Goetzfried 	crypto_xor(keystream, src, nbytes);
2884d6d6a2cSJohannes Goetzfried 	memcpy(dst, keystream, nbytes);
2894d6d6a2cSJohannes Goetzfried 
2904d6d6a2cSJohannes Goetzfried 	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
2914d6d6a2cSJohannes Goetzfried }
2924d6d6a2cSJohannes Goetzfried 
2934d6d6a2cSJohannes Goetzfried static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
2944d6d6a2cSJohannes Goetzfried 				struct blkcipher_walk *walk)
2954d6d6a2cSJohannes Goetzfried {
2964d6d6a2cSJohannes Goetzfried 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
2974d6d6a2cSJohannes Goetzfried 	const unsigned int bsize = CAST5_BLOCK_SIZE;
2984d6d6a2cSJohannes Goetzfried 	unsigned int nbytes = walk->nbytes;
2994d6d6a2cSJohannes Goetzfried 	u64 *src = (u64 *)walk->src.virt.addr;
3004d6d6a2cSJohannes Goetzfried 	u64 *dst = (u64 *)walk->dst.virt.addr;
3014d6d6a2cSJohannes Goetzfried 	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
3024d6d6a2cSJohannes Goetzfried 	__be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
3034d6d6a2cSJohannes Goetzfried 	int i;
3044d6d6a2cSJohannes Goetzfried 
3054d6d6a2cSJohannes Goetzfried 	/* Process multi-block batch */
3064d6d6a2cSJohannes Goetzfried 	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
3074d6d6a2cSJohannes Goetzfried 		do {
3084d6d6a2cSJohannes Goetzfried 			/* create ctrblks for parallel encrypt */
3094d6d6a2cSJohannes Goetzfried 			for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) {
3104d6d6a2cSJohannes Goetzfried 				if (dst != src)
3114d6d6a2cSJohannes Goetzfried 					dst[i] = src[i];
3124d6d6a2cSJohannes Goetzfried 
3134d6d6a2cSJohannes Goetzfried 				ctrblocks[i] = cpu_to_be64(ctrblk++);
3144d6d6a2cSJohannes Goetzfried 			}
3154d6d6a2cSJohannes Goetzfried 
3164d6d6a2cSJohannes Goetzfried 			cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
3174d6d6a2cSJohannes Goetzfried 					       (u8 *)ctrblocks);
3184d6d6a2cSJohannes Goetzfried 
3194d6d6a2cSJohannes Goetzfried 			src += CAST5_PARALLEL_BLOCKS;
3204d6d6a2cSJohannes Goetzfried 			dst += CAST5_PARALLEL_BLOCKS;
3214d6d6a2cSJohannes Goetzfried 			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
3224d6d6a2cSJohannes Goetzfried 		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
3234d6d6a2cSJohannes Goetzfried 
3244d6d6a2cSJohannes Goetzfried 		if (nbytes < bsize)
3254d6d6a2cSJohannes Goetzfried 			goto done;
3264d6d6a2cSJohannes Goetzfried 	}
3274d6d6a2cSJohannes Goetzfried 
3284d6d6a2cSJohannes Goetzfried 	/* Handle leftovers */
3294d6d6a2cSJohannes Goetzfried 	do {
3304d6d6a2cSJohannes Goetzfried 		if (dst != src)
3314d6d6a2cSJohannes Goetzfried 			*dst = *src;
3324d6d6a2cSJohannes Goetzfried 
3334d6d6a2cSJohannes Goetzfried 		ctrblocks[0] = cpu_to_be64(ctrblk++);
3344d6d6a2cSJohannes Goetzfried 
3354d6d6a2cSJohannes Goetzfried 		__cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
3364d6d6a2cSJohannes Goetzfried 		*dst ^= ctrblocks[0];
3374d6d6a2cSJohannes Goetzfried 
3384d6d6a2cSJohannes Goetzfried 		src += 1;
3394d6d6a2cSJohannes Goetzfried 		dst += 1;
3404d6d6a2cSJohannes Goetzfried 		nbytes -= bsize;
3414d6d6a2cSJohannes Goetzfried 	} while (nbytes >= bsize);
3424d6d6a2cSJohannes Goetzfried 
3434d6d6a2cSJohannes Goetzfried done:
3444d6d6a2cSJohannes Goetzfried 	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
3454d6d6a2cSJohannes Goetzfried 	return nbytes;
3464d6d6a2cSJohannes Goetzfried }
3474d6d6a2cSJohannes Goetzfried 
3484d6d6a2cSJohannes Goetzfried static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
3494d6d6a2cSJohannes Goetzfried 		     struct scatterlist *src, unsigned int nbytes)
3504d6d6a2cSJohannes Goetzfried {
3514d6d6a2cSJohannes Goetzfried 	bool fpu_enabled = false;
3524d6d6a2cSJohannes Goetzfried 	struct blkcipher_walk walk;
3534d6d6a2cSJohannes Goetzfried 	int err;
3544d6d6a2cSJohannes Goetzfried 
3554d6d6a2cSJohannes Goetzfried 	blkcipher_walk_init(&walk, dst, src, nbytes);
3564d6d6a2cSJohannes Goetzfried 	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
3574d6d6a2cSJohannes Goetzfried 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
3584d6d6a2cSJohannes Goetzfried 
3594d6d6a2cSJohannes Goetzfried 	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
3604d6d6a2cSJohannes Goetzfried 		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
3614d6d6a2cSJohannes Goetzfried 		nbytes = __ctr_crypt(desc, &walk);
3624d6d6a2cSJohannes Goetzfried 		err = blkcipher_walk_done(desc, &walk, nbytes);
3634d6d6a2cSJohannes Goetzfried 	}
3644d6d6a2cSJohannes Goetzfried 
3654d6d6a2cSJohannes Goetzfried 	cast5_fpu_end(fpu_enabled);
3664d6d6a2cSJohannes Goetzfried 
3674d6d6a2cSJohannes Goetzfried 	if (walk.nbytes) {
3684d6d6a2cSJohannes Goetzfried 		ctr_crypt_final(desc, &walk);
3694d6d6a2cSJohannes Goetzfried 		err = blkcipher_walk_done(desc, &walk, 0);
3704d6d6a2cSJohannes Goetzfried 	}
3714d6d6a2cSJohannes Goetzfried 
3724d6d6a2cSJohannes Goetzfried 	return err;
3734d6d6a2cSJohannes Goetzfried }
3744d6d6a2cSJohannes Goetzfried 
3754d6d6a2cSJohannes Goetzfried 
3764d6d6a2cSJohannes Goetzfried static struct crypto_alg cast5_algs[6] = { {
3774d6d6a2cSJohannes Goetzfried 	.cra_name		= "__ecb-cast5-avx",
3784d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "__driver-ecb-cast5-avx",
3794d6d6a2cSJohannes Goetzfried 	.cra_priority		= 0,
3804d6d6a2cSJohannes Goetzfried 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
3814d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= CAST5_BLOCK_SIZE,
3824d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct cast5_ctx),
3834d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
3844d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_blkcipher_type,
3854d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
3864d6d6a2cSJohannes Goetzfried 	.cra_u = {
3874d6d6a2cSJohannes Goetzfried 		.blkcipher = {
3884d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
3894d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
3904d6d6a2cSJohannes Goetzfried 			.setkey		= cast5_setkey,
3914d6d6a2cSJohannes Goetzfried 			.encrypt	= ecb_encrypt,
3924d6d6a2cSJohannes Goetzfried 			.decrypt	= ecb_decrypt,
3934d6d6a2cSJohannes Goetzfried 		},
3944d6d6a2cSJohannes Goetzfried 	},
3954d6d6a2cSJohannes Goetzfried }, {
3964d6d6a2cSJohannes Goetzfried 	.cra_name		= "__cbc-cast5-avx",
3974d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "__driver-cbc-cast5-avx",
3984d6d6a2cSJohannes Goetzfried 	.cra_priority		= 0,
3994d6d6a2cSJohannes Goetzfried 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
4004d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= CAST5_BLOCK_SIZE,
4014d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct cast5_ctx),
4024d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
4034d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_blkcipher_type,
4044d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
4054d6d6a2cSJohannes Goetzfried 	.cra_u = {
4064d6d6a2cSJohannes Goetzfried 		.blkcipher = {
4074d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
4084d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
4094d6d6a2cSJohannes Goetzfried 			.setkey		= cast5_setkey,
4104d6d6a2cSJohannes Goetzfried 			.encrypt	= cbc_encrypt,
4114d6d6a2cSJohannes Goetzfried 			.decrypt	= cbc_decrypt,
4124d6d6a2cSJohannes Goetzfried 		},
4134d6d6a2cSJohannes Goetzfried 	},
4144d6d6a2cSJohannes Goetzfried }, {
4154d6d6a2cSJohannes Goetzfried 	.cra_name		= "__ctr-cast5-avx",
4164d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "__driver-ctr-cast5-avx",
4174d6d6a2cSJohannes Goetzfried 	.cra_priority		= 0,
4184d6d6a2cSJohannes Goetzfried 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
4194d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= 1,
4204d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct cast5_ctx),
4214d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
4224d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_blkcipher_type,
4234d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
4244d6d6a2cSJohannes Goetzfried 	.cra_u = {
4254d6d6a2cSJohannes Goetzfried 		.blkcipher = {
4264d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
4274d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
4284d6d6a2cSJohannes Goetzfried 			.ivsize		= CAST5_BLOCK_SIZE,
4294d6d6a2cSJohannes Goetzfried 			.setkey		= cast5_setkey,
4304d6d6a2cSJohannes Goetzfried 			.encrypt	= ctr_crypt,
4314d6d6a2cSJohannes Goetzfried 			.decrypt	= ctr_crypt,
4324d6d6a2cSJohannes Goetzfried 		},
4334d6d6a2cSJohannes Goetzfried 	},
4344d6d6a2cSJohannes Goetzfried }, {
4354d6d6a2cSJohannes Goetzfried 	.cra_name		= "ecb(cast5)",
4364d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "ecb-cast5-avx",
4374d6d6a2cSJohannes Goetzfried 	.cra_priority		= 200,
4384d6d6a2cSJohannes Goetzfried 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
4394d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= CAST5_BLOCK_SIZE,
4404d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct async_helper_ctx),
4414d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
4424d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_ablkcipher_type,
4434d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
4444d6d6a2cSJohannes Goetzfried 	.cra_init		= ablk_init,
4454d6d6a2cSJohannes Goetzfried 	.cra_exit		= ablk_exit,
4464d6d6a2cSJohannes Goetzfried 	.cra_u = {
4474d6d6a2cSJohannes Goetzfried 		.ablkcipher = {
4484d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
4494d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
4504d6d6a2cSJohannes Goetzfried 			.setkey		= ablk_set_key,
4514d6d6a2cSJohannes Goetzfried 			.encrypt	= ablk_encrypt,
4524d6d6a2cSJohannes Goetzfried 			.decrypt	= ablk_decrypt,
4534d6d6a2cSJohannes Goetzfried 		},
4544d6d6a2cSJohannes Goetzfried 	},
4554d6d6a2cSJohannes Goetzfried }, {
4564d6d6a2cSJohannes Goetzfried 	.cra_name		= "cbc(cast5)",
4574d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "cbc-cast5-avx",
4584d6d6a2cSJohannes Goetzfried 	.cra_priority		= 200,
4594d6d6a2cSJohannes Goetzfried 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
4604d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= CAST5_BLOCK_SIZE,
4614d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct async_helper_ctx),
4624d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
4634d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_ablkcipher_type,
4644d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
4654d6d6a2cSJohannes Goetzfried 	.cra_init		= ablk_init,
4664d6d6a2cSJohannes Goetzfried 	.cra_exit		= ablk_exit,
4674d6d6a2cSJohannes Goetzfried 	.cra_u = {
4684d6d6a2cSJohannes Goetzfried 		.ablkcipher = {
4694d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
4704d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
4714d6d6a2cSJohannes Goetzfried 			.ivsize		= CAST5_BLOCK_SIZE,
4724d6d6a2cSJohannes Goetzfried 			.setkey		= ablk_set_key,
4734d6d6a2cSJohannes Goetzfried 			.encrypt	= __ablk_encrypt,
4744d6d6a2cSJohannes Goetzfried 			.decrypt	= ablk_decrypt,
4754d6d6a2cSJohannes Goetzfried 		},
4764d6d6a2cSJohannes Goetzfried 	},
4774d6d6a2cSJohannes Goetzfried }, {
4784d6d6a2cSJohannes Goetzfried 	.cra_name		= "ctr(cast5)",
4794d6d6a2cSJohannes Goetzfried 	.cra_driver_name	= "ctr-cast5-avx",
4804d6d6a2cSJohannes Goetzfried 	.cra_priority		= 200,
4814d6d6a2cSJohannes Goetzfried 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
4824d6d6a2cSJohannes Goetzfried 	.cra_blocksize		= 1,
4834d6d6a2cSJohannes Goetzfried 	.cra_ctxsize		= sizeof(struct async_helper_ctx),
4844d6d6a2cSJohannes Goetzfried 	.cra_alignmask		= 0,
4854d6d6a2cSJohannes Goetzfried 	.cra_type		= &crypto_ablkcipher_type,
4864d6d6a2cSJohannes Goetzfried 	.cra_module		= THIS_MODULE,
4874d6d6a2cSJohannes Goetzfried 	.cra_init		= ablk_init,
4884d6d6a2cSJohannes Goetzfried 	.cra_exit		= ablk_exit,
4894d6d6a2cSJohannes Goetzfried 	.cra_u = {
4904d6d6a2cSJohannes Goetzfried 		.ablkcipher = {
4914d6d6a2cSJohannes Goetzfried 			.min_keysize	= CAST5_MIN_KEY_SIZE,
4924d6d6a2cSJohannes Goetzfried 			.max_keysize	= CAST5_MAX_KEY_SIZE,
4934d6d6a2cSJohannes Goetzfried 			.ivsize		= CAST5_BLOCK_SIZE,
4944d6d6a2cSJohannes Goetzfried 			.setkey		= ablk_set_key,
4954d6d6a2cSJohannes Goetzfried 			.encrypt	= ablk_encrypt,
4964d6d6a2cSJohannes Goetzfried 			.decrypt	= ablk_encrypt,
4974d6d6a2cSJohannes Goetzfried 			.geniv		= "chainiv",
4984d6d6a2cSJohannes Goetzfried 		},
4994d6d6a2cSJohannes Goetzfried 	},
5004d6d6a2cSJohannes Goetzfried } };
5014d6d6a2cSJohannes Goetzfried 
5024d6d6a2cSJohannes Goetzfried static int __init cast5_init(void)
5034d6d6a2cSJohannes Goetzfried {
5044d6d6a2cSJohannes Goetzfried 	u64 xcr0;
5054d6d6a2cSJohannes Goetzfried 
5064d6d6a2cSJohannes Goetzfried 	if (!cpu_has_avx || !cpu_has_osxsave) {
5074d6d6a2cSJohannes Goetzfried 		pr_info("AVX instructions are not detected.\n");
5084d6d6a2cSJohannes Goetzfried 		return -ENODEV;
5094d6d6a2cSJohannes Goetzfried 	}
5104d6d6a2cSJohannes Goetzfried 
5114d6d6a2cSJohannes Goetzfried 	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
5124d6d6a2cSJohannes Goetzfried 	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
5134d6d6a2cSJohannes Goetzfried 		pr_info("AVX detected but unusable.\n");
5144d6d6a2cSJohannes Goetzfried 		return -ENODEV;
5154d6d6a2cSJohannes Goetzfried 	}
5164d6d6a2cSJohannes Goetzfried 
5174d6d6a2cSJohannes Goetzfried 	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
5184d6d6a2cSJohannes Goetzfried }
5194d6d6a2cSJohannes Goetzfried 
5204d6d6a2cSJohannes Goetzfried static void __exit cast5_exit(void)
5214d6d6a2cSJohannes Goetzfried {
5224d6d6a2cSJohannes Goetzfried 	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
5234d6d6a2cSJohannes Goetzfried }
5244d6d6a2cSJohannes Goetzfried 
5254d6d6a2cSJohannes Goetzfried module_init(cast5_init);
5264d6d6a2cSJohannes Goetzfried module_exit(cast5_exit);
5274d6d6a2cSJohannes Goetzfried 
5284d6d6a2cSJohannes Goetzfried MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
5294d6d6a2cSJohannes Goetzfried MODULE_LICENSE("GPL");
5304d6d6a2cSJohannes Goetzfried MODULE_ALIAS("cast5");
531