xref: /openbmc/linux/arch/arm64/crypto/chacha-neon-glue.c (revision ead5d1f4d877e92c051e1a1ade623d0d30e71619)
/*
 * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
2195a34b77SEric Biggers 
2295a34b77SEric Biggers #include <crypto/algapi.h>
235fb8ef25SArd Biesheuvel #include <crypto/internal/chacha.h>
24e52b7023SEric Biggers #include <crypto/internal/simd.h>
2595a34b77SEric Biggers #include <crypto/internal/skcipher.h>
26b3aad5baSArd Biesheuvel #include <linux/jump_label.h>
2795a34b77SEric Biggers #include <linux/kernel.h>
2895a34b77SEric Biggers #include <linux/module.h>
2995a34b77SEric Biggers 
3095a34b77SEric Biggers #include <asm/hwcap.h>
3195a34b77SEric Biggers #include <asm/neon.h>
3295a34b77SEric Biggers #include <asm/simd.h>
3395a34b77SEric Biggers 
3495a34b77SEric Biggers asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src,
3595a34b77SEric Biggers 				      int nrounds);
3695a34b77SEric Biggers asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
37f2ca1cbdSArd Biesheuvel 				       int nrounds, int bytes);
3895a34b77SEric Biggers asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
3995a34b77SEric Biggers 
40b3aad5baSArd Biesheuvel static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
41b3aad5baSArd Biesheuvel 
/*
 * XOR @bytes bytes of @src with the ChaCha keystream into @dst using the
 * NEON routines, advancing the block counter in state[12] by one for every
 * (full or partial) block consumed.
 *
 * Both callers in this file bracket the call with kernel_neon_begin() and
 * kernel_neon_end().
 */
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  int bytes, int nrounds)
{
	/* More than one block left: feed up to five blocks per call. */
	while (bytes > CHACHA_BLOCK_SIZE) {
		int chunk = min(bytes, CHACHA_BLOCK_SIZE * 5);

		chacha_4block_xor_neon(state, dst, src, nrounds, chunk);
		state[12] += DIV_ROUND_UP(chunk, CHACHA_BLOCK_SIZE);

		src += chunk;
		dst += chunk;
		bytes -= chunk;
	}

	/*
	 * At most one block remains.  chacha_block_xor_neon() takes no length
	 * argument (presumably it always processes a whole block), so go
	 * through a block-sized bounce buffer and copy back only the bytes
	 * that were asked for.
	 */
	if (bytes > 0) {
		u8 bounce[CHACHA_BLOCK_SIZE];

		memcpy(bounce, src, bytes);
		chacha_block_xor_neon(state, bounce, bounce, nrounds);
		memcpy(dst, bounce, bytes);
		state[12] += 1;
	}
}
6495a34b77SEric Biggers 
/*
 * Arch entry point for HChaCha: run the NEON implementation when the CPU has
 * it and SIMD may be used in the current context, otherwise defer to
 * hchacha_block_generic().
 */
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
		kernel_neon_begin();
		hchacha_block_neon(state, stream, nrounds);
		kernel_neon_end();
		return;
	}

	hchacha_block_generic(state, stream, nrounds);
}
EXPORT_SYMBOL(hchacha_block_arch);
76b3aad5baSArd Biesheuvel 
/*
 * Arch entry point for state setup.  There is no NEON-specific variant here;
 * the work is handed straight to chacha_init_generic().
 */
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
82b3aad5baSArd Biesheuvel 
/*
 * Arch entry point for ChaCha en/decryption of @bytes bytes from @src to
 * @dst.
 *
 * The generic code is used when NEON is unavailable, when SIMD may not be
 * used in the current context, or when the request is no larger than a
 * single block.  Otherwise the data is processed with NEON in pieces of at
 * most SZ_4K bytes, each inside its own kernel_neon_begin()/kernel_neon_end()
 * section (presumably to bound the time spent in any one such section).
 */
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	if (!(static_branch_likely(&have_neon) &&
	      bytes > CHACHA_BLOCK_SIZE &&
	      crypto_simd_usable())) {
		chacha_crypt_generic(state, dst, src, bytes, nrounds);
		return;
	}

	/* bytes is non-zero here, so the loop body runs at least once. */
	while (bytes) {
		unsigned int chunk = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, chunk, nrounds);
		kernel_neon_end();

		dst += chunk;
		src += chunk;
		bytes -= chunk;
	}
}
EXPORT_SYMBOL(chacha_crypt_arch);
103b3aad5baSArd Biesheuvel 
chacha_neon_stream_xor(struct skcipher_request * req,const struct chacha_ctx * ctx,const u8 * iv)10495a34b77SEric Biggers static int chacha_neon_stream_xor(struct skcipher_request *req,
105860ab2e5SEric Biggers 				  const struct chacha_ctx *ctx, const u8 *iv)
10695a34b77SEric Biggers {
10795a34b77SEric Biggers 	struct skcipher_walk walk;
10895a34b77SEric Biggers 	u32 state[16];
10995a34b77SEric Biggers 	int err;
11095a34b77SEric Biggers 
11195a34b77SEric Biggers 	err = skcipher_walk_virt(&walk, req, false);
11295a34b77SEric Biggers 
113c77da486SArd Biesheuvel 	chacha_init_generic(state, ctx->key, iv);
11495a34b77SEric Biggers 
11595a34b77SEric Biggers 	while (walk.nbytes > 0) {
11695a34b77SEric Biggers 		unsigned int nbytes = walk.nbytes;
11795a34b77SEric Biggers 
11895a34b77SEric Biggers 		if (nbytes < walk.total)
1192fe55987SArd Biesheuvel 			nbytes = rounddown(nbytes, walk.stride);
12095a34b77SEric Biggers 
121b3aad5baSArd Biesheuvel 		if (!static_branch_likely(&have_neon) ||
122b3aad5baSArd Biesheuvel 		    !crypto_simd_usable()) {
123c77da486SArd Biesheuvel 			chacha_crypt_generic(state, walk.dst.virt.addr,
124c77da486SArd Biesheuvel 					     walk.src.virt.addr, nbytes,
125c77da486SArd Biesheuvel 					     ctx->nrounds);
126c77da486SArd Biesheuvel 		} else {
127f2ca1cbdSArd Biesheuvel 			kernel_neon_begin();
128c77da486SArd Biesheuvel 			chacha_doneon(state, walk.dst.virt.addr,
129c77da486SArd Biesheuvel 				      walk.src.virt.addr, nbytes, ctx->nrounds);
130f2ca1cbdSArd Biesheuvel 			kernel_neon_end();
131c77da486SArd Biesheuvel 		}
13295a34b77SEric Biggers 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
13395a34b77SEric Biggers 	}
13495a34b77SEric Biggers 
13595a34b77SEric Biggers 	return err;
13695a34b77SEric Biggers }
13795a34b77SEric Biggers 
chacha_neon(struct skcipher_request * req)13895a34b77SEric Biggers static int chacha_neon(struct skcipher_request *req)
13995a34b77SEric Biggers {
14095a34b77SEric Biggers 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
14195a34b77SEric Biggers 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
14295a34b77SEric Biggers 
14395a34b77SEric Biggers 	return chacha_neon_stream_xor(req, ctx, req->iv);
14495a34b77SEric Biggers }
14595a34b77SEric Biggers 
xchacha_neon(struct skcipher_request * req)14695a34b77SEric Biggers static int xchacha_neon(struct skcipher_request *req)
14795a34b77SEric Biggers {
14895a34b77SEric Biggers 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
14995a34b77SEric Biggers 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
15095a34b77SEric Biggers 	struct chacha_ctx subctx;
15195a34b77SEric Biggers 	u32 state[16];
15295a34b77SEric Biggers 	u8 real_iv[16];
15395a34b77SEric Biggers 
154c77da486SArd Biesheuvel 	chacha_init_generic(state, ctx->key, req->iv);
155b3aad5baSArd Biesheuvel 	hchacha_block_arch(state, subctx.key, ctx->nrounds);
15695a34b77SEric Biggers 	subctx.nrounds = ctx->nrounds;
15795a34b77SEric Biggers 
15895a34b77SEric Biggers 	memcpy(&real_iv[0], req->iv + 24, 8);
15995a34b77SEric Biggers 	memcpy(&real_iv[8], req->iv + 16, 8);
16095a34b77SEric Biggers 	return chacha_neon_stream_xor(req, &subctx, real_iv);
16195a34b77SEric Biggers }
16295a34b77SEric Biggers 
16395a34b77SEric Biggers static struct skcipher_alg algs[] = {
16495a34b77SEric Biggers 	{
16595a34b77SEric Biggers 		.base.cra_name		= "chacha20",
16695a34b77SEric Biggers 		.base.cra_driver_name	= "chacha20-neon",
16795a34b77SEric Biggers 		.base.cra_priority	= 300,
16895a34b77SEric Biggers 		.base.cra_blocksize	= 1,
16995a34b77SEric Biggers 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
17095a34b77SEric Biggers 		.base.cra_module	= THIS_MODULE,
17195a34b77SEric Biggers 
17295a34b77SEric Biggers 		.min_keysize		= CHACHA_KEY_SIZE,
17395a34b77SEric Biggers 		.max_keysize		= CHACHA_KEY_SIZE,
17495a34b77SEric Biggers 		.ivsize			= CHACHA_IV_SIZE,
17595a34b77SEric Biggers 		.chunksize		= CHACHA_BLOCK_SIZE,
1762fe55987SArd Biesheuvel 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
177c77da486SArd Biesheuvel 		.setkey			= chacha20_setkey,
17895a34b77SEric Biggers 		.encrypt		= chacha_neon,
17995a34b77SEric Biggers 		.decrypt		= chacha_neon,
18095a34b77SEric Biggers 	}, {
18195a34b77SEric Biggers 		.base.cra_name		= "xchacha20",
18295a34b77SEric Biggers 		.base.cra_driver_name	= "xchacha20-neon",
18395a34b77SEric Biggers 		.base.cra_priority	= 300,
18495a34b77SEric Biggers 		.base.cra_blocksize	= 1,
18595a34b77SEric Biggers 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
18695a34b77SEric Biggers 		.base.cra_module	= THIS_MODULE,
18795a34b77SEric Biggers 
18895a34b77SEric Biggers 		.min_keysize		= CHACHA_KEY_SIZE,
18995a34b77SEric Biggers 		.max_keysize		= CHACHA_KEY_SIZE,
19095a34b77SEric Biggers 		.ivsize			= XCHACHA_IV_SIZE,
19195a34b77SEric Biggers 		.chunksize		= CHACHA_BLOCK_SIZE,
1922fe55987SArd Biesheuvel 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
193c77da486SArd Biesheuvel 		.setkey			= chacha20_setkey,
19495a34b77SEric Biggers 		.encrypt		= xchacha_neon,
19595a34b77SEric Biggers 		.decrypt		= xchacha_neon,
19619c11c97SEric Biggers 	}, {
19719c11c97SEric Biggers 		.base.cra_name		= "xchacha12",
19819c11c97SEric Biggers 		.base.cra_driver_name	= "xchacha12-neon",
19919c11c97SEric Biggers 		.base.cra_priority	= 300,
20019c11c97SEric Biggers 		.base.cra_blocksize	= 1,
20119c11c97SEric Biggers 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
20219c11c97SEric Biggers 		.base.cra_module	= THIS_MODULE,
20319c11c97SEric Biggers 
20419c11c97SEric Biggers 		.min_keysize		= CHACHA_KEY_SIZE,
20519c11c97SEric Biggers 		.max_keysize		= CHACHA_KEY_SIZE,
20619c11c97SEric Biggers 		.ivsize			= XCHACHA_IV_SIZE,
20719c11c97SEric Biggers 		.chunksize		= CHACHA_BLOCK_SIZE,
2082fe55987SArd Biesheuvel 		.walksize		= 5 * CHACHA_BLOCK_SIZE,
209c77da486SArd Biesheuvel 		.setkey			= chacha12_setkey,
21019c11c97SEric Biggers 		.encrypt		= xchacha_neon,
21119c11c97SEric Biggers 		.decrypt		= xchacha_neon,
21295a34b77SEric Biggers 	}
21395a34b77SEric Biggers };
21495a34b77SEric Biggers 
chacha_simd_mod_init(void)21595a34b77SEric Biggers static int __init chacha_simd_mod_init(void)
21695a34b77SEric Biggers {
217aaba098fSAndrew Murray 	if (!cpu_have_named_feature(ASIMD))
218b3aad5baSArd Biesheuvel 		return 0;
219b3aad5baSArd Biesheuvel 
220b3aad5baSArd Biesheuvel 	static_branch_enable(&have_neon);
22195a34b77SEric Biggers 
2228394bfecSJason A. Donenfeld 	return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
2238394bfecSJason A. Donenfeld 		crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
22495a34b77SEric Biggers }
22595a34b77SEric Biggers 
chacha_simd_mod_fini(void)22695a34b77SEric Biggers static void __exit chacha_simd_mod_fini(void)
22795a34b77SEric Biggers {
2288394bfecSJason A. Donenfeld 	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && cpu_have_named_feature(ASIMD))
22995a34b77SEric Biggers 		crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
23095a34b77SEric Biggers }
23195a34b77SEric Biggers 
23295a34b77SEric Biggers module_init(chacha_simd_mod_init);
23395a34b77SEric Biggers module_exit(chacha_simd_mod_fini);
23495a34b77SEric Biggers 
23595a34b77SEric Biggers MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
23695a34b77SEric Biggers MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
23795a34b77SEric Biggers MODULE_LICENSE("GPL v2");
23895a34b77SEric Biggers MODULE_ALIAS_CRYPTO("chacha20");
23995a34b77SEric Biggers MODULE_ALIAS_CRYPTO("chacha20-neon");
24095a34b77SEric Biggers MODULE_ALIAS_CRYPTO("xchacha20");
24195a34b77SEric Biggers MODULE_ALIAS_CRYPTO("xchacha20-neon");
24219c11c97SEric Biggers MODULE_ALIAS_CRYPTO("xchacha12");
24319c11c97SEric Biggers MODULE_ALIAS_CRYPTO("xchacha12-neon");
244