/*
 * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
2195a34b77SEric Biggers
2295a34b77SEric Biggers #include <crypto/algapi.h>
235fb8ef25SArd Biesheuvel #include <crypto/internal/chacha.h>
24e52b7023SEric Biggers #include <crypto/internal/simd.h>
2595a34b77SEric Biggers #include <crypto/internal/skcipher.h>
26b3aad5baSArd Biesheuvel #include <linux/jump_label.h>
2795a34b77SEric Biggers #include <linux/kernel.h>
2895a34b77SEric Biggers #include <linux/module.h>
2995a34b77SEric Biggers
3095a34b77SEric Biggers #include <asm/hwcap.h>
3195a34b77SEric Biggers #include <asm/neon.h>
3295a34b77SEric Biggers #include <asm/simd.h>
3395a34b77SEric Biggers
3495a34b77SEric Biggers asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src,
3595a34b77SEric Biggers int nrounds);
3695a34b77SEric Biggers asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
37f2ca1cbdSArd Biesheuvel int nrounds, int bytes);
3895a34b77SEric Biggers asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
3995a34b77SEric Biggers
40b3aad5baSArd Biesheuvel static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
41b3aad5baSArd Biesheuvel
/*
 * Process @bytes bytes from @src into @dst with the NEON ChaCha routines,
 * advancing the counter word state[12] by one for every block consumed.
 *
 * The data is handed to chacha_4block_xor_neon() in pieces of at most five
 * blocks. A final piece of one block or less is routed through a stack
 * buffer and chacha_block_xor_neon() instead: that routine takes no length
 * argument (presumably it always operates on a whole block), so only the
 * requested number of bytes is copied in and back out.
 *
 * The callers in this file invoke this between kernel_neon_begin() and
 * kernel_neon_end().
 */
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  int bytes, int nrounds)
{
	u8 tail[CHACHA_BLOCK_SIZE];
	int chunk;

	for (; bytes > 0; bytes -= chunk, src += chunk, dst += chunk) {
		chunk = min(bytes, CHACHA_BLOCK_SIZE * 5);

		if (chunk <= CHACHA_BLOCK_SIZE) {
			/* Last (possibly partial) block: bounce via the stack. */
			memcpy(tail, src, chunk);
			chacha_block_xor_neon(state, tail, tail, nrounds);
			memcpy(dst, tail, chunk);
			state[12] += 1;
			return;
		}

		chacha_4block_xor_neon(state, dst, src, nrounds, chunk);
		/* A trailing partial block still uses up a counter value. */
		state[12] += DIV_ROUND_UP(chunk, CHACHA_BLOCK_SIZE);
	}
}
6495a34b77SEric Biggers
/*
 * Arch entry point for the HChaCha block function.
 *
 * Uses hchacha_block_neon() when the have_neon key is enabled and
 * crypto_simd_usable() reports true; otherwise falls back to
 * hchacha_block_generic(). The result is written to @stream.
 */
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
		kernel_neon_begin();
		hchacha_block_neon(state, stream, nrounds);
		kernel_neon_end();
		return;
	}

	hchacha_block_generic(state, stream, nrounds);
}
EXPORT_SYMBOL(hchacha_block_arch);
76b3aad5baSArd Biesheuvel
/*
 * Arch entry point for ChaCha state initialisation. There is no
 * NEON-specific variant here: the call is forwarded unchanged to
 * chacha_init_generic().
 */
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
82b3aad5baSArd Biesheuvel
/*
 * Arch entry point for ChaCha en/decryption of @bytes bytes from @src to
 * @dst.
 *
 * The generic implementation handles the request when NEON is not enabled,
 * when the input is no longer than one block, or when
 * crypto_simd_usable() reports false. Otherwise the input is fed to
 * chacha_doneon() in slices of at most SZ_4K bytes, each wrapped in its own
 * kernel_neon_begin()/kernel_neon_end() pair (presumably to bound the time
 * spent inside a single NEON section -- confirm with the arch maintainers).
 */
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	if (!(static_branch_likely(&have_neon) &&
	      bytes > CHACHA_BLOCK_SIZE && crypto_simd_usable())) {
		chacha_crypt_generic(state, dst, src, bytes, nrounds);
		return;
	}

	/* bytes > CHACHA_BLOCK_SIZE here, so the loop runs at least once. */
	while (bytes) {
		unsigned int slice = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, slice, nrounds);
		kernel_neon_end();

		src += slice;
		dst += slice;
		bytes -= slice;
	}
}
EXPORT_SYMBOL(chacha_crypt_arch);
103b3aad5baSArd Biesheuvel
chacha_neon_stream_xor(struct skcipher_request * req,const struct chacha_ctx * ctx,const u8 * iv)10495a34b77SEric Biggers static int chacha_neon_stream_xor(struct skcipher_request *req,
105860ab2e5SEric Biggers const struct chacha_ctx *ctx, const u8 *iv)
10695a34b77SEric Biggers {
10795a34b77SEric Biggers struct skcipher_walk walk;
10895a34b77SEric Biggers u32 state[16];
10995a34b77SEric Biggers int err;
11095a34b77SEric Biggers
11195a34b77SEric Biggers err = skcipher_walk_virt(&walk, req, false);
11295a34b77SEric Biggers
113c77da486SArd Biesheuvel chacha_init_generic(state, ctx->key, iv);
11495a34b77SEric Biggers
11595a34b77SEric Biggers while (walk.nbytes > 0) {
11695a34b77SEric Biggers unsigned int nbytes = walk.nbytes;
11795a34b77SEric Biggers
11895a34b77SEric Biggers if (nbytes < walk.total)
1192fe55987SArd Biesheuvel nbytes = rounddown(nbytes, walk.stride);
12095a34b77SEric Biggers
121b3aad5baSArd Biesheuvel if (!static_branch_likely(&have_neon) ||
122b3aad5baSArd Biesheuvel !crypto_simd_usable()) {
123c77da486SArd Biesheuvel chacha_crypt_generic(state, walk.dst.virt.addr,
124c77da486SArd Biesheuvel walk.src.virt.addr, nbytes,
125c77da486SArd Biesheuvel ctx->nrounds);
126c77da486SArd Biesheuvel } else {
127f2ca1cbdSArd Biesheuvel kernel_neon_begin();
128c77da486SArd Biesheuvel chacha_doneon(state, walk.dst.virt.addr,
129c77da486SArd Biesheuvel walk.src.virt.addr, nbytes, ctx->nrounds);
130f2ca1cbdSArd Biesheuvel kernel_neon_end();
131c77da486SArd Biesheuvel }
13295a34b77SEric Biggers err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
13395a34b77SEric Biggers }
13495a34b77SEric Biggers
13595a34b77SEric Biggers return err;
13695a34b77SEric Biggers }
13795a34b77SEric Biggers
chacha_neon(struct skcipher_request * req)13895a34b77SEric Biggers static int chacha_neon(struct skcipher_request *req)
13995a34b77SEric Biggers {
14095a34b77SEric Biggers struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
14195a34b77SEric Biggers struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
14295a34b77SEric Biggers
14395a34b77SEric Biggers return chacha_neon_stream_xor(req, ctx, req->iv);
14495a34b77SEric Biggers }
14595a34b77SEric Biggers
xchacha_neon(struct skcipher_request * req)14695a34b77SEric Biggers static int xchacha_neon(struct skcipher_request *req)
14795a34b77SEric Biggers {
14895a34b77SEric Biggers struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
14995a34b77SEric Biggers struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
15095a34b77SEric Biggers struct chacha_ctx subctx;
15195a34b77SEric Biggers u32 state[16];
15295a34b77SEric Biggers u8 real_iv[16];
15395a34b77SEric Biggers
154c77da486SArd Biesheuvel chacha_init_generic(state, ctx->key, req->iv);
155b3aad5baSArd Biesheuvel hchacha_block_arch(state, subctx.key, ctx->nrounds);
15695a34b77SEric Biggers subctx.nrounds = ctx->nrounds;
15795a34b77SEric Biggers
15895a34b77SEric Biggers memcpy(&real_iv[0], req->iv + 24, 8);
15995a34b77SEric Biggers memcpy(&real_iv[8], req->iv + 16, 8);
16095a34b77SEric Biggers return chacha_neon_stream_xor(req, &subctx, real_iv);
16195a34b77SEric Biggers }
16295a34b77SEric Biggers
16395a34b77SEric Biggers static struct skcipher_alg algs[] = {
16495a34b77SEric Biggers {
16595a34b77SEric Biggers .base.cra_name = "chacha20",
16695a34b77SEric Biggers .base.cra_driver_name = "chacha20-neon",
16795a34b77SEric Biggers .base.cra_priority = 300,
16895a34b77SEric Biggers .base.cra_blocksize = 1,
16995a34b77SEric Biggers .base.cra_ctxsize = sizeof(struct chacha_ctx),
17095a34b77SEric Biggers .base.cra_module = THIS_MODULE,
17195a34b77SEric Biggers
17295a34b77SEric Biggers .min_keysize = CHACHA_KEY_SIZE,
17395a34b77SEric Biggers .max_keysize = CHACHA_KEY_SIZE,
17495a34b77SEric Biggers .ivsize = CHACHA_IV_SIZE,
17595a34b77SEric Biggers .chunksize = CHACHA_BLOCK_SIZE,
1762fe55987SArd Biesheuvel .walksize = 5 * CHACHA_BLOCK_SIZE,
177c77da486SArd Biesheuvel .setkey = chacha20_setkey,
17895a34b77SEric Biggers .encrypt = chacha_neon,
17995a34b77SEric Biggers .decrypt = chacha_neon,
18095a34b77SEric Biggers }, {
18195a34b77SEric Biggers .base.cra_name = "xchacha20",
18295a34b77SEric Biggers .base.cra_driver_name = "xchacha20-neon",
18395a34b77SEric Biggers .base.cra_priority = 300,
18495a34b77SEric Biggers .base.cra_blocksize = 1,
18595a34b77SEric Biggers .base.cra_ctxsize = sizeof(struct chacha_ctx),
18695a34b77SEric Biggers .base.cra_module = THIS_MODULE,
18795a34b77SEric Biggers
18895a34b77SEric Biggers .min_keysize = CHACHA_KEY_SIZE,
18995a34b77SEric Biggers .max_keysize = CHACHA_KEY_SIZE,
19095a34b77SEric Biggers .ivsize = XCHACHA_IV_SIZE,
19195a34b77SEric Biggers .chunksize = CHACHA_BLOCK_SIZE,
1922fe55987SArd Biesheuvel .walksize = 5 * CHACHA_BLOCK_SIZE,
193c77da486SArd Biesheuvel .setkey = chacha20_setkey,
19495a34b77SEric Biggers .encrypt = xchacha_neon,
19595a34b77SEric Biggers .decrypt = xchacha_neon,
19619c11c97SEric Biggers }, {
19719c11c97SEric Biggers .base.cra_name = "xchacha12",
19819c11c97SEric Biggers .base.cra_driver_name = "xchacha12-neon",
19919c11c97SEric Biggers .base.cra_priority = 300,
20019c11c97SEric Biggers .base.cra_blocksize = 1,
20119c11c97SEric Biggers .base.cra_ctxsize = sizeof(struct chacha_ctx),
20219c11c97SEric Biggers .base.cra_module = THIS_MODULE,
20319c11c97SEric Biggers
20419c11c97SEric Biggers .min_keysize = CHACHA_KEY_SIZE,
20519c11c97SEric Biggers .max_keysize = CHACHA_KEY_SIZE,
20619c11c97SEric Biggers .ivsize = XCHACHA_IV_SIZE,
20719c11c97SEric Biggers .chunksize = CHACHA_BLOCK_SIZE,
2082fe55987SArd Biesheuvel .walksize = 5 * CHACHA_BLOCK_SIZE,
209c77da486SArd Biesheuvel .setkey = chacha12_setkey,
21019c11c97SEric Biggers .encrypt = xchacha_neon,
21119c11c97SEric Biggers .decrypt = xchacha_neon,
21295a34b77SEric Biggers }
21395a34b77SEric Biggers };
21495a34b77SEric Biggers
chacha_simd_mod_init(void)21595a34b77SEric Biggers static int __init chacha_simd_mod_init(void)
21695a34b77SEric Biggers {
217aaba098fSAndrew Murray if (!cpu_have_named_feature(ASIMD))
218b3aad5baSArd Biesheuvel return 0;
219b3aad5baSArd Biesheuvel
220b3aad5baSArd Biesheuvel static_branch_enable(&have_neon);
22195a34b77SEric Biggers
2228394bfecSJason A. Donenfeld return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
2238394bfecSJason A. Donenfeld crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
22495a34b77SEric Biggers }
22595a34b77SEric Biggers
chacha_simd_mod_fini(void)22695a34b77SEric Biggers static void __exit chacha_simd_mod_fini(void)
22795a34b77SEric Biggers {
2288394bfecSJason A. Donenfeld if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && cpu_have_named_feature(ASIMD))
22995a34b77SEric Biggers crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
23095a34b77SEric Biggers }
23195a34b77SEric Biggers
23295a34b77SEric Biggers module_init(chacha_simd_mod_init);
23395a34b77SEric Biggers module_exit(chacha_simd_mod_fini);
23495a34b77SEric Biggers
23595a34b77SEric Biggers MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
23695a34b77SEric Biggers MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
23795a34b77SEric Biggers MODULE_LICENSE("GPL v2");
23895a34b77SEric Biggers MODULE_ALIAS_CRYPTO("chacha20");
23995a34b77SEric Biggers MODULE_ALIAS_CRYPTO("chacha20-neon");
24095a34b77SEric Biggers MODULE_ALIAS_CRYPTO("xchacha20");
24195a34b77SEric Biggers MODULE_ALIAS_CRYPTO("xchacha20-neon");
24219c11c97SEric Biggers MODULE_ALIAS_CRYPTO("xchacha12");
24319c11c97SEric Biggers MODULE_ALIAS_CRYPTO("xchacha12-neon");
244