// SPDX-License-Identifier: GPL-2.0
/*
 * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
 *
 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */
7f569ca16SArd Biesheuvel
8f569ca16SArd Biesheuvel #include <asm/hwcap.h>
9f569ca16SArd Biesheuvel #include <asm/neon.h>
10f569ca16SArd Biesheuvel #include <asm/simd.h>
11f569ca16SArd Biesheuvel #include <asm/unaligned.h>
12f569ca16SArd Biesheuvel #include <crypto/algapi.h>
13f569ca16SArd Biesheuvel #include <crypto/internal/hash.h>
14f569ca16SArd Biesheuvel #include <crypto/internal/poly1305.h>
15f569ca16SArd Biesheuvel #include <crypto/internal/simd.h>
16f569ca16SArd Biesheuvel #include <linux/cpufeature.h>
17f569ca16SArd Biesheuvel #include <linux/crypto.h>
18f569ca16SArd Biesheuvel #include <linux/jump_label.h>
19f569ca16SArd Biesheuvel #include <linux/module.h>
20f569ca16SArd Biesheuvel
21f569ca16SArd Biesheuvel asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
22f569ca16SArd Biesheuvel asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
23f569ca16SArd Biesheuvel asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
2431899908SJason A. Donenfeld asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
25f569ca16SArd Biesheuvel
26f569ca16SArd Biesheuvel static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
27f569ca16SArd Biesheuvel
poly1305_init_arch(struct poly1305_desc_ctx * dctx,const u8 key[POLY1305_KEY_SIZE])288d195e7aSArnd Bergmann void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
29f569ca16SArd Biesheuvel {
30f569ca16SArd Biesheuvel poly1305_init_arm64(&dctx->h, key);
31f569ca16SArd Biesheuvel dctx->s[0] = get_unaligned_le32(key + 16);
32f569ca16SArd Biesheuvel dctx->s[1] = get_unaligned_le32(key + 20);
33f569ca16SArd Biesheuvel dctx->s[2] = get_unaligned_le32(key + 24);
34f569ca16SArd Biesheuvel dctx->s[3] = get_unaligned_le32(key + 28);
35f569ca16SArd Biesheuvel dctx->buflen = 0;
36f569ca16SArd Biesheuvel }
37f569ca16SArd Biesheuvel EXPORT_SYMBOL(poly1305_init_arch);
38f569ca16SArd Biesheuvel
neon_poly1305_init(struct shash_desc * desc)39f569ca16SArd Biesheuvel static int neon_poly1305_init(struct shash_desc *desc)
40f569ca16SArd Biesheuvel {
41f569ca16SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
42f569ca16SArd Biesheuvel
43f569ca16SArd Biesheuvel dctx->buflen = 0;
44f569ca16SArd Biesheuvel dctx->rset = 0;
45f569ca16SArd Biesheuvel dctx->sset = false;
46f569ca16SArd Biesheuvel
47f569ca16SArd Biesheuvel return 0;
48f569ca16SArd Biesheuvel }
49f569ca16SArd Biesheuvel
/*
 * Feed whole blocks to the Poly1305 core, consuming key material first if
 * the shash context has not been keyed yet.
 *
 * @dctx:    hash context
 * @src:     input data
 * @len:     number of input bytes; the unguarded subtraction below relies on
 *           callers passing at least POLY1305_BLOCK_SIZE (both callers in
 *           this file do)
 * @hibit:   forwarded unchanged to the block routine
 * @do_neon: caller's permission to use the NEON routine
 *
 * While dctx->sset is clear, the first block of the stream goes to
 * poly1305_init_arm64() and the second block is loaded into dctx->s; only
 * data after those two blocks is hashed. Any trailing partial block in @len
 * is ignored here; buffering it is the caller's job.
 */
static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				 u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		unsigned int i;

		if (!dctx->rset) {
			/*
			 * Use poly1305_init_arm64() rather than
			 * poly1305_init_arch(): the latter also reads key
			 * bytes 16..31, and src may hold only one block here.
			 */
			poly1305_init_arm64(&dctx->h, src);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->rset = 1;
		}

		/* The second key block has not arrived yet. */
		if (len < POLY1305_BLOCK_SIZE)
			return;

		for (i = 0; i < 4; i++)
			dctx->s[i] = get_unaligned_le32(src + 4 * i);
		src += POLY1305_BLOCK_SIZE;
		len -= POLY1305_BLOCK_SIZE;
		dctx->sset = true;

		/* Nothing but key material in this call. */
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	/* Only whole blocks are processed. */
	len = round_down(len, POLY1305_BLOCK_SIZE);

	if (static_branch_likely(&have_neon) && likely(do_neon)) {
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
		return;
	}

	poly1305_blocks(&dctx->h, src, len, hibit);
}
80f569ca16SArd Biesheuvel
/*
 * Absorb @len bytes from @src into the shash context.
 *
 * The work is done in three steps:
 *   1. top up a previously buffered partial block and, once it is full,
 *      process it (never with NEON: it is a single block);
 *   2. process all whole blocks directly from @src;
 *   3. stash whatever is left (< POLY1305_BLOCK_SIZE) in dctx->buf.
 *
 * @do_neon is only forwarded to neon_poly1305_blocks(); this function does
 * not call kernel_neon_begin()/kernel_neon_end() itself.
 */
static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				    const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 take = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, take);
		dctx->buflen += take;
		src += take;
		len -= take;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			neon_poly1305_blocks(dctx, dctx->buf,
					     POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		u32 tail = len % POLY1305_BLOCK_SIZE;

		/* The callee ignores the trailing partial block. */
		neon_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += len - tail;
		len = tail;
	}

	if (unlikely(len)) {
		memcpy(dctx->buf, src, len);
		dctx->buflen = len;
	}
}
110f569ca16SArd Biesheuvel
neon_poly1305_update(struct shash_desc * desc,const u8 * src,unsigned int srclen)111f569ca16SArd Biesheuvel static int neon_poly1305_update(struct shash_desc *desc,
112f569ca16SArd Biesheuvel const u8 *src, unsigned int srclen)
113f569ca16SArd Biesheuvel {
114f569ca16SArd Biesheuvel bool do_neon = crypto_simd_usable() && srclen > 128;
115f569ca16SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
116f569ca16SArd Biesheuvel
117f569ca16SArd Biesheuvel if (static_branch_likely(&have_neon) && do_neon)
118f569ca16SArd Biesheuvel kernel_neon_begin();
119f569ca16SArd Biesheuvel neon_poly1305_do_update(dctx, src, srclen, do_neon);
120f569ca16SArd Biesheuvel if (static_branch_likely(&have_neon) && do_neon)
121f569ca16SArd Biesheuvel kernel_neon_end();
122f569ca16SArd Biesheuvel return 0;
123f569ca16SArd Biesheuvel }
124f569ca16SArd Biesheuvel
poly1305_update_arch(struct poly1305_desc_ctx * dctx,const u8 * src,unsigned int nbytes)125f569ca16SArd Biesheuvel void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
126f569ca16SArd Biesheuvel unsigned int nbytes)
127f569ca16SArd Biesheuvel {
128f569ca16SArd Biesheuvel if (unlikely(dctx->buflen)) {
129f569ca16SArd Biesheuvel u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
130f569ca16SArd Biesheuvel
131f569ca16SArd Biesheuvel memcpy(dctx->buf + dctx->buflen, src, bytes);
132f569ca16SArd Biesheuvel src += bytes;
133f569ca16SArd Biesheuvel nbytes -= bytes;
134f569ca16SArd Biesheuvel dctx->buflen += bytes;
135f569ca16SArd Biesheuvel
136f569ca16SArd Biesheuvel if (dctx->buflen == POLY1305_BLOCK_SIZE) {
137f569ca16SArd Biesheuvel poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
138f569ca16SArd Biesheuvel dctx->buflen = 0;
139f569ca16SArd Biesheuvel }
140f569ca16SArd Biesheuvel }
141f569ca16SArd Biesheuvel
142f569ca16SArd Biesheuvel if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
143f569ca16SArd Biesheuvel unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
144f569ca16SArd Biesheuvel
145f569ca16SArd Biesheuvel if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
146706024a5SJason A. Donenfeld do {
147706024a5SJason A. Donenfeld unsigned int todo = min_t(unsigned int, len, SZ_4K);
148706024a5SJason A. Donenfeld
149f569ca16SArd Biesheuvel kernel_neon_begin();
150706024a5SJason A. Donenfeld poly1305_blocks_neon(&dctx->h, src, todo, 1);
151f569ca16SArd Biesheuvel kernel_neon_end();
152706024a5SJason A. Donenfeld
153706024a5SJason A. Donenfeld len -= todo;
154706024a5SJason A. Donenfeld src += todo;
155706024a5SJason A. Donenfeld } while (len);
156f569ca16SArd Biesheuvel } else {
157f569ca16SArd Biesheuvel poly1305_blocks(&dctx->h, src, len, 1);
158f569ca16SArd Biesheuvel src += len;
159706024a5SJason A. Donenfeld }
160f569ca16SArd Biesheuvel nbytes %= POLY1305_BLOCK_SIZE;
161f569ca16SArd Biesheuvel }
162f569ca16SArd Biesheuvel
163f569ca16SArd Biesheuvel if (unlikely(nbytes)) {
164f569ca16SArd Biesheuvel dctx->buflen = nbytes;
165f569ca16SArd Biesheuvel memcpy(dctx->buf, src, nbytes);
166f569ca16SArd Biesheuvel }
167f569ca16SArd Biesheuvel }
168f569ca16SArd Biesheuvel EXPORT_SYMBOL(poly1305_update_arch);
169f569ca16SArd Biesheuvel
/*
 * Library entry point: finish the computation and write the digest.
 *
 * @dctx: context to finalise; it is wiped before returning
 * @dst:  output buffer handed to poly1305_emit()
 *
 * A buffered partial block is terminated with a single 0x01 byte, padded
 * with zeroes to POLY1305_BLOCK_SIZE and processed with hibit == 0. The
 * digest is then produced by poly1305_emit() from dctx->h and dctx->s.
 */
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	unsigned int used = dctx->buflen;

	if (unlikely(used)) {
		dctx->buf[used++] = 1;
		memset(&dctx->buf[used], 0, POLY1305_BLOCK_SIZE - used);
		dctx->buflen = used;
		poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit(&dctx->h, dst, dctx->s);

	/* memzero_explicit() so the wipe of key-derived state is not elided. */
	memzero_explicit(dctx, sizeof(*dctx));
}
EXPORT_SYMBOL(poly1305_final_arch);
183f569ca16SArd Biesheuvel
/*
 * shash .final callback.
 *
 * Returns -ENOKEY if the data stream never supplied the complete key
 * (dctx->sset still clear); otherwise finalises via poly1305_final_arch()
 * and returns 0.
 */
static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (likely(dctx->sset)) {
		poly1305_final_arch(dctx, dst);
		return 0;
	}

	return -ENOKEY;
}
194f569ca16SArd Biesheuvel
195f569ca16SArd Biesheuvel static struct shash_alg neon_poly1305_alg = {
196f569ca16SArd Biesheuvel .init = neon_poly1305_init,
197f569ca16SArd Biesheuvel .update = neon_poly1305_update,
198f569ca16SArd Biesheuvel .final = neon_poly1305_final,
199f569ca16SArd Biesheuvel .digestsize = POLY1305_DIGEST_SIZE,
200f569ca16SArd Biesheuvel .descsize = sizeof(struct poly1305_desc_ctx),
201f569ca16SArd Biesheuvel
202f569ca16SArd Biesheuvel .base.cra_name = "poly1305",
203f569ca16SArd Biesheuvel .base.cra_driver_name = "poly1305-neon",
204f569ca16SArd Biesheuvel .base.cra_priority = 200,
205f569ca16SArd Biesheuvel .base.cra_blocksize = POLY1305_BLOCK_SIZE,
206f569ca16SArd Biesheuvel .base.cra_module = THIS_MODULE,
207f569ca16SArd Biesheuvel };
208f569ca16SArd Biesheuvel
neon_poly1305_mod_init(void)209f569ca16SArd Biesheuvel static int __init neon_poly1305_mod_init(void)
210f569ca16SArd Biesheuvel {
211f569ca16SArd Biesheuvel if (!cpu_have_named_feature(ASIMD))
212f569ca16SArd Biesheuvel return 0;
213f569ca16SArd Biesheuvel
214f569ca16SArd Biesheuvel static_branch_enable(&have_neon);
215f569ca16SArd Biesheuvel
2168394bfecSJason A. Donenfeld return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
2178394bfecSJason A. Donenfeld crypto_register_shash(&neon_poly1305_alg) : 0;
218f569ca16SArd Biesheuvel }
219f569ca16SArd Biesheuvel
neon_poly1305_mod_exit(void)220f569ca16SArd Biesheuvel static void __exit neon_poly1305_mod_exit(void)
221f569ca16SArd Biesheuvel {
2228394bfecSJason A. Donenfeld if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
223f569ca16SArd Biesheuvel crypto_unregister_shash(&neon_poly1305_alg);
224f569ca16SArd Biesheuvel }
225f569ca16SArd Biesheuvel
226f569ca16SArd Biesheuvel module_init(neon_poly1305_mod_init);
227f569ca16SArd Biesheuvel module_exit(neon_poly1305_mod_exit);
228f569ca16SArd Biesheuvel
229f569ca16SArd Biesheuvel MODULE_LICENSE("GPL v2");
230f569ca16SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305");
231f569ca16SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305-neon");
232