// SPDX-License-Identifier: GPL-2.0
/*
 * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
 *
 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */
7a6b803b3SArd Biesheuvel
8a6b803b3SArd Biesheuvel #include <asm/hwcap.h>
9a6b803b3SArd Biesheuvel #include <asm/neon.h>
10a6b803b3SArd Biesheuvel #include <asm/simd.h>
11a6b803b3SArd Biesheuvel #include <asm/unaligned.h>
12a6b803b3SArd Biesheuvel #include <crypto/algapi.h>
13a6b803b3SArd Biesheuvel #include <crypto/internal/hash.h>
14a6b803b3SArd Biesheuvel #include <crypto/internal/poly1305.h>
15a6b803b3SArd Biesheuvel #include <crypto/internal/simd.h>
16a6b803b3SArd Biesheuvel #include <linux/cpufeature.h>
17a6b803b3SArd Biesheuvel #include <linux/crypto.h>
18a6b803b3SArd Biesheuvel #include <linux/jump_label.h>
19a6b803b3SArd Biesheuvel #include <linux/module.h>
20a6b803b3SArd Biesheuvel
21a6b803b3SArd Biesheuvel void poly1305_init_arm(void *state, const u8 *key);
22a6b803b3SArd Biesheuvel void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
2351982ea0SHerbert Xu void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
2431899908SJason A. Donenfeld void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);
25a6b803b3SArd Biesheuvel
poly1305_blocks_neon(void * state,const u8 * src,u32 len,u32 hibit)26a6b803b3SArd Biesheuvel void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
27a6b803b3SArd Biesheuvel {
28a6b803b3SArd Biesheuvel }
29a6b803b3SArd Biesheuvel
30a6b803b3SArd Biesheuvel static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
31a6b803b3SArd Biesheuvel
poly1305_init_arch(struct poly1305_desc_ctx * dctx,const u8 key[POLY1305_KEY_SIZE])32*8d195e7aSArnd Bergmann void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
33a6b803b3SArd Biesheuvel {
34a6b803b3SArd Biesheuvel poly1305_init_arm(&dctx->h, key);
35a6b803b3SArd Biesheuvel dctx->s[0] = get_unaligned_le32(key + 16);
36a6b803b3SArd Biesheuvel dctx->s[1] = get_unaligned_le32(key + 20);
37a6b803b3SArd Biesheuvel dctx->s[2] = get_unaligned_le32(key + 24);
38a6b803b3SArd Biesheuvel dctx->s[3] = get_unaligned_le32(key + 28);
39a6b803b3SArd Biesheuvel dctx->buflen = 0;
40a6b803b3SArd Biesheuvel }
41a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_init_arch);
42a6b803b3SArd Biesheuvel
arm_poly1305_init(struct shash_desc * desc)43a6b803b3SArd Biesheuvel static int arm_poly1305_init(struct shash_desc *desc)
44a6b803b3SArd Biesheuvel {
45a6b803b3SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
46a6b803b3SArd Biesheuvel
47a6b803b3SArd Biesheuvel dctx->buflen = 0;
48a6b803b3SArd Biesheuvel dctx->rset = 0;
49a6b803b3SArd Biesheuvel dctx->sset = false;
50a6b803b3SArd Biesheuvel
51a6b803b3SArd Biesheuvel return 0;
52a6b803b3SArd Biesheuvel }
53a6b803b3SArd Biesheuvel
/*
 * Feed whole blocks from @src into the state held in dctx->h.
 *
 * While dctx->sset is still clear, the leading input is consumed as key
 * material instead of being hashed: if dctx->rset is clear, one block is
 * passed to poly1305_init_arm(); then, if a full block is still available,
 * it is loaded into dctx->s[] and sset is set. If less than one block is
 * left after that, the function returns without hashing anything.
 *
 * The remaining length is rounded down to a multiple of
 * POLY1305_BLOCK_SIZE and passed, together with @hibit, to
 * poly1305_blocks_neon() when both the have_neon static key and @do_neon
 * are true, and to poly1305_blocks_arm() otherwise.
 *
 * The callers in this file always pass at least POLY1305_BLOCK_SIZE bytes.
 */
static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		/* First key block initialises the state in dctx->h. */
		if (!dctx->rset) {
			poly1305_init_arm(&dctx->h, src);
			dctx->rset = 1;
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
		}

		/* Second key block is kept as four little-endian words. */
		if (len >= POLY1305_BLOCK_SIZE) {
			dctx->s[0] = get_unaligned_le32(src);
			dctx->s[1] = get_unaligned_le32(src + 4);
			dctx->s[2] = get_unaligned_le32(src + 8);
			dctx->s[3] = get_unaligned_le32(src + 12);
			dctx->sset = true;
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
		}

		/* Nothing left to hash once the key material is consumed. */
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	/* Only whole blocks are processed here. */
	len = round_down(len, POLY1305_BLOCK_SIZE);

	if (static_branch_likely(&have_neon) && likely(do_neon))
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
	else
		poly1305_blocks_arm(&dctx->h, src, len, hibit);
}
84a6b803b3SArd Biesheuvel
/*
 * Common update path for the two shash variants.
 *
 * Three steps, in order:
 *  1. If dctx->buf already holds data, top it up from @src; once it
 *     reaches POLY1305_BLOCK_SIZE it is processed (always with
 *     do_neon == false) and the buffer is emptied.
 *  2. If at least one full block remains, hand the input to
 *     arm_poly1305_blocks() with @do_neon and skip past the whole blocks.
 *  3. Stash any trailing partial block in dctx->buf for later.
 */
static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				   const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 room = POLY1305_BLOCK_SIZE - dctx->buflen;
		u32 take = min(len, room);

		memcpy(dctx->buf + dctx->buflen, src, take);
		dctx->buflen += take;
		src += take;
		len -= take;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			arm_poly1305_blocks(dctx, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		u32 whole = round_down(len, POLY1305_BLOCK_SIZE);

		/*
		 * The unrounded length is passed on purpose:
		 * arm_poly1305_blocks() does its own rounding after any
		 * key handling.
		 */
		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += whole;
		len -= whole;
	}

	if (unlikely(len)) {
		memcpy(dctx->buf, src, len);
		dctx->buflen = len;
	}
}
114a6b803b3SArd Biesheuvel
arm_poly1305_update(struct shash_desc * desc,const u8 * src,unsigned int srclen)115a6b803b3SArd Biesheuvel static int arm_poly1305_update(struct shash_desc *desc,
116a6b803b3SArd Biesheuvel const u8 *src, unsigned int srclen)
117a6b803b3SArd Biesheuvel {
118a6b803b3SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
119a6b803b3SArd Biesheuvel
120a6b803b3SArd Biesheuvel arm_poly1305_do_update(dctx, src, srclen, false);
121a6b803b3SArd Biesheuvel return 0;
122a6b803b3SArd Biesheuvel }
123a6b803b3SArd Biesheuvel
arm_poly1305_update_neon(struct shash_desc * desc,const u8 * src,unsigned int srclen)124a6b803b3SArd Biesheuvel static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
125a6b803b3SArd Biesheuvel const u8 *src,
126a6b803b3SArd Biesheuvel unsigned int srclen)
127a6b803b3SArd Biesheuvel {
128a6b803b3SArd Biesheuvel struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
129a6b803b3SArd Biesheuvel bool do_neon = crypto_simd_usable() && srclen > 128;
130a6b803b3SArd Biesheuvel
131a6b803b3SArd Biesheuvel if (static_branch_likely(&have_neon) && do_neon)
132a6b803b3SArd Biesheuvel kernel_neon_begin();
133a6b803b3SArd Biesheuvel arm_poly1305_do_update(dctx, src, srclen, do_neon);
134a6b803b3SArd Biesheuvel if (static_branch_likely(&have_neon) && do_neon)
135a6b803b3SArd Biesheuvel kernel_neon_end();
136a6b803b3SArd Biesheuvel return 0;
137a6b803b3SArd Biesheuvel }
138a6b803b3SArd Biesheuvel
poly1305_update_arch(struct poly1305_desc_ctx * dctx,const u8 * src,unsigned int nbytes)139a6b803b3SArd Biesheuvel void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
140a6b803b3SArd Biesheuvel unsigned int nbytes)
141a6b803b3SArd Biesheuvel {
142a6b803b3SArd Biesheuvel bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
143a6b803b3SArd Biesheuvel crypto_simd_usable();
144a6b803b3SArd Biesheuvel
145a6b803b3SArd Biesheuvel if (unlikely(dctx->buflen)) {
146a6b803b3SArd Biesheuvel u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
147a6b803b3SArd Biesheuvel
148a6b803b3SArd Biesheuvel memcpy(dctx->buf + dctx->buflen, src, bytes);
149a6b803b3SArd Biesheuvel src += bytes;
150a6b803b3SArd Biesheuvel nbytes -= bytes;
151a6b803b3SArd Biesheuvel dctx->buflen += bytes;
152a6b803b3SArd Biesheuvel
153a6b803b3SArd Biesheuvel if (dctx->buflen == POLY1305_BLOCK_SIZE) {
154a6b803b3SArd Biesheuvel poly1305_blocks_arm(&dctx->h, dctx->buf,
155a6b803b3SArd Biesheuvel POLY1305_BLOCK_SIZE, 1);
156a6b803b3SArd Biesheuvel dctx->buflen = 0;
157a6b803b3SArd Biesheuvel }
158a6b803b3SArd Biesheuvel }
159a6b803b3SArd Biesheuvel
160a6b803b3SArd Biesheuvel if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
161a6b803b3SArd Biesheuvel unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
162a6b803b3SArd Biesheuvel
163a6b803b3SArd Biesheuvel if (static_branch_likely(&have_neon) && do_neon) {
164706024a5SJason A. Donenfeld do {
165706024a5SJason A. Donenfeld unsigned int todo = min_t(unsigned int, len, SZ_4K);
166706024a5SJason A. Donenfeld
167a6b803b3SArd Biesheuvel kernel_neon_begin();
168706024a5SJason A. Donenfeld poly1305_blocks_neon(&dctx->h, src, todo, 1);
169a6b803b3SArd Biesheuvel kernel_neon_end();
170706024a5SJason A. Donenfeld
171706024a5SJason A. Donenfeld len -= todo;
172706024a5SJason A. Donenfeld src += todo;
173706024a5SJason A. Donenfeld } while (len);
174a6b803b3SArd Biesheuvel } else {
175a6b803b3SArd Biesheuvel poly1305_blocks_arm(&dctx->h, src, len, 1);
176a6b803b3SArd Biesheuvel src += len;
177706024a5SJason A. Donenfeld }
178a6b803b3SArd Biesheuvel nbytes %= POLY1305_BLOCK_SIZE;
179a6b803b3SArd Biesheuvel }
180a6b803b3SArd Biesheuvel
181a6b803b3SArd Biesheuvel if (unlikely(nbytes)) {
182a6b803b3SArd Biesheuvel dctx->buflen = nbytes;
183a6b803b3SArd Biesheuvel memcpy(dctx->buf, src, nbytes);
184a6b803b3SArd Biesheuvel }
185a6b803b3SArd Biesheuvel }
186a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_update_arch);
187a6b803b3SArd Biesheuvel
/*
 * poly1305_final_arch() - finish the computation and write the digest.
 *
 * If dctx->buf holds a partial block, a 1 byte is appended, the rest of
 * the block is zero-filled, and the block is processed by
 * poly1305_blocks_arm() with hibit == 0. poly1305_emit_arm() then writes
 * the digest to @dst using dctx->s as its nonce argument, and *dctx is
 * overwritten with an empty (zero-initialised) context.
 */
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		u8 *pad = dctx->buf + dctx->buflen;

		*pad = 1;
		dctx->buflen++;
		memset(pad + 1, 0, POLY1305_BLOCK_SIZE - dctx->buflen);

		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_arm(&dctx->h, dst, dctx->s);

	/* Leave no state behind in the caller's context. */
	*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);
201a6b803b3SArd Biesheuvel
/*
 * shash .final hook.
 *
 * Returns -ENOKEY if dctx->sset was never set, i.e. the key material was
 * not fully consumed by the update path. Otherwise it finalises through
 * poly1305_final_arch() and returns 0.
 */
static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *ctx = shash_desc_ctx(desc);

	if (likely(ctx->sset)) {
		poly1305_final_arch(ctx, dst);
		return 0;
	}

	return -ENOKEY;
}
212a6b803b3SArd Biesheuvel
213a6b803b3SArd Biesheuvel static struct shash_alg arm_poly1305_algs[] = {{
214a6b803b3SArd Biesheuvel .init = arm_poly1305_init,
215a6b803b3SArd Biesheuvel .update = arm_poly1305_update,
216a6b803b3SArd Biesheuvel .final = arm_poly1305_final,
217a6b803b3SArd Biesheuvel .digestsize = POLY1305_DIGEST_SIZE,
218a6b803b3SArd Biesheuvel .descsize = sizeof(struct poly1305_desc_ctx),
219a6b803b3SArd Biesheuvel
220a6b803b3SArd Biesheuvel .base.cra_name = "poly1305",
221a6b803b3SArd Biesheuvel .base.cra_driver_name = "poly1305-arm",
222a6b803b3SArd Biesheuvel .base.cra_priority = 150,
223a6b803b3SArd Biesheuvel .base.cra_blocksize = POLY1305_BLOCK_SIZE,
224a6b803b3SArd Biesheuvel .base.cra_module = THIS_MODULE,
225a6b803b3SArd Biesheuvel #ifdef CONFIG_KERNEL_MODE_NEON
226a6b803b3SArd Biesheuvel }, {
227a6b803b3SArd Biesheuvel .init = arm_poly1305_init,
228a6b803b3SArd Biesheuvel .update = arm_poly1305_update_neon,
229a6b803b3SArd Biesheuvel .final = arm_poly1305_final,
230a6b803b3SArd Biesheuvel .digestsize = POLY1305_DIGEST_SIZE,
231a6b803b3SArd Biesheuvel .descsize = sizeof(struct poly1305_desc_ctx),
232a6b803b3SArd Biesheuvel
233a6b803b3SArd Biesheuvel .base.cra_name = "poly1305",
234a6b803b3SArd Biesheuvel .base.cra_driver_name = "poly1305-neon",
235a6b803b3SArd Biesheuvel .base.cra_priority = 200,
236a6b803b3SArd Biesheuvel .base.cra_blocksize = POLY1305_BLOCK_SIZE,
237a6b803b3SArd Biesheuvel .base.cra_module = THIS_MODULE,
238a6b803b3SArd Biesheuvel #endif
239a6b803b3SArd Biesheuvel }};
240a6b803b3SArd Biesheuvel
arm_poly1305_mod_init(void)241a6b803b3SArd Biesheuvel static int __init arm_poly1305_mod_init(void)
242a6b803b3SArd Biesheuvel {
243a6b803b3SArd Biesheuvel if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
244a6b803b3SArd Biesheuvel (elf_hwcap & HWCAP_NEON))
245a6b803b3SArd Biesheuvel static_branch_enable(&have_neon);
2468394bfecSJason A. Donenfeld else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
247a6b803b3SArd Biesheuvel /* register only the first entry */
248a6b803b3SArd Biesheuvel return crypto_register_shash(&arm_poly1305_algs[0]);
249a6b803b3SArd Biesheuvel
2508394bfecSJason A. Donenfeld return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
2518394bfecSJason A. Donenfeld crypto_register_shashes(arm_poly1305_algs,
2528394bfecSJason A. Donenfeld ARRAY_SIZE(arm_poly1305_algs)) : 0;
253a6b803b3SArd Biesheuvel }
254a6b803b3SArd Biesheuvel
arm_poly1305_mod_exit(void)255a6b803b3SArd Biesheuvel static void __exit arm_poly1305_mod_exit(void)
256a6b803b3SArd Biesheuvel {
2578394bfecSJason A. Donenfeld if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
2588394bfecSJason A. Donenfeld return;
259a6b803b3SArd Biesheuvel if (!static_branch_likely(&have_neon)) {
260a6b803b3SArd Biesheuvel crypto_unregister_shash(&arm_poly1305_algs[0]);
261a6b803b3SArd Biesheuvel return;
262a6b803b3SArd Biesheuvel }
263a6b803b3SArd Biesheuvel crypto_unregister_shashes(arm_poly1305_algs,
264a6b803b3SArd Biesheuvel ARRAY_SIZE(arm_poly1305_algs));
265a6b803b3SArd Biesheuvel }
266a6b803b3SArd Biesheuvel
267a6b803b3SArd Biesheuvel module_init(arm_poly1305_mod_init);
268a6b803b3SArd Biesheuvel module_exit(arm_poly1305_mod_exit);
269a6b803b3SArd Biesheuvel
270a6b803b3SArd Biesheuvel MODULE_LICENSE("GPL v2");
271a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305");
272a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305-arm");
273a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305-neon");
274