xref: /openbmc/linux/arch/arm/crypto/poly1305-glue.c (revision 706024a52c614b478b63f7728d202532ce6591a9)
// SPDX-License-Identifier: GPL-2.0
/*
 * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
 *
 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */
7a6b803b3SArd Biesheuvel 
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>
#include <linux/string.h>
20a6b803b3SArd Biesheuvel 
21a6b803b3SArd Biesheuvel void poly1305_init_arm(void *state, const u8 *key);
22a6b803b3SArd Biesheuvel void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
2331899908SJason A. Donenfeld void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);
24a6b803b3SArd Biesheuvel 
25a6b803b3SArd Biesheuvel void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
26a6b803b3SArd Biesheuvel {
27a6b803b3SArd Biesheuvel }
28a6b803b3SArd Biesheuvel 
29a6b803b3SArd Biesheuvel static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
30a6b803b3SArd Biesheuvel 
31a6b803b3SArd Biesheuvel void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
32a6b803b3SArd Biesheuvel {
33a6b803b3SArd Biesheuvel 	poly1305_init_arm(&dctx->h, key);
34a6b803b3SArd Biesheuvel 	dctx->s[0] = get_unaligned_le32(key + 16);
35a6b803b3SArd Biesheuvel 	dctx->s[1] = get_unaligned_le32(key + 20);
36a6b803b3SArd Biesheuvel 	dctx->s[2] = get_unaligned_le32(key + 24);
37a6b803b3SArd Biesheuvel 	dctx->s[3] = get_unaligned_le32(key + 28);
38a6b803b3SArd Biesheuvel 	dctx->buflen = 0;
39a6b803b3SArd Biesheuvel }
40a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_init_arch);
41a6b803b3SArd Biesheuvel 
42a6b803b3SArd Biesheuvel static int arm_poly1305_init(struct shash_desc *desc)
43a6b803b3SArd Biesheuvel {
44a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
45a6b803b3SArd Biesheuvel 
46a6b803b3SArd Biesheuvel 	dctx->buflen = 0;
47a6b803b3SArd Biesheuvel 	dctx->rset = 0;
48a6b803b3SArd Biesheuvel 	dctx->sset = false;
49a6b803b3SArd Biesheuvel 
50a6b803b3SArd Biesheuvel 	return 0;
51a6b803b3SArd Biesheuvel }
52a6b803b3SArd Biesheuvel 
53a6b803b3SArd Biesheuvel static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
54a6b803b3SArd Biesheuvel 				 u32 len, u32 hibit, bool do_neon)
55a6b803b3SArd Biesheuvel {
56a6b803b3SArd Biesheuvel 	if (unlikely(!dctx->sset)) {
57a6b803b3SArd Biesheuvel 		if (!dctx->rset) {
58a6b803b3SArd Biesheuvel 			poly1305_init_arm(&dctx->h, src);
59a6b803b3SArd Biesheuvel 			src += POLY1305_BLOCK_SIZE;
60a6b803b3SArd Biesheuvel 			len -= POLY1305_BLOCK_SIZE;
61a6b803b3SArd Biesheuvel 			dctx->rset = 1;
62a6b803b3SArd Biesheuvel 		}
63a6b803b3SArd Biesheuvel 		if (len >= POLY1305_BLOCK_SIZE) {
64a6b803b3SArd Biesheuvel 			dctx->s[0] = get_unaligned_le32(src +  0);
65a6b803b3SArd Biesheuvel 			dctx->s[1] = get_unaligned_le32(src +  4);
66a6b803b3SArd Biesheuvel 			dctx->s[2] = get_unaligned_le32(src +  8);
67a6b803b3SArd Biesheuvel 			dctx->s[3] = get_unaligned_le32(src + 12);
68a6b803b3SArd Biesheuvel 			src += POLY1305_BLOCK_SIZE;
69a6b803b3SArd Biesheuvel 			len -= POLY1305_BLOCK_SIZE;
70a6b803b3SArd Biesheuvel 			dctx->sset = true;
71a6b803b3SArd Biesheuvel 		}
72a6b803b3SArd Biesheuvel 		if (len < POLY1305_BLOCK_SIZE)
73a6b803b3SArd Biesheuvel 			return;
74a6b803b3SArd Biesheuvel 	}
75a6b803b3SArd Biesheuvel 
76a6b803b3SArd Biesheuvel 	len &= ~(POLY1305_BLOCK_SIZE - 1);
77a6b803b3SArd Biesheuvel 
78a6b803b3SArd Biesheuvel 	if (static_branch_likely(&have_neon) && likely(do_neon))
79a6b803b3SArd Biesheuvel 		poly1305_blocks_neon(&dctx->h, src, len, hibit);
80a6b803b3SArd Biesheuvel 	else
81a6b803b3SArd Biesheuvel 		poly1305_blocks_arm(&dctx->h, src, len, hibit);
82a6b803b3SArd Biesheuvel }
83a6b803b3SArd Biesheuvel 
84a6b803b3SArd Biesheuvel static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
85a6b803b3SArd Biesheuvel 				    const u8 *src, u32 len, bool do_neon)
86a6b803b3SArd Biesheuvel {
87a6b803b3SArd Biesheuvel 	if (unlikely(dctx->buflen)) {
88a6b803b3SArd Biesheuvel 		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
89a6b803b3SArd Biesheuvel 
90a6b803b3SArd Biesheuvel 		memcpy(dctx->buf + dctx->buflen, src, bytes);
91a6b803b3SArd Biesheuvel 		src += bytes;
92a6b803b3SArd Biesheuvel 		len -= bytes;
93a6b803b3SArd Biesheuvel 		dctx->buflen += bytes;
94a6b803b3SArd Biesheuvel 
95a6b803b3SArd Biesheuvel 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
96a6b803b3SArd Biesheuvel 			arm_poly1305_blocks(dctx, dctx->buf,
97a6b803b3SArd Biesheuvel 					    POLY1305_BLOCK_SIZE, 1, false);
98a6b803b3SArd Biesheuvel 			dctx->buflen = 0;
99a6b803b3SArd Biesheuvel 		}
100a6b803b3SArd Biesheuvel 	}
101a6b803b3SArd Biesheuvel 
102a6b803b3SArd Biesheuvel 	if (likely(len >= POLY1305_BLOCK_SIZE)) {
103a6b803b3SArd Biesheuvel 		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
104a6b803b3SArd Biesheuvel 		src += round_down(len, POLY1305_BLOCK_SIZE);
105a6b803b3SArd Biesheuvel 		len %= POLY1305_BLOCK_SIZE;
106a6b803b3SArd Biesheuvel 	}
107a6b803b3SArd Biesheuvel 
108a6b803b3SArd Biesheuvel 	if (unlikely(len)) {
109a6b803b3SArd Biesheuvel 		dctx->buflen = len;
110a6b803b3SArd Biesheuvel 		memcpy(dctx->buf, src, len);
111a6b803b3SArd Biesheuvel 	}
112a6b803b3SArd Biesheuvel }
113a6b803b3SArd Biesheuvel 
114a6b803b3SArd Biesheuvel static int arm_poly1305_update(struct shash_desc *desc,
115a6b803b3SArd Biesheuvel 			       const u8 *src, unsigned int srclen)
116a6b803b3SArd Biesheuvel {
117a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
118a6b803b3SArd Biesheuvel 
119a6b803b3SArd Biesheuvel 	arm_poly1305_do_update(dctx, src, srclen, false);
120a6b803b3SArd Biesheuvel 	return 0;
121a6b803b3SArd Biesheuvel }
122a6b803b3SArd Biesheuvel 
123a6b803b3SArd Biesheuvel static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
124a6b803b3SArd Biesheuvel 						   const u8 *src,
125a6b803b3SArd Biesheuvel 						   unsigned int srclen)
126a6b803b3SArd Biesheuvel {
127a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
128a6b803b3SArd Biesheuvel 	bool do_neon = crypto_simd_usable() && srclen > 128;
129a6b803b3SArd Biesheuvel 
130a6b803b3SArd Biesheuvel 	if (static_branch_likely(&have_neon) && do_neon)
131a6b803b3SArd Biesheuvel 		kernel_neon_begin();
132a6b803b3SArd Biesheuvel 	arm_poly1305_do_update(dctx, src, srclen, do_neon);
133a6b803b3SArd Biesheuvel 	if (static_branch_likely(&have_neon) && do_neon)
134a6b803b3SArd Biesheuvel 		kernel_neon_end();
135a6b803b3SArd Biesheuvel 	return 0;
136a6b803b3SArd Biesheuvel }
137a6b803b3SArd Biesheuvel 
138a6b803b3SArd Biesheuvel void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
139a6b803b3SArd Biesheuvel 			  unsigned int nbytes)
140a6b803b3SArd Biesheuvel {
141a6b803b3SArd Biesheuvel 	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
142a6b803b3SArd Biesheuvel 		       crypto_simd_usable();
143a6b803b3SArd Biesheuvel 
144a6b803b3SArd Biesheuvel 	if (unlikely(dctx->buflen)) {
145a6b803b3SArd Biesheuvel 		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
146a6b803b3SArd Biesheuvel 
147a6b803b3SArd Biesheuvel 		memcpy(dctx->buf + dctx->buflen, src, bytes);
148a6b803b3SArd Biesheuvel 		src += bytes;
149a6b803b3SArd Biesheuvel 		nbytes -= bytes;
150a6b803b3SArd Biesheuvel 		dctx->buflen += bytes;
151a6b803b3SArd Biesheuvel 
152a6b803b3SArd Biesheuvel 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
153a6b803b3SArd Biesheuvel 			poly1305_blocks_arm(&dctx->h, dctx->buf,
154a6b803b3SArd Biesheuvel 					    POLY1305_BLOCK_SIZE, 1);
155a6b803b3SArd Biesheuvel 			dctx->buflen = 0;
156a6b803b3SArd Biesheuvel 		}
157a6b803b3SArd Biesheuvel 	}
158a6b803b3SArd Biesheuvel 
159a6b803b3SArd Biesheuvel 	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
160a6b803b3SArd Biesheuvel 		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
161a6b803b3SArd Biesheuvel 
162a6b803b3SArd Biesheuvel 		if (static_branch_likely(&have_neon) && do_neon) {
163*706024a5SJason A. Donenfeld 			do {
164*706024a5SJason A. Donenfeld 				unsigned int todo = min_t(unsigned int, len, SZ_4K);
165*706024a5SJason A. Donenfeld 
166a6b803b3SArd Biesheuvel 				kernel_neon_begin();
167*706024a5SJason A. Donenfeld 				poly1305_blocks_neon(&dctx->h, src, todo, 1);
168a6b803b3SArd Biesheuvel 				kernel_neon_end();
169*706024a5SJason A. Donenfeld 
170*706024a5SJason A. Donenfeld 				len -= todo;
171*706024a5SJason A. Donenfeld 				src += todo;
172*706024a5SJason A. Donenfeld 			} while (len);
173a6b803b3SArd Biesheuvel 		} else {
174a6b803b3SArd Biesheuvel 			poly1305_blocks_arm(&dctx->h, src, len, 1);
175a6b803b3SArd Biesheuvel 			src += len;
176*706024a5SJason A. Donenfeld 		}
177a6b803b3SArd Biesheuvel 		nbytes %= POLY1305_BLOCK_SIZE;
178a6b803b3SArd Biesheuvel 	}
179a6b803b3SArd Biesheuvel 
180a6b803b3SArd Biesheuvel 	if (unlikely(nbytes)) {
181a6b803b3SArd Biesheuvel 		dctx->buflen = nbytes;
182a6b803b3SArd Biesheuvel 		memcpy(dctx->buf, src, nbytes);
183a6b803b3SArd Biesheuvel 	}
184a6b803b3SArd Biesheuvel }
185a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_update_arch);
186a6b803b3SArd Biesheuvel 
187a6b803b3SArd Biesheuvel void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
188a6b803b3SArd Biesheuvel {
189a6b803b3SArd Biesheuvel 	if (unlikely(dctx->buflen)) {
190a6b803b3SArd Biesheuvel 		dctx->buf[dctx->buflen++] = 1;
191a6b803b3SArd Biesheuvel 		memset(dctx->buf + dctx->buflen, 0,
192a6b803b3SArd Biesheuvel 		       POLY1305_BLOCK_SIZE - dctx->buflen);
193a6b803b3SArd Biesheuvel 		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
194a6b803b3SArd Biesheuvel 	}
195a6b803b3SArd Biesheuvel 
19631899908SJason A. Donenfeld 	poly1305_emit_arm(&dctx->h, dst, dctx->s);
197a6b803b3SArd Biesheuvel 	*dctx = (struct poly1305_desc_ctx){};
198a6b803b3SArd Biesheuvel }
199a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_final_arch);
200a6b803b3SArd Biesheuvel 
201a6b803b3SArd Biesheuvel static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
202a6b803b3SArd Biesheuvel {
203a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
204a6b803b3SArd Biesheuvel 
205a6b803b3SArd Biesheuvel 	if (unlikely(!dctx->sset))
206a6b803b3SArd Biesheuvel 		return -ENOKEY;
207a6b803b3SArd Biesheuvel 
208a6b803b3SArd Biesheuvel 	poly1305_final_arch(dctx, dst);
209a6b803b3SArd Biesheuvel 	return 0;
210a6b803b3SArd Biesheuvel }
211a6b803b3SArd Biesheuvel 
212a6b803b3SArd Biesheuvel static struct shash_alg arm_poly1305_algs[] = {{
213a6b803b3SArd Biesheuvel 	.init			= arm_poly1305_init,
214a6b803b3SArd Biesheuvel 	.update			= arm_poly1305_update,
215a6b803b3SArd Biesheuvel 	.final			= arm_poly1305_final,
216a6b803b3SArd Biesheuvel 	.digestsize		= POLY1305_DIGEST_SIZE,
217a6b803b3SArd Biesheuvel 	.descsize		= sizeof(struct poly1305_desc_ctx),
218a6b803b3SArd Biesheuvel 
219a6b803b3SArd Biesheuvel 	.base.cra_name		= "poly1305",
220a6b803b3SArd Biesheuvel 	.base.cra_driver_name	= "poly1305-arm",
221a6b803b3SArd Biesheuvel 	.base.cra_priority	= 150,
222a6b803b3SArd Biesheuvel 	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
223a6b803b3SArd Biesheuvel 	.base.cra_module	= THIS_MODULE,
224a6b803b3SArd Biesheuvel #ifdef CONFIG_KERNEL_MODE_NEON
225a6b803b3SArd Biesheuvel }, {
226a6b803b3SArd Biesheuvel 	.init			= arm_poly1305_init,
227a6b803b3SArd Biesheuvel 	.update			= arm_poly1305_update_neon,
228a6b803b3SArd Biesheuvel 	.final			= arm_poly1305_final,
229a6b803b3SArd Biesheuvel 	.digestsize		= POLY1305_DIGEST_SIZE,
230a6b803b3SArd Biesheuvel 	.descsize		= sizeof(struct poly1305_desc_ctx),
231a6b803b3SArd Biesheuvel 
232a6b803b3SArd Biesheuvel 	.base.cra_name		= "poly1305",
233a6b803b3SArd Biesheuvel 	.base.cra_driver_name	= "poly1305-neon",
234a6b803b3SArd Biesheuvel 	.base.cra_priority	= 200,
235a6b803b3SArd Biesheuvel 	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
236a6b803b3SArd Biesheuvel 	.base.cra_module	= THIS_MODULE,
237a6b803b3SArd Biesheuvel #endif
238a6b803b3SArd Biesheuvel }};
239a6b803b3SArd Biesheuvel 
240a6b803b3SArd Biesheuvel static int __init arm_poly1305_mod_init(void)
241a6b803b3SArd Biesheuvel {
242a6b803b3SArd Biesheuvel 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
243a6b803b3SArd Biesheuvel 	    (elf_hwcap & HWCAP_NEON))
244a6b803b3SArd Biesheuvel 		static_branch_enable(&have_neon);
2458394bfecSJason A. Donenfeld 	else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
246a6b803b3SArd Biesheuvel 		/* register only the first entry */
247a6b803b3SArd Biesheuvel 		return crypto_register_shash(&arm_poly1305_algs[0]);
248a6b803b3SArd Biesheuvel 
2498394bfecSJason A. Donenfeld 	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
2508394bfecSJason A. Donenfeld 		crypto_register_shashes(arm_poly1305_algs,
2518394bfecSJason A. Donenfeld 					ARRAY_SIZE(arm_poly1305_algs)) : 0;
252a6b803b3SArd Biesheuvel }
253a6b803b3SArd Biesheuvel 
254a6b803b3SArd Biesheuvel static void __exit arm_poly1305_mod_exit(void)
255a6b803b3SArd Biesheuvel {
2568394bfecSJason A. Donenfeld 	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
2578394bfecSJason A. Donenfeld 		return;
258a6b803b3SArd Biesheuvel 	if (!static_branch_likely(&have_neon)) {
259a6b803b3SArd Biesheuvel 		crypto_unregister_shash(&arm_poly1305_algs[0]);
260a6b803b3SArd Biesheuvel 		return;
261a6b803b3SArd Biesheuvel 	}
262a6b803b3SArd Biesheuvel 	crypto_unregister_shashes(arm_poly1305_algs,
263a6b803b3SArd Biesheuvel 				  ARRAY_SIZE(arm_poly1305_algs));
264a6b803b3SArd Biesheuvel }
265a6b803b3SArd Biesheuvel 
266a6b803b3SArd Biesheuvel module_init(arm_poly1305_mod_init);
267a6b803b3SArd Biesheuvel module_exit(arm_poly1305_mod_exit);
268a6b803b3SArd Biesheuvel 
269a6b803b3SArd Biesheuvel MODULE_LICENSE("GPL v2");
270a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305");
271a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305-arm");
272a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305-neon");
273