xref: /openbmc/linux/arch/arm/crypto/poly1305-glue.c (revision 8394bfec51e0e565556101bcc4e2fe7551104cd8)
1a6b803b3SArd Biesheuvel // SPDX-License-Identifier: GPL-2.0
2a6b803b3SArd Biesheuvel /*
3a6b803b3SArd Biesheuvel  * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
4a6b803b3SArd Biesheuvel  *
5a6b803b3SArd Biesheuvel  * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
6a6b803b3SArd Biesheuvel  */
7a6b803b3SArd Biesheuvel 
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>
#include <linux/sizes.h>
20a6b803b3SArd Biesheuvel 
21a6b803b3SArd Biesheuvel void poly1305_init_arm(void *state, const u8 *key);
22a6b803b3SArd Biesheuvel void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
23a6b803b3SArd Biesheuvel void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);
24a6b803b3SArd Biesheuvel 
25a6b803b3SArd Biesheuvel void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
26a6b803b3SArd Biesheuvel {
27a6b803b3SArd Biesheuvel }
28a6b803b3SArd Biesheuvel 
29a6b803b3SArd Biesheuvel static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
30a6b803b3SArd Biesheuvel 
31a6b803b3SArd Biesheuvel void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
32a6b803b3SArd Biesheuvel {
33a6b803b3SArd Biesheuvel 	poly1305_init_arm(&dctx->h, key);
34a6b803b3SArd Biesheuvel 	dctx->s[0] = get_unaligned_le32(key + 16);
35a6b803b3SArd Biesheuvel 	dctx->s[1] = get_unaligned_le32(key + 20);
36a6b803b3SArd Biesheuvel 	dctx->s[2] = get_unaligned_le32(key + 24);
37a6b803b3SArd Biesheuvel 	dctx->s[3] = get_unaligned_le32(key + 28);
38a6b803b3SArd Biesheuvel 	dctx->buflen = 0;
39a6b803b3SArd Biesheuvel }
40a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_init_arch);
41a6b803b3SArd Biesheuvel 
42a6b803b3SArd Biesheuvel static int arm_poly1305_init(struct shash_desc *desc)
43a6b803b3SArd Biesheuvel {
44a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
45a6b803b3SArd Biesheuvel 
46a6b803b3SArd Biesheuvel 	dctx->buflen = 0;
47a6b803b3SArd Biesheuvel 	dctx->rset = 0;
48a6b803b3SArd Biesheuvel 	dctx->sset = false;
49a6b803b3SArd Biesheuvel 
50a6b803b3SArd Biesheuvel 	return 0;
51a6b803b3SArd Biesheuvel }
52a6b803b3SArd Biesheuvel 
53a6b803b3SArd Biesheuvel static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
54a6b803b3SArd Biesheuvel 				 u32 len, u32 hibit, bool do_neon)
55a6b803b3SArd Biesheuvel {
56a6b803b3SArd Biesheuvel 	if (unlikely(!dctx->sset)) {
57a6b803b3SArd Biesheuvel 		if (!dctx->rset) {
58a6b803b3SArd Biesheuvel 			poly1305_init_arm(&dctx->h, src);
59a6b803b3SArd Biesheuvel 			src += POLY1305_BLOCK_SIZE;
60a6b803b3SArd Biesheuvel 			len -= POLY1305_BLOCK_SIZE;
61a6b803b3SArd Biesheuvel 			dctx->rset = 1;
62a6b803b3SArd Biesheuvel 		}
63a6b803b3SArd Biesheuvel 		if (len >= POLY1305_BLOCK_SIZE) {
64a6b803b3SArd Biesheuvel 			dctx->s[0] = get_unaligned_le32(src +  0);
65a6b803b3SArd Biesheuvel 			dctx->s[1] = get_unaligned_le32(src +  4);
66a6b803b3SArd Biesheuvel 			dctx->s[2] = get_unaligned_le32(src +  8);
67a6b803b3SArd Biesheuvel 			dctx->s[3] = get_unaligned_le32(src + 12);
68a6b803b3SArd Biesheuvel 			src += POLY1305_BLOCK_SIZE;
69a6b803b3SArd Biesheuvel 			len -= POLY1305_BLOCK_SIZE;
70a6b803b3SArd Biesheuvel 			dctx->sset = true;
71a6b803b3SArd Biesheuvel 		}
72a6b803b3SArd Biesheuvel 		if (len < POLY1305_BLOCK_SIZE)
73a6b803b3SArd Biesheuvel 			return;
74a6b803b3SArd Biesheuvel 	}
75a6b803b3SArd Biesheuvel 
76a6b803b3SArd Biesheuvel 	len &= ~(POLY1305_BLOCK_SIZE - 1);
77a6b803b3SArd Biesheuvel 
78a6b803b3SArd Biesheuvel 	if (static_branch_likely(&have_neon) && likely(do_neon))
79a6b803b3SArd Biesheuvel 		poly1305_blocks_neon(&dctx->h, src, len, hibit);
80a6b803b3SArd Biesheuvel 	else
81a6b803b3SArd Biesheuvel 		poly1305_blocks_arm(&dctx->h, src, len, hibit);
82a6b803b3SArd Biesheuvel }
83a6b803b3SArd Biesheuvel 
84a6b803b3SArd Biesheuvel static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
85a6b803b3SArd Biesheuvel 				    const u8 *src, u32 len, bool do_neon)
86a6b803b3SArd Biesheuvel {
87a6b803b3SArd Biesheuvel 	if (unlikely(dctx->buflen)) {
88a6b803b3SArd Biesheuvel 		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
89a6b803b3SArd Biesheuvel 
90a6b803b3SArd Biesheuvel 		memcpy(dctx->buf + dctx->buflen, src, bytes);
91a6b803b3SArd Biesheuvel 		src += bytes;
92a6b803b3SArd Biesheuvel 		len -= bytes;
93a6b803b3SArd Biesheuvel 		dctx->buflen += bytes;
94a6b803b3SArd Biesheuvel 
95a6b803b3SArd Biesheuvel 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
96a6b803b3SArd Biesheuvel 			arm_poly1305_blocks(dctx, dctx->buf,
97a6b803b3SArd Biesheuvel 					    POLY1305_BLOCK_SIZE, 1, false);
98a6b803b3SArd Biesheuvel 			dctx->buflen = 0;
99a6b803b3SArd Biesheuvel 		}
100a6b803b3SArd Biesheuvel 	}
101a6b803b3SArd Biesheuvel 
102a6b803b3SArd Biesheuvel 	if (likely(len >= POLY1305_BLOCK_SIZE)) {
103a6b803b3SArd Biesheuvel 		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
104a6b803b3SArd Biesheuvel 		src += round_down(len, POLY1305_BLOCK_SIZE);
105a6b803b3SArd Biesheuvel 		len %= POLY1305_BLOCK_SIZE;
106a6b803b3SArd Biesheuvel 	}
107a6b803b3SArd Biesheuvel 
108a6b803b3SArd Biesheuvel 	if (unlikely(len)) {
109a6b803b3SArd Biesheuvel 		dctx->buflen = len;
110a6b803b3SArd Biesheuvel 		memcpy(dctx->buf, src, len);
111a6b803b3SArd Biesheuvel 	}
112a6b803b3SArd Biesheuvel }
113a6b803b3SArd Biesheuvel 
114a6b803b3SArd Biesheuvel static int arm_poly1305_update(struct shash_desc *desc,
115a6b803b3SArd Biesheuvel 			       const u8 *src, unsigned int srclen)
116a6b803b3SArd Biesheuvel {
117a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
118a6b803b3SArd Biesheuvel 
119a6b803b3SArd Biesheuvel 	arm_poly1305_do_update(dctx, src, srclen, false);
120a6b803b3SArd Biesheuvel 	return 0;
121a6b803b3SArd Biesheuvel }
122a6b803b3SArd Biesheuvel 
123a6b803b3SArd Biesheuvel static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
124a6b803b3SArd Biesheuvel 						   const u8 *src,
125a6b803b3SArd Biesheuvel 						   unsigned int srclen)
126a6b803b3SArd Biesheuvel {
127a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
128a6b803b3SArd Biesheuvel 	bool do_neon = crypto_simd_usable() && srclen > 128;
129a6b803b3SArd Biesheuvel 
130a6b803b3SArd Biesheuvel 	if (static_branch_likely(&have_neon) && do_neon)
131a6b803b3SArd Biesheuvel 		kernel_neon_begin();
132a6b803b3SArd Biesheuvel 	arm_poly1305_do_update(dctx, src, srclen, do_neon);
133a6b803b3SArd Biesheuvel 	if (static_branch_likely(&have_neon) && do_neon)
134a6b803b3SArd Biesheuvel 		kernel_neon_end();
135a6b803b3SArd Biesheuvel 	return 0;
136a6b803b3SArd Biesheuvel }
137a6b803b3SArd Biesheuvel 
138a6b803b3SArd Biesheuvel void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
139a6b803b3SArd Biesheuvel 			  unsigned int nbytes)
140a6b803b3SArd Biesheuvel {
141a6b803b3SArd Biesheuvel 	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
142a6b803b3SArd Biesheuvel 		       crypto_simd_usable();
143a6b803b3SArd Biesheuvel 
144a6b803b3SArd Biesheuvel 	if (unlikely(dctx->buflen)) {
145a6b803b3SArd Biesheuvel 		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
146a6b803b3SArd Biesheuvel 
147a6b803b3SArd Biesheuvel 		memcpy(dctx->buf + dctx->buflen, src, bytes);
148a6b803b3SArd Biesheuvel 		src += bytes;
149a6b803b3SArd Biesheuvel 		nbytes -= bytes;
150a6b803b3SArd Biesheuvel 		dctx->buflen += bytes;
151a6b803b3SArd Biesheuvel 
152a6b803b3SArd Biesheuvel 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
153a6b803b3SArd Biesheuvel 			poly1305_blocks_arm(&dctx->h, dctx->buf,
154a6b803b3SArd Biesheuvel 					    POLY1305_BLOCK_SIZE, 1);
155a6b803b3SArd Biesheuvel 			dctx->buflen = 0;
156a6b803b3SArd Biesheuvel 		}
157a6b803b3SArd Biesheuvel 	}
158a6b803b3SArd Biesheuvel 
159a6b803b3SArd Biesheuvel 	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
160a6b803b3SArd Biesheuvel 		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
161a6b803b3SArd Biesheuvel 
162a6b803b3SArd Biesheuvel 		if (static_branch_likely(&have_neon) && do_neon) {
163a6b803b3SArd Biesheuvel 			kernel_neon_begin();
164a6b803b3SArd Biesheuvel 			poly1305_blocks_neon(&dctx->h, src, len, 1);
165a6b803b3SArd Biesheuvel 			kernel_neon_end();
166a6b803b3SArd Biesheuvel 		} else {
167a6b803b3SArd Biesheuvel 			poly1305_blocks_arm(&dctx->h, src, len, 1);
168a6b803b3SArd Biesheuvel 		}
169a6b803b3SArd Biesheuvel 		src += len;
170a6b803b3SArd Biesheuvel 		nbytes %= POLY1305_BLOCK_SIZE;
171a6b803b3SArd Biesheuvel 	}
172a6b803b3SArd Biesheuvel 
173a6b803b3SArd Biesheuvel 	if (unlikely(nbytes)) {
174a6b803b3SArd Biesheuvel 		dctx->buflen = nbytes;
175a6b803b3SArd Biesheuvel 		memcpy(dctx->buf, src, nbytes);
176a6b803b3SArd Biesheuvel 	}
177a6b803b3SArd Biesheuvel }
178a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_update_arch);
179a6b803b3SArd Biesheuvel 
180a6b803b3SArd Biesheuvel void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
181a6b803b3SArd Biesheuvel {
182a6b803b3SArd Biesheuvel 	__le32 digest[4];
183a6b803b3SArd Biesheuvel 	u64 f = 0;
184a6b803b3SArd Biesheuvel 
185a6b803b3SArd Biesheuvel 	if (unlikely(dctx->buflen)) {
186a6b803b3SArd Biesheuvel 		dctx->buf[dctx->buflen++] = 1;
187a6b803b3SArd Biesheuvel 		memset(dctx->buf + dctx->buflen, 0,
188a6b803b3SArd Biesheuvel 		       POLY1305_BLOCK_SIZE - dctx->buflen);
189a6b803b3SArd Biesheuvel 		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
190a6b803b3SArd Biesheuvel 	}
191a6b803b3SArd Biesheuvel 
192a6b803b3SArd Biesheuvel 	poly1305_emit_arm(&dctx->h, digest, dctx->s);
193a6b803b3SArd Biesheuvel 
194a6b803b3SArd Biesheuvel 	/* mac = (h + s) % (2^128) */
195a6b803b3SArd Biesheuvel 	f = (f >> 32) + le32_to_cpu(digest[0]);
196a6b803b3SArd Biesheuvel 	put_unaligned_le32(f, dst);
197a6b803b3SArd Biesheuvel 	f = (f >> 32) + le32_to_cpu(digest[1]);
198a6b803b3SArd Biesheuvel 	put_unaligned_le32(f, dst + 4);
199a6b803b3SArd Biesheuvel 	f = (f >> 32) + le32_to_cpu(digest[2]);
200a6b803b3SArd Biesheuvel 	put_unaligned_le32(f, dst + 8);
201a6b803b3SArd Biesheuvel 	f = (f >> 32) + le32_to_cpu(digest[3]);
202a6b803b3SArd Biesheuvel 	put_unaligned_le32(f, dst + 12);
203a6b803b3SArd Biesheuvel 
204a6b803b3SArd Biesheuvel 	*dctx = (struct poly1305_desc_ctx){};
205a6b803b3SArd Biesheuvel }
206a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_final_arch);
207a6b803b3SArd Biesheuvel 
208a6b803b3SArd Biesheuvel static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
209a6b803b3SArd Biesheuvel {
210a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
211a6b803b3SArd Biesheuvel 
212a6b803b3SArd Biesheuvel 	if (unlikely(!dctx->sset))
213a6b803b3SArd Biesheuvel 		return -ENOKEY;
214a6b803b3SArd Biesheuvel 
215a6b803b3SArd Biesheuvel 	poly1305_final_arch(dctx, dst);
216a6b803b3SArd Biesheuvel 	return 0;
217a6b803b3SArd Biesheuvel }
218a6b803b3SArd Biesheuvel 
219a6b803b3SArd Biesheuvel static struct shash_alg arm_poly1305_algs[] = {{
220a6b803b3SArd Biesheuvel 	.init			= arm_poly1305_init,
221a6b803b3SArd Biesheuvel 	.update			= arm_poly1305_update,
222a6b803b3SArd Biesheuvel 	.final			= arm_poly1305_final,
223a6b803b3SArd Biesheuvel 	.digestsize		= POLY1305_DIGEST_SIZE,
224a6b803b3SArd Biesheuvel 	.descsize		= sizeof(struct poly1305_desc_ctx),
225a6b803b3SArd Biesheuvel 
226a6b803b3SArd Biesheuvel 	.base.cra_name		= "poly1305",
227a6b803b3SArd Biesheuvel 	.base.cra_driver_name	= "poly1305-arm",
228a6b803b3SArd Biesheuvel 	.base.cra_priority	= 150,
229a6b803b3SArd Biesheuvel 	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
230a6b803b3SArd Biesheuvel 	.base.cra_module	= THIS_MODULE,
231a6b803b3SArd Biesheuvel #ifdef CONFIG_KERNEL_MODE_NEON
232a6b803b3SArd Biesheuvel }, {
233a6b803b3SArd Biesheuvel 	.init			= arm_poly1305_init,
234a6b803b3SArd Biesheuvel 	.update			= arm_poly1305_update_neon,
235a6b803b3SArd Biesheuvel 	.final			= arm_poly1305_final,
236a6b803b3SArd Biesheuvel 	.digestsize		= POLY1305_DIGEST_SIZE,
237a6b803b3SArd Biesheuvel 	.descsize		= sizeof(struct poly1305_desc_ctx),
238a6b803b3SArd Biesheuvel 
239a6b803b3SArd Biesheuvel 	.base.cra_name		= "poly1305",
240a6b803b3SArd Biesheuvel 	.base.cra_driver_name	= "poly1305-neon",
241a6b803b3SArd Biesheuvel 	.base.cra_priority	= 200,
242a6b803b3SArd Biesheuvel 	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
243a6b803b3SArd Biesheuvel 	.base.cra_module	= THIS_MODULE,
244a6b803b3SArd Biesheuvel #endif
245a6b803b3SArd Biesheuvel }};
246a6b803b3SArd Biesheuvel 
247a6b803b3SArd Biesheuvel static int __init arm_poly1305_mod_init(void)
248a6b803b3SArd Biesheuvel {
249a6b803b3SArd Biesheuvel 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
250a6b803b3SArd Biesheuvel 	    (elf_hwcap & HWCAP_NEON))
251a6b803b3SArd Biesheuvel 		static_branch_enable(&have_neon);
252*8394bfecSJason A. Donenfeld 	else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
253a6b803b3SArd Biesheuvel 		/* register only the first entry */
254a6b803b3SArd Biesheuvel 		return crypto_register_shash(&arm_poly1305_algs[0]);
255a6b803b3SArd Biesheuvel 
256*8394bfecSJason A. Donenfeld 	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
257*8394bfecSJason A. Donenfeld 		crypto_register_shashes(arm_poly1305_algs,
258*8394bfecSJason A. Donenfeld 					ARRAY_SIZE(arm_poly1305_algs)) : 0;
259a6b803b3SArd Biesheuvel }
260a6b803b3SArd Biesheuvel 
261a6b803b3SArd Biesheuvel static void __exit arm_poly1305_mod_exit(void)
262a6b803b3SArd Biesheuvel {
263*8394bfecSJason A. Donenfeld 	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
264*8394bfecSJason A. Donenfeld 		return;
265a6b803b3SArd Biesheuvel 	if (!static_branch_likely(&have_neon)) {
266a6b803b3SArd Biesheuvel 		crypto_unregister_shash(&arm_poly1305_algs[0]);
267a6b803b3SArd Biesheuvel 		return;
268a6b803b3SArd Biesheuvel 	}
269a6b803b3SArd Biesheuvel 	crypto_unregister_shashes(arm_poly1305_algs,
270a6b803b3SArd Biesheuvel 				  ARRAY_SIZE(arm_poly1305_algs));
271a6b803b3SArd Biesheuvel }
272a6b803b3SArd Biesheuvel 
273a6b803b3SArd Biesheuvel module_init(arm_poly1305_mod_init);
274a6b803b3SArd Biesheuvel module_exit(arm_poly1305_mod_exit);
275a6b803b3SArd Biesheuvel 
276a6b803b3SArd Biesheuvel MODULE_LICENSE("GPL v2");
277a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305");
278a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305-arm");
279a6b803b3SArd Biesheuvel MODULE_ALIAS_CRYPTO("poly1305-neon");
280