xref: /openbmc/linux/arch/arm/crypto/poly1305-glue.c (revision a6b803b3ddc793d6db0c16f12fc12d30d20fa9cc)
1*a6b803b3SArd Biesheuvel // SPDX-License-Identifier: GPL-2.0
2*a6b803b3SArd Biesheuvel /*
3*a6b803b3SArd Biesheuvel  * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
4*a6b803b3SArd Biesheuvel  *
5*a6b803b3SArd Biesheuvel  * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
6*a6b803b3SArd Biesheuvel  */
7*a6b803b3SArd Biesheuvel 
8*a6b803b3SArd Biesheuvel #include <asm/hwcap.h>
9*a6b803b3SArd Biesheuvel #include <asm/neon.h>
10*a6b803b3SArd Biesheuvel #include <asm/simd.h>
11*a6b803b3SArd Biesheuvel #include <asm/unaligned.h>
12*a6b803b3SArd Biesheuvel #include <crypto/algapi.h>
13*a6b803b3SArd Biesheuvel #include <crypto/internal/hash.h>
14*a6b803b3SArd Biesheuvel #include <crypto/internal/poly1305.h>
15*a6b803b3SArd Biesheuvel #include <crypto/internal/simd.h>
16*a6b803b3SArd Biesheuvel #include <linux/cpufeature.h>
17*a6b803b3SArd Biesheuvel #include <linux/crypto.h>
18*a6b803b3SArd Biesheuvel #include <linux/jump_label.h>
19*a6b803b3SArd Biesheuvel #include <linux/module.h>
20*a6b803b3SArd Biesheuvel 
/*
 * Core Poly1305 routines. They are only declared here; the definitions live
 * outside this file (presumably in the accompanying OpenSSL/Cryptogams
 * assembly -- confirm against the build).
 */
void poly1305_init_arm(void *state, const u8 *key);
void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);

/*
 * Empty weak definition of the NEON block routine. Being __weak, it gives way
 * to any strong definition of the same symbol at link time; it presumably
 * exists so that this file still links when no NEON implementation is built.
 */
void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
{
}

/*
 * Off by default. arm_poly1305_mod_init() switches it on when
 * CONFIG_KERNEL_MODE_NEON is enabled and elf_hwcap reports HWCAP_NEON.
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
30*a6b803b3SArd Biesheuvel 
31*a6b803b3SArd Biesheuvel void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
32*a6b803b3SArd Biesheuvel {
33*a6b803b3SArd Biesheuvel 	poly1305_init_arm(&dctx->h, key);
34*a6b803b3SArd Biesheuvel 	dctx->s[0] = get_unaligned_le32(key + 16);
35*a6b803b3SArd Biesheuvel 	dctx->s[1] = get_unaligned_le32(key + 20);
36*a6b803b3SArd Biesheuvel 	dctx->s[2] = get_unaligned_le32(key + 24);
37*a6b803b3SArd Biesheuvel 	dctx->s[3] = get_unaligned_le32(key + 28);
38*a6b803b3SArd Biesheuvel 	dctx->buflen = 0;
39*a6b803b3SArd Biesheuvel }
40*a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_init_arch);
41*a6b803b3SArd Biesheuvel 
42*a6b803b3SArd Biesheuvel static int arm_poly1305_init(struct shash_desc *desc)
43*a6b803b3SArd Biesheuvel {
44*a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
45*a6b803b3SArd Biesheuvel 
46*a6b803b3SArd Biesheuvel 	dctx->buflen = 0;
47*a6b803b3SArd Biesheuvel 	dctx->rset = 0;
48*a6b803b3SArd Biesheuvel 	dctx->sset = false;
49*a6b803b3SArd Biesheuvel 
50*a6b803b3SArd Biesheuvel 	return 0;
51*a6b803b3SArd Biesheuvel }
52*a6b803b3SArd Biesheuvel 
53*a6b803b3SArd Biesheuvel static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
54*a6b803b3SArd Biesheuvel 				 u32 len, u32 hibit, bool do_neon)
55*a6b803b3SArd Biesheuvel {
56*a6b803b3SArd Biesheuvel 	if (unlikely(!dctx->sset)) {
57*a6b803b3SArd Biesheuvel 		if (!dctx->rset) {
58*a6b803b3SArd Biesheuvel 			poly1305_init_arm(&dctx->h, src);
59*a6b803b3SArd Biesheuvel 			src += POLY1305_BLOCK_SIZE;
60*a6b803b3SArd Biesheuvel 			len -= POLY1305_BLOCK_SIZE;
61*a6b803b3SArd Biesheuvel 			dctx->rset = 1;
62*a6b803b3SArd Biesheuvel 		}
63*a6b803b3SArd Biesheuvel 		if (len >= POLY1305_BLOCK_SIZE) {
64*a6b803b3SArd Biesheuvel 			dctx->s[0] = get_unaligned_le32(src +  0);
65*a6b803b3SArd Biesheuvel 			dctx->s[1] = get_unaligned_le32(src +  4);
66*a6b803b3SArd Biesheuvel 			dctx->s[2] = get_unaligned_le32(src +  8);
67*a6b803b3SArd Biesheuvel 			dctx->s[3] = get_unaligned_le32(src + 12);
68*a6b803b3SArd Biesheuvel 			src += POLY1305_BLOCK_SIZE;
69*a6b803b3SArd Biesheuvel 			len -= POLY1305_BLOCK_SIZE;
70*a6b803b3SArd Biesheuvel 			dctx->sset = true;
71*a6b803b3SArd Biesheuvel 		}
72*a6b803b3SArd Biesheuvel 		if (len < POLY1305_BLOCK_SIZE)
73*a6b803b3SArd Biesheuvel 			return;
74*a6b803b3SArd Biesheuvel 	}
75*a6b803b3SArd Biesheuvel 
76*a6b803b3SArd Biesheuvel 	len &= ~(POLY1305_BLOCK_SIZE - 1);
77*a6b803b3SArd Biesheuvel 
78*a6b803b3SArd Biesheuvel 	if (static_branch_likely(&have_neon) && likely(do_neon))
79*a6b803b3SArd Biesheuvel 		poly1305_blocks_neon(&dctx->h, src, len, hibit);
80*a6b803b3SArd Biesheuvel 	else
81*a6b803b3SArd Biesheuvel 		poly1305_blocks_arm(&dctx->h, src, len, hibit);
82*a6b803b3SArd Biesheuvel }
83*a6b803b3SArd Biesheuvel 
84*a6b803b3SArd Biesheuvel static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
85*a6b803b3SArd Biesheuvel 				    const u8 *src, u32 len, bool do_neon)
86*a6b803b3SArd Biesheuvel {
87*a6b803b3SArd Biesheuvel 	if (unlikely(dctx->buflen)) {
88*a6b803b3SArd Biesheuvel 		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
89*a6b803b3SArd Biesheuvel 
90*a6b803b3SArd Biesheuvel 		memcpy(dctx->buf + dctx->buflen, src, bytes);
91*a6b803b3SArd Biesheuvel 		src += bytes;
92*a6b803b3SArd Biesheuvel 		len -= bytes;
93*a6b803b3SArd Biesheuvel 		dctx->buflen += bytes;
94*a6b803b3SArd Biesheuvel 
95*a6b803b3SArd Biesheuvel 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
96*a6b803b3SArd Biesheuvel 			arm_poly1305_blocks(dctx, dctx->buf,
97*a6b803b3SArd Biesheuvel 					    POLY1305_BLOCK_SIZE, 1, false);
98*a6b803b3SArd Biesheuvel 			dctx->buflen = 0;
99*a6b803b3SArd Biesheuvel 		}
100*a6b803b3SArd Biesheuvel 	}
101*a6b803b3SArd Biesheuvel 
102*a6b803b3SArd Biesheuvel 	if (likely(len >= POLY1305_BLOCK_SIZE)) {
103*a6b803b3SArd Biesheuvel 		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
104*a6b803b3SArd Biesheuvel 		src += round_down(len, POLY1305_BLOCK_SIZE);
105*a6b803b3SArd Biesheuvel 		len %= POLY1305_BLOCK_SIZE;
106*a6b803b3SArd Biesheuvel 	}
107*a6b803b3SArd Biesheuvel 
108*a6b803b3SArd Biesheuvel 	if (unlikely(len)) {
109*a6b803b3SArd Biesheuvel 		dctx->buflen = len;
110*a6b803b3SArd Biesheuvel 		memcpy(dctx->buf, src, len);
111*a6b803b3SArd Biesheuvel 	}
112*a6b803b3SArd Biesheuvel }
113*a6b803b3SArd Biesheuvel 
114*a6b803b3SArd Biesheuvel static int arm_poly1305_update(struct shash_desc *desc,
115*a6b803b3SArd Biesheuvel 			       const u8 *src, unsigned int srclen)
116*a6b803b3SArd Biesheuvel {
117*a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
118*a6b803b3SArd Biesheuvel 
119*a6b803b3SArd Biesheuvel 	arm_poly1305_do_update(dctx, src, srclen, false);
120*a6b803b3SArd Biesheuvel 	return 0;
121*a6b803b3SArd Biesheuvel }
122*a6b803b3SArd Biesheuvel 
123*a6b803b3SArd Biesheuvel static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
124*a6b803b3SArd Biesheuvel 						   const u8 *src,
125*a6b803b3SArd Biesheuvel 						   unsigned int srclen)
126*a6b803b3SArd Biesheuvel {
127*a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
128*a6b803b3SArd Biesheuvel 	bool do_neon = crypto_simd_usable() && srclen > 128;
129*a6b803b3SArd Biesheuvel 
130*a6b803b3SArd Biesheuvel 	if (static_branch_likely(&have_neon) && do_neon)
131*a6b803b3SArd Biesheuvel 		kernel_neon_begin();
132*a6b803b3SArd Biesheuvel 	arm_poly1305_do_update(dctx, src, srclen, do_neon);
133*a6b803b3SArd Biesheuvel 	if (static_branch_likely(&have_neon) && do_neon)
134*a6b803b3SArd Biesheuvel 		kernel_neon_end();
135*a6b803b3SArd Biesheuvel 	return 0;
136*a6b803b3SArd Biesheuvel }
137*a6b803b3SArd Biesheuvel 
138*a6b803b3SArd Biesheuvel void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
139*a6b803b3SArd Biesheuvel 			  unsigned int nbytes)
140*a6b803b3SArd Biesheuvel {
141*a6b803b3SArd Biesheuvel 	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
142*a6b803b3SArd Biesheuvel 		       crypto_simd_usable();
143*a6b803b3SArd Biesheuvel 
144*a6b803b3SArd Biesheuvel 	if (unlikely(dctx->buflen)) {
145*a6b803b3SArd Biesheuvel 		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
146*a6b803b3SArd Biesheuvel 
147*a6b803b3SArd Biesheuvel 		memcpy(dctx->buf + dctx->buflen, src, bytes);
148*a6b803b3SArd Biesheuvel 		src += bytes;
149*a6b803b3SArd Biesheuvel 		nbytes -= bytes;
150*a6b803b3SArd Biesheuvel 		dctx->buflen += bytes;
151*a6b803b3SArd Biesheuvel 
152*a6b803b3SArd Biesheuvel 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
153*a6b803b3SArd Biesheuvel 			poly1305_blocks_arm(&dctx->h, dctx->buf,
154*a6b803b3SArd Biesheuvel 					    POLY1305_BLOCK_SIZE, 1);
155*a6b803b3SArd Biesheuvel 			dctx->buflen = 0;
156*a6b803b3SArd Biesheuvel 		}
157*a6b803b3SArd Biesheuvel 	}
158*a6b803b3SArd Biesheuvel 
159*a6b803b3SArd Biesheuvel 	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
160*a6b803b3SArd Biesheuvel 		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
161*a6b803b3SArd Biesheuvel 
162*a6b803b3SArd Biesheuvel 		if (static_branch_likely(&have_neon) && do_neon) {
163*a6b803b3SArd Biesheuvel 			kernel_neon_begin();
164*a6b803b3SArd Biesheuvel 			poly1305_blocks_neon(&dctx->h, src, len, 1);
165*a6b803b3SArd Biesheuvel 			kernel_neon_end();
166*a6b803b3SArd Biesheuvel 		} else {
167*a6b803b3SArd Biesheuvel 			poly1305_blocks_arm(&dctx->h, src, len, 1);
168*a6b803b3SArd Biesheuvel 		}
169*a6b803b3SArd Biesheuvel 		src += len;
170*a6b803b3SArd Biesheuvel 		nbytes %= POLY1305_BLOCK_SIZE;
171*a6b803b3SArd Biesheuvel 	}
172*a6b803b3SArd Biesheuvel 
173*a6b803b3SArd Biesheuvel 	if (unlikely(nbytes)) {
174*a6b803b3SArd Biesheuvel 		dctx->buflen = nbytes;
175*a6b803b3SArd Biesheuvel 		memcpy(dctx->buf, src, nbytes);
176*a6b803b3SArd Biesheuvel 	}
177*a6b803b3SArd Biesheuvel }
178*a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_update_arch);
179*a6b803b3SArd Biesheuvel 
180*a6b803b3SArd Biesheuvel void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
181*a6b803b3SArd Biesheuvel {
182*a6b803b3SArd Biesheuvel 	__le32 digest[4];
183*a6b803b3SArd Biesheuvel 	u64 f = 0;
184*a6b803b3SArd Biesheuvel 
185*a6b803b3SArd Biesheuvel 	if (unlikely(dctx->buflen)) {
186*a6b803b3SArd Biesheuvel 		dctx->buf[dctx->buflen++] = 1;
187*a6b803b3SArd Biesheuvel 		memset(dctx->buf + dctx->buflen, 0,
188*a6b803b3SArd Biesheuvel 		       POLY1305_BLOCK_SIZE - dctx->buflen);
189*a6b803b3SArd Biesheuvel 		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
190*a6b803b3SArd Biesheuvel 	}
191*a6b803b3SArd Biesheuvel 
192*a6b803b3SArd Biesheuvel 	poly1305_emit_arm(&dctx->h, digest, dctx->s);
193*a6b803b3SArd Biesheuvel 
194*a6b803b3SArd Biesheuvel 	/* mac = (h + s) % (2^128) */
195*a6b803b3SArd Biesheuvel 	f = (f >> 32) + le32_to_cpu(digest[0]);
196*a6b803b3SArd Biesheuvel 	put_unaligned_le32(f, dst);
197*a6b803b3SArd Biesheuvel 	f = (f >> 32) + le32_to_cpu(digest[1]);
198*a6b803b3SArd Biesheuvel 	put_unaligned_le32(f, dst + 4);
199*a6b803b3SArd Biesheuvel 	f = (f >> 32) + le32_to_cpu(digest[2]);
200*a6b803b3SArd Biesheuvel 	put_unaligned_le32(f, dst + 8);
201*a6b803b3SArd Biesheuvel 	f = (f >> 32) + le32_to_cpu(digest[3]);
202*a6b803b3SArd Biesheuvel 	put_unaligned_le32(f, dst + 12);
203*a6b803b3SArd Biesheuvel 
204*a6b803b3SArd Biesheuvel 	*dctx = (struct poly1305_desc_ctx){};
205*a6b803b3SArd Biesheuvel }
206*a6b803b3SArd Biesheuvel EXPORT_SYMBOL(poly1305_final_arch);
207*a6b803b3SArd Biesheuvel 
208*a6b803b3SArd Biesheuvel static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
209*a6b803b3SArd Biesheuvel {
210*a6b803b3SArd Biesheuvel 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
211*a6b803b3SArd Biesheuvel 
212*a6b803b3SArd Biesheuvel 	if (unlikely(!dctx->sset))
213*a6b803b3SArd Biesheuvel 		return -ENOKEY;
214*a6b803b3SArd Biesheuvel 
215*a6b803b3SArd Biesheuvel 	poly1305_final_arch(dctx, dst);
216*a6b803b3SArd Biesheuvel 	return 0;
217*a6b803b3SArd Biesheuvel }
218*a6b803b3SArd Biesheuvel 
/*
 * shash algorithm table. Entry 0 is the scalar driver and is always present.
 * Entry 1, the NEON driver, exists only when CONFIG_KERNEL_MODE_NEON is
 * enabled. Both entries share the algorithm name "poly1305" and the same
 * init/final hooks; they differ in the update hook, the driver name and
 * cra_priority (150 for scalar, 200 for NEON).
 */
static struct shash_alg arm_poly1305_algs[] = {{
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-arm",
	.base.cra_priority	= 150,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#ifdef CONFIG_KERNEL_MODE_NEON
}, {
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update_neon,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-neon",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#endif
}};
246*a6b803b3SArd Biesheuvel 
247*a6b803b3SArd Biesheuvel static int __init arm_poly1305_mod_init(void)
248*a6b803b3SArd Biesheuvel {
249*a6b803b3SArd Biesheuvel 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
250*a6b803b3SArd Biesheuvel 	    (elf_hwcap & HWCAP_NEON))
251*a6b803b3SArd Biesheuvel 		static_branch_enable(&have_neon);
252*a6b803b3SArd Biesheuvel 	else
253*a6b803b3SArd Biesheuvel 		/* register only the first entry */
254*a6b803b3SArd Biesheuvel 		return crypto_register_shash(&arm_poly1305_algs[0]);
255*a6b803b3SArd Biesheuvel 
256*a6b803b3SArd Biesheuvel 	return crypto_register_shashes(arm_poly1305_algs,
257*a6b803b3SArd Biesheuvel 				       ARRAY_SIZE(arm_poly1305_algs));
258*a6b803b3SArd Biesheuvel }
259*a6b803b3SArd Biesheuvel 
260*a6b803b3SArd Biesheuvel static void __exit arm_poly1305_mod_exit(void)
261*a6b803b3SArd Biesheuvel {
262*a6b803b3SArd Biesheuvel 	if (!static_branch_likely(&have_neon)) {
263*a6b803b3SArd Biesheuvel 		crypto_unregister_shash(&arm_poly1305_algs[0]);
264*a6b803b3SArd Biesheuvel 		return;
265*a6b803b3SArd Biesheuvel 	}
266*a6b803b3SArd Biesheuvel 	crypto_unregister_shashes(arm_poly1305_algs,
267*a6b803b3SArd Biesheuvel 				  ARRAY_SIZE(arm_poly1305_algs));
268*a6b803b3SArd Biesheuvel }
269*a6b803b3SArd Biesheuvel 
/* Hook the init/exit routines defined above into module load and unload. */
module_init(arm_poly1305_mod_init);
module_exit(arm_poly1305_mod_exit);

MODULE_LICENSE("GPL v2");
/* Aliases cover the algorithm name and both driver names in the table. */
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-arm");
MODULE_ALIAS_CRYPTO("poly1305-neon");
277