1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
4  *
5  * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
6  */
7 
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>
#include <linux/sizes.h>
20 
21 asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
22 asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
23 asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
24 asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce);
25 
26 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
27 
28 void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
29 {
30 	poly1305_init_arm64(&dctx->h, key);
31 	dctx->s[0] = get_unaligned_le32(key + 16);
32 	dctx->s[1] = get_unaligned_le32(key + 20);
33 	dctx->s[2] = get_unaligned_le32(key + 24);
34 	dctx->s[3] = get_unaligned_le32(key + 28);
35 	dctx->buflen = 0;
36 }
37 EXPORT_SYMBOL(poly1305_init_arch);
38 
39 static int neon_poly1305_init(struct shash_desc *desc)
40 {
41 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
42 
43 	dctx->buflen = 0;
44 	dctx->rset = 0;
45 	dctx->sset = false;
46 
47 	return 0;
48 }
49 
50 static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
51 				 u32 len, u32 hibit, bool do_neon)
52 {
53 	if (unlikely(!dctx->sset)) {
54 		if (!dctx->rset) {
55 			poly1305_init_arch(dctx, src);
56 			src += POLY1305_BLOCK_SIZE;
57 			len -= POLY1305_BLOCK_SIZE;
58 			dctx->rset = 1;
59 		}
60 		if (len >= POLY1305_BLOCK_SIZE) {
61 			dctx->s[0] = get_unaligned_le32(src +  0);
62 			dctx->s[1] = get_unaligned_le32(src +  4);
63 			dctx->s[2] = get_unaligned_le32(src +  8);
64 			dctx->s[3] = get_unaligned_le32(src + 12);
65 			src += POLY1305_BLOCK_SIZE;
66 			len -= POLY1305_BLOCK_SIZE;
67 			dctx->sset = true;
68 		}
69 		if (len < POLY1305_BLOCK_SIZE)
70 			return;
71 	}
72 
73 	len &= ~(POLY1305_BLOCK_SIZE - 1);
74 
75 	if (static_branch_likely(&have_neon) && likely(do_neon))
76 		poly1305_blocks_neon(&dctx->h, src, len, hibit);
77 	else
78 		poly1305_blocks(&dctx->h, src, len, hibit);
79 }
80 
81 static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
82 				    const u8 *src, u32 len, bool do_neon)
83 {
84 	if (unlikely(dctx->buflen)) {
85 		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
86 
87 		memcpy(dctx->buf + dctx->buflen, src, bytes);
88 		src += bytes;
89 		len -= bytes;
90 		dctx->buflen += bytes;
91 
92 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
93 			neon_poly1305_blocks(dctx, dctx->buf,
94 					     POLY1305_BLOCK_SIZE, 1, false);
95 			dctx->buflen = 0;
96 		}
97 	}
98 
99 	if (likely(len >= POLY1305_BLOCK_SIZE)) {
100 		neon_poly1305_blocks(dctx, src, len, 1, do_neon);
101 		src += round_down(len, POLY1305_BLOCK_SIZE);
102 		len %= POLY1305_BLOCK_SIZE;
103 	}
104 
105 	if (unlikely(len)) {
106 		dctx->buflen = len;
107 		memcpy(dctx->buf, src, len);
108 	}
109 }
110 
111 static int neon_poly1305_update(struct shash_desc *desc,
112 				const u8 *src, unsigned int srclen)
113 {
114 	bool do_neon = crypto_simd_usable() && srclen > 128;
115 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
116 
117 	if (static_branch_likely(&have_neon) && do_neon)
118 		kernel_neon_begin();
119 	neon_poly1305_do_update(dctx, src, srclen, do_neon);
120 	if (static_branch_likely(&have_neon) && do_neon)
121 		kernel_neon_end();
122 	return 0;
123 }
124 
125 void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
126 			  unsigned int nbytes)
127 {
128 	if (unlikely(dctx->buflen)) {
129 		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
130 
131 		memcpy(dctx->buf + dctx->buflen, src, bytes);
132 		src += bytes;
133 		nbytes -= bytes;
134 		dctx->buflen += bytes;
135 
136 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
137 			poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
138 			dctx->buflen = 0;
139 		}
140 	}
141 
142 	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
143 		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
144 
145 		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
146 			kernel_neon_begin();
147 			poly1305_blocks_neon(&dctx->h, src, len, 1);
148 			kernel_neon_end();
149 		} else {
150 			poly1305_blocks(&dctx->h, src, len, 1);
151 		}
152 		src += len;
153 		nbytes %= POLY1305_BLOCK_SIZE;
154 	}
155 
156 	if (unlikely(nbytes)) {
157 		dctx->buflen = nbytes;
158 		memcpy(dctx->buf, src, nbytes);
159 	}
160 }
161 EXPORT_SYMBOL(poly1305_update_arch);
162 
163 void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
164 {
165 	__le32 digest[4];
166 	u64 f = 0;
167 
168 	if (unlikely(dctx->buflen)) {
169 		dctx->buf[dctx->buflen++] = 1;
170 		memset(dctx->buf + dctx->buflen, 0,
171 		       POLY1305_BLOCK_SIZE - dctx->buflen);
172 		poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
173 	}
174 
175 	poly1305_emit(&dctx->h, digest, dctx->s);
176 
177 	/* mac = (h + s) % (2^128) */
178 	f = (f >> 32) + le32_to_cpu(digest[0]);
179 	put_unaligned_le32(f, dst);
180 	f = (f >> 32) + le32_to_cpu(digest[1]);
181 	put_unaligned_le32(f, dst + 4);
182 	f = (f >> 32) + le32_to_cpu(digest[2]);
183 	put_unaligned_le32(f, dst + 8);
184 	f = (f >> 32) + le32_to_cpu(digest[3]);
185 	put_unaligned_le32(f, dst + 12);
186 
187 	*dctx = (struct poly1305_desc_ctx){};
188 }
189 EXPORT_SYMBOL(poly1305_final_arch);
190 
191 static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
192 {
193 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
194 
195 	if (unlikely(!dctx->sset))
196 		return -ENOKEY;
197 
198 	poly1305_final_arch(dctx, dst);
199 	return 0;
200 }
201 
202 static struct shash_alg neon_poly1305_alg = {
203 	.init			= neon_poly1305_init,
204 	.update			= neon_poly1305_update,
205 	.final			= neon_poly1305_final,
206 	.digestsize		= POLY1305_DIGEST_SIZE,
207 	.descsize		= sizeof(struct poly1305_desc_ctx),
208 
209 	.base.cra_name		= "poly1305",
210 	.base.cra_driver_name	= "poly1305-neon",
211 	.base.cra_priority	= 200,
212 	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
213 	.base.cra_module	= THIS_MODULE,
214 };
215 
216 static int __init neon_poly1305_mod_init(void)
217 {
218 	if (!cpu_have_named_feature(ASIMD))
219 		return 0;
220 
221 	static_branch_enable(&have_neon);
222 
223 	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
224 		crypto_register_shash(&neon_poly1305_alg) : 0;
225 }
226 
227 static void __exit neon_poly1305_mod_exit(void)
228 {
229 	if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
230 		crypto_unregister_shash(&neon_poly1305_alg);
231 }
232 
233 module_init(neon_poly1305_mod_init);
234 module_exit(neon_poly1305_mod_exit);
235 
236 MODULE_LICENSE("GPL v2");
237 MODULE_ALIAS_CRYPTO("poly1305");
238 MODULE_ALIAS_CRYPTO("poly1305-neon");
239