1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
4  *
5  * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
6  */
7 
8 #include <asm/hwcap.h>
9 #include <asm/neon.h>
10 #include <asm/simd.h>
11 #include <asm/unaligned.h>
12 #include <crypto/algapi.h>
13 #include <crypto/internal/hash.h>
14 #include <crypto/internal/poly1305.h>
15 #include <crypto/internal/simd.h>
16 #include <linux/cpufeature.h>
17 #include <linux/crypto.h>
18 #include <linux/jump_label.h>
19 #include <linux/module.h>
20 
21 asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
22 asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
23 asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
24 asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
25 
26 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
27 
poly1305_init_arch(struct poly1305_desc_ctx * dctx,const u8 key[POLY1305_KEY_SIZE])28 void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
29 {
30 	poly1305_init_arm64(&dctx->h, key);
31 	dctx->s[0] = get_unaligned_le32(key + 16);
32 	dctx->s[1] = get_unaligned_le32(key + 20);
33 	dctx->s[2] = get_unaligned_le32(key + 24);
34 	dctx->s[3] = get_unaligned_le32(key + 28);
35 	dctx->buflen = 0;
36 }
37 EXPORT_SYMBOL(poly1305_init_arch);
38 
neon_poly1305_init(struct shash_desc * desc)39 static int neon_poly1305_init(struct shash_desc *desc)
40 {
41 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
42 
43 	dctx->buflen = 0;
44 	dctx->rset = 0;
45 	dctx->sset = false;
46 
47 	return 0;
48 }
49 
/*
 * Consume whole blocks from @src, first peeling off key material if the
 * context does not have it yet.
 *
 * While dctx->sset is false the leading input is treated as the key:
 *   - if dctx->rset is clear, one block is passed to poly1305_init_arm64()
 *     and rset is set (the caller must supply at least one block here, since
 *     @len is decremented unconditionally);
 *   - if another full block is available, it is loaded as four little-endian
 *     32-bit words into dctx->s and sset is set.
 * If less than one block remains after that, nothing is hashed.
 *
 * Otherwise @len is rounded down to a multiple of POLY1305_BLOCK_SIZE and
 * handed to poly1305_blocks_neon() when both the have_neon static key and
 * @do_neon are set, or to poly1305_blocks() if not. @hibit is forwarded
 * unchanged.
 */
static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				 u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			poly1305_init_arm64(&dctx->h, src);
			dctx->rset = 1;
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
		}

		if (len >= POLY1305_BLOCK_SIZE) {
			unsigned int word;

			for (word = 0; word < 4; word++)
				dctx->s[word] = get_unaligned_le32(src + 4 * word);
			dctx->sset = true;
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
		}

		/* Everything was key material (or a short tail): done. */
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	/* Only whole blocks are processed here; drop any trailing partial. */
	len = round_down(len, POLY1305_BLOCK_SIZE);

	if (static_branch_likely(&have_neon) && likely(do_neon)) {
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
		return;
	}

	poly1305_blocks(&dctx->h, src, len, hibit);
}
80 
/*
 * Feed @len bytes at @src into @dctx for the shash path.
 *
 * Three phases:
 *   1. If a partial block is buffered, top it up from @src; once it is full
 *      it is processed through neon_poly1305_blocks() with do_neon == false.
 *   2. If at least one whole block remains, pass the remaining input to
 *      neon_poly1305_blocks() (which only consumes whole blocks) and skip
 *      past the whole-block part.
 *   3. Stash whatever is left (less than one block) in dctx->buf.
 *
 * The caller is responsible for any kernel_neon_begin()/kernel_neon_end()
 * bracketing when @do_neon is true.
 */
static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				    const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 room = POLY1305_BLOCK_SIZE - dctx->buflen;
		u32 take = min(len, room);

		memcpy(dctx->buf + dctx->buflen, src, take);
		dctx->buflen += take;
		src += take;
		len -= take;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			neon_poly1305_blocks(dctx, dctx->buf,
					     POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		u32 whole = round_down(len, POLY1305_BLOCK_SIZE);

		/* Pass the full length; the callee rounds down itself. */
		neon_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += whole;
		len -= whole;
	}

	if (unlikely(len)) {
		memcpy(dctx->buf, src, len);
		dctx->buflen = len;
	}
}
110 
neon_poly1305_update(struct shash_desc * desc,const u8 * src,unsigned int srclen)111 static int neon_poly1305_update(struct shash_desc *desc,
112 				const u8 *src, unsigned int srclen)
113 {
114 	bool do_neon = crypto_simd_usable() && srclen > 128;
115 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
116 
117 	if (static_branch_likely(&have_neon) && do_neon)
118 		kernel_neon_begin();
119 	neon_poly1305_do_update(dctx, src, srclen, do_neon);
120 	if (static_branch_likely(&have_neon) && do_neon)
121 		kernel_neon_end();
122 	return 0;
123 }
124 
poly1305_update_arch(struct poly1305_desc_ctx * dctx,const u8 * src,unsigned int nbytes)125 void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
126 			  unsigned int nbytes)
127 {
128 	if (unlikely(dctx->buflen)) {
129 		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
130 
131 		memcpy(dctx->buf + dctx->buflen, src, bytes);
132 		src += bytes;
133 		nbytes -= bytes;
134 		dctx->buflen += bytes;
135 
136 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
137 			poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
138 			dctx->buflen = 0;
139 		}
140 	}
141 
142 	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
143 		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
144 
145 		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
146 			do {
147 				unsigned int todo = min_t(unsigned int, len, SZ_4K);
148 
149 				kernel_neon_begin();
150 				poly1305_blocks_neon(&dctx->h, src, todo, 1);
151 				kernel_neon_end();
152 
153 				len -= todo;
154 				src += todo;
155 			} while (len);
156 		} else {
157 			poly1305_blocks(&dctx->h, src, len, 1);
158 			src += len;
159 		}
160 		nbytes %= POLY1305_BLOCK_SIZE;
161 	}
162 
163 	if (unlikely(nbytes)) {
164 		dctx->buflen = nbytes;
165 		memcpy(dctx->buf, src, nbytes);
166 	}
167 }
168 EXPORT_SYMBOL(poly1305_update_arch);
169 
/*
 * Finish the computation and write the digest to @dst.
 *
 * If a partial block is still buffered, a single 1 byte is appended, the
 * rest of the block is zero-filled, and the block is processed by
 * poly1305_blocks() with hibit == 0. poly1305_emit() then produces the
 * digest from dctx->h and dctx->s. The whole context is wiped before
 * returning, so it must be re-initialised before reuse.
 */
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		/* Pad: 0x01 terminator followed by zeroes up to a full block. */
		dctx->buf[dctx->buflen] = 1;
		dctx->buflen++;
		memset(&dctx->buf[dctx->buflen], 0,
		       POLY1305_BLOCK_SIZE - dctx->buflen);

		poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit(&dctx->h, dst, dctx->s);

	/* Do not leave key-dependent state behind. */
	memzero_explicit(dctx, sizeof(*dctx));
}
EXPORT_SYMBOL(poly1305_final_arch);
183 
/*
 * shash .final hook.
 *
 * Returns -ENOKEY if dctx->sset was never set, i.e. the input stream did not
 * supply enough data for neon_poly1305_blocks() to extract the key.
 * Otherwise it finalises via poly1305_final_arch() and returns 0.
 */
static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *ctx = shash_desc_ctx(desc);

	if (likely(ctx->sset)) {
		poly1305_final_arch(ctx, dst);
		return 0;
	}

	return -ENOKEY;
}
194 
195 static struct shash_alg neon_poly1305_alg = {
196 	.init			= neon_poly1305_init,
197 	.update			= neon_poly1305_update,
198 	.final			= neon_poly1305_final,
199 	.digestsize		= POLY1305_DIGEST_SIZE,
200 	.descsize		= sizeof(struct poly1305_desc_ctx),
201 
202 	.base.cra_name		= "poly1305",
203 	.base.cra_driver_name	= "poly1305-neon",
204 	.base.cra_priority	= 200,
205 	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
206 	.base.cra_module	= THIS_MODULE,
207 };
208 
neon_poly1305_mod_init(void)209 static int __init neon_poly1305_mod_init(void)
210 {
211 	if (!cpu_have_named_feature(ASIMD))
212 		return 0;
213 
214 	static_branch_enable(&have_neon);
215 
216 	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
217 		crypto_register_shash(&neon_poly1305_alg) : 0;
218 }
219 
neon_poly1305_mod_exit(void)220 static void __exit neon_poly1305_mod_exit(void)
221 {
222 	if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
223 		crypto_unregister_shash(&neon_poly1305_alg);
224 }
225 
226 module_init(neon_poly1305_mod_init);
227 module_exit(neon_poly1305_mod_exit);
228 
229 MODULE_LICENSE("GPL v2");
230 MODULE_ALIAS_CRYPTO("poly1305");
231 MODULE_ALIAS_CRYPTO("poly1305-neon");
232