1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
4  *
5  * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
6  */
7 
8 #include <asm/hwcap.h>
9 #include <asm/neon.h>
10 #include <asm/simd.h>
11 #include <asm/unaligned.h>
12 #include <crypto/algapi.h>
13 #include <crypto/internal/hash.h>
14 #include <crypto/internal/poly1305.h>
15 #include <crypto/internal/simd.h>
16 #include <linux/cpufeature.h>
17 #include <linux/crypto.h>
18 #include <linux/jump_label.h>
19 #include <linux/module.h>
20 
/* Entry points into the OpenSSL/CRYPTOGAMS arm64 assembly implementation. */
asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
/* hibit is 1 for ordinary blocks and 0 for the explicitly padded final one. */
asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);

/* Enabled at module init when the CPU advertises ASIMD (NEON). */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
27 
28 void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
29 {
30 	poly1305_init_arm64(&dctx->h, key);
31 	dctx->s[0] = get_unaligned_le32(key + 16);
32 	dctx->s[1] = get_unaligned_le32(key + 20);
33 	dctx->s[2] = get_unaligned_le32(key + 24);
34 	dctx->s[3] = get_unaligned_le32(key + 28);
35 	dctx->buflen = 0;
36 }
37 EXPORT_SYMBOL(poly1305_init_arch);
38 
39 static int neon_poly1305_init(struct shash_desc *desc)
40 {
41 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
42 
43 	dctx->buflen = 0;
44 	dctx->rset = 0;
45 	dctx->sset = false;
46 
47 	return 0;
48 }
49 
50 static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
51 				 u32 len, u32 hibit, bool do_neon)
52 {
53 	if (unlikely(!dctx->sset)) {
54 		if (!dctx->rset) {
55 			poly1305_init_arch(dctx, src);
56 			src += POLY1305_BLOCK_SIZE;
57 			len -= POLY1305_BLOCK_SIZE;
58 			dctx->rset = 1;
59 		}
60 		if (len >= POLY1305_BLOCK_SIZE) {
61 			dctx->s[0] = get_unaligned_le32(src +  0);
62 			dctx->s[1] = get_unaligned_le32(src +  4);
63 			dctx->s[2] = get_unaligned_le32(src +  8);
64 			dctx->s[3] = get_unaligned_le32(src + 12);
65 			src += POLY1305_BLOCK_SIZE;
66 			len -= POLY1305_BLOCK_SIZE;
67 			dctx->sset = true;
68 		}
69 		if (len < POLY1305_BLOCK_SIZE)
70 			return;
71 	}
72 
73 	len &= ~(POLY1305_BLOCK_SIZE - 1);
74 
75 	if (static_branch_likely(&have_neon) && likely(do_neon))
76 		poly1305_blocks_neon(&dctx->h, src, len, hibit);
77 	else
78 		poly1305_blocks(&dctx->h, src, len, hibit);
79 }
80 
/*
 * shash update worker: coalesce input into 16-byte blocks, keeping any
 * trailing partial block in dctx->buf for the next call.
 */
static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				    const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		/* Top up the partial block left over from the last call. */
		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		len -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			/* A lone block is processed by the scalar code. */
			neon_poly1305_blocks(dctx, dctx->buf,
					     POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		neon_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += round_down(len, POLY1305_BLOCK_SIZE);
		len %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(len)) {
		/* Stash the trailing partial block for the next call. */
		dctx->buflen = len;
		memcpy(dctx->buf, src, len);
	}
}
110 
111 static int neon_poly1305_update(struct shash_desc *desc,
112 				const u8 *src, unsigned int srclen)
113 {
114 	bool do_neon = crypto_simd_usable() && srclen > 128;
115 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
116 
117 	if (static_branch_likely(&have_neon) && do_neon)
118 		kernel_neon_begin();
119 	neon_poly1305_do_update(dctx, src, srclen, do_neon);
120 	if (static_branch_likely(&have_neon) && do_neon)
121 		kernel_neon_end();
122 	return 0;
123 }
124 
/*
 * Library interface update: buffer input into 16-byte blocks and hand
 * whole blocks to the scalar or NEON asm core.  Unlike the shash path,
 * the key was already installed by poly1305_init_arch().
 */
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int nbytes)
{
	if (unlikely(dctx->buflen)) {
		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);

		/* Top up the partial block left over from the last call. */
		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		nbytes -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			/* A lone block is processed by the scalar code. */
			poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		/* NEON only when SIMD is usable in the current context. */
		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
			kernel_neon_begin();
			poly1305_blocks_neon(&dctx->h, src, len, 1);
			kernel_neon_end();
		} else {
			poly1305_blocks(&dctx->h, src, len, 1);
		}
		src += len;
		nbytes %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(nbytes)) {
		/* Stash the trailing partial block for the next call. */
		dctx->buflen = nbytes;
		memcpy(dctx->buf, src, nbytes);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);
162 
163 void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
164 {
165 	if (unlikely(dctx->buflen)) {
166 		dctx->buf[dctx->buflen++] = 1;
167 		memset(dctx->buf + dctx->buflen, 0,
168 		       POLY1305_BLOCK_SIZE - dctx->buflen);
169 		poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
170 	}
171 
172 	poly1305_emit(&dctx->h, dst, dctx->s);
173 	*dctx = (struct poly1305_desc_ctx){};
174 }
175 EXPORT_SYMBOL(poly1305_final_arch);
176 
177 static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
178 {
179 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
180 
181 	if (unlikely(!dctx->sset))
182 		return -ENOKEY;
183 
184 	poly1305_final_arch(dctx, dst);
185 	return 0;
186 }
187 
/* shash wrapper exposing the accelerated core to the crypto API. */
static struct shash_alg neon_poly1305_alg = {
	.init			= neon_poly1305_init,
	.update			= neon_poly1305_update,
	.final			= neon_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-neon",
	/* NOTE(review): presumably ranks above the generic C driver — confirm. */
	.base.cra_priority	= 200,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
};
201 
202 static int __init neon_poly1305_mod_init(void)
203 {
204 	if (!cpu_have_named_feature(ASIMD))
205 		return 0;
206 
207 	static_branch_enable(&have_neon);
208 
209 	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
210 		crypto_register_shash(&neon_poly1305_alg) : 0;
211 }
212 
213 static void __exit neon_poly1305_mod_exit(void)
214 {
215 	if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
216 		crypto_unregister_shash(&neon_poly1305_alg);
217 }
218 
module_init(neon_poly1305_mod_init);
module_exit(neon_poly1305_mod_exit);

MODULE_LICENSE("GPL v2");
/* Allow auto-loading by both the algorithm and driver names. */
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-neon");
225