xref: /openbmc/linux/arch/arm/crypto/chacha-glue.c (revision 838f9bc0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
4  * including ChaCha20 (RFC7539)
5  *
6  * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
7  * Copyright (C) 2015 Martin Willi
8  */
9 
10 #include <crypto/algapi.h>
11 #include <crypto/internal/chacha.h>
12 #include <crypto/internal/simd.h>
13 #include <crypto/internal/skcipher.h>
14 #include <linux/jump_label.h>
15 #include <linux/kernel.h>
16 #include <linux/module.h>
17 
18 #include <asm/cputype.h>
19 #include <asm/hwcap.h>
20 #include <asm/neon.h>
21 #include <asm/simd.h>
22 
23 asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
24 				      int nrounds);
25 asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
26 				       int nrounds);
27 asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
28 asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
29 
30 asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
31 			     const u32 *state, int nrounds);
32 
33 static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
34 
35 static inline bool neon_usable(void)
36 {
37 	return static_branch_likely(&use_neon) && crypto_simd_usable();
38 }
39 
40 static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
41 			  unsigned int bytes, int nrounds)
42 {
43 	u8 buf[CHACHA_BLOCK_SIZE];
44 
45 	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
46 		chacha_4block_xor_neon(state, dst, src, nrounds);
47 		bytes -= CHACHA_BLOCK_SIZE * 4;
48 		src += CHACHA_BLOCK_SIZE * 4;
49 		dst += CHACHA_BLOCK_SIZE * 4;
50 		state[12] += 4;
51 	}
52 	while (bytes >= CHACHA_BLOCK_SIZE) {
53 		chacha_block_xor_neon(state, dst, src, nrounds);
54 		bytes -= CHACHA_BLOCK_SIZE;
55 		src += CHACHA_BLOCK_SIZE;
56 		dst += CHACHA_BLOCK_SIZE;
57 		state[12]++;
58 	}
59 	if (bytes) {
60 		memcpy(buf, src, bytes);
61 		chacha_block_xor_neon(state, buf, buf, nrounds);
62 		memcpy(dst, buf, bytes);
63 	}
64 }
65 
66 void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
67 {
68 	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
69 		hchacha_block_arm(state, stream, nrounds);
70 	} else {
71 		kernel_neon_begin();
72 		hchacha_block_neon(state, stream, nrounds);
73 		kernel_neon_end();
74 	}
75 }
76 EXPORT_SYMBOL(hchacha_block_arch);
77 
78 void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
79 {
80 	chacha_init_generic(state, key, iv);
81 }
82 EXPORT_SYMBOL(chacha_init_arch);
83 
84 void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
85 		       int nrounds)
86 {
87 	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
88 	    bytes <= CHACHA_BLOCK_SIZE) {
89 		chacha_doarm(dst, src, bytes, state, nrounds);
90 		state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
91 		return;
92 	}
93 
94 	do {
95 		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
96 
97 		kernel_neon_begin();
98 		chacha_doneon(state, dst, src, todo, nrounds);
99 		kernel_neon_end();
100 
101 		bytes -= todo;
102 		src += todo;
103 		dst += todo;
104 	} while (bytes);
105 }
106 EXPORT_SYMBOL(chacha_crypt_arch);
107 
108 static int chacha_stream_xor(struct skcipher_request *req,
109 			     const struct chacha_ctx *ctx, const u8 *iv,
110 			     bool neon)
111 {
112 	struct skcipher_walk walk;
113 	u32 state[16];
114 	int err;
115 
116 	err = skcipher_walk_virt(&walk, req, false);
117 
118 	chacha_init_generic(state, ctx->key, iv);
119 
120 	while (walk.nbytes > 0) {
121 		unsigned int nbytes = walk.nbytes;
122 
123 		if (nbytes < walk.total)
124 			nbytes = round_down(nbytes, walk.stride);
125 
126 		if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
127 			chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
128 				     nbytes, state, ctx->nrounds);
129 			state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
130 		} else {
131 			kernel_neon_begin();
132 			chacha_doneon(state, walk.dst.virt.addr,
133 				      walk.src.virt.addr, nbytes, ctx->nrounds);
134 			kernel_neon_end();
135 		}
136 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
137 	}
138 
139 	return err;
140 }
141 
142 static int do_chacha(struct skcipher_request *req, bool neon)
143 {
144 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
145 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
146 
147 	return chacha_stream_xor(req, ctx, req->iv, neon);
148 }
149 
/* skcipher encrypt/decrypt handler for the scalar ChaCha algorithms. */
static int chacha_arm(struct skcipher_request *req)
{
	const bool neon = false;

	return do_chacha(req, neon);
}
154 
/*
 * skcipher encrypt/decrypt handler for the NEON ChaCha algorithms.  NEON is
 * requested only if neon_usable() allows it at the time of the call.
 */
static int chacha_neon(struct skcipher_request *req)
{
	bool neon = neon_usable();

	return do_chacha(req, neon);
}
159 
160 static int do_xchacha(struct skcipher_request *req, bool neon)
161 {
162 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
163 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
164 	struct chacha_ctx subctx;
165 	u32 state[16];
166 	u8 real_iv[16];
167 
168 	chacha_init_generic(state, ctx->key, req->iv);
169 
170 	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
171 		hchacha_block_arm(state, subctx.key, ctx->nrounds);
172 	} else {
173 		kernel_neon_begin();
174 		hchacha_block_neon(state, subctx.key, ctx->nrounds);
175 		kernel_neon_end();
176 	}
177 	subctx.nrounds = ctx->nrounds;
178 
179 	memcpy(&real_iv[0], req->iv + 24, 8);
180 	memcpy(&real_iv[8], req->iv + 16, 8);
181 	return chacha_stream_xor(req, &subctx, real_iv, neon);
182 }
183 
/* skcipher encrypt/decrypt handler for the scalar XChaCha algorithms. */
static int xchacha_arm(struct skcipher_request *req)
{
	const bool neon = false;

	return do_xchacha(req, neon);
}
188 
/*
 * skcipher encrypt/decrypt handler for the NEON XChaCha algorithms.  NEON is
 * requested only if neon_usable() allows it at the time of the call.
 */
static int xchacha_neon(struct skcipher_request *req)
{
	bool neon = neon_usable();

	return do_xchacha(req, neon);
}
193 
194 static struct skcipher_alg arm_algs[] = {
195 	{
196 		.base.cra_name		= "chacha20",
197 		.base.cra_driver_name	= "chacha20-arm",
198 		.base.cra_priority	= 200,
199 		.base.cra_blocksize	= 1,
200 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
201 		.base.cra_module	= THIS_MODULE,
202 
203 		.min_keysize		= CHACHA_KEY_SIZE,
204 		.max_keysize		= CHACHA_KEY_SIZE,
205 		.ivsize			= CHACHA_IV_SIZE,
206 		.chunksize		= CHACHA_BLOCK_SIZE,
207 		.setkey			= chacha20_setkey,
208 		.encrypt		= chacha_arm,
209 		.decrypt		= chacha_arm,
210 	}, {
211 		.base.cra_name		= "xchacha20",
212 		.base.cra_driver_name	= "xchacha20-arm",
213 		.base.cra_priority	= 200,
214 		.base.cra_blocksize	= 1,
215 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
216 		.base.cra_module	= THIS_MODULE,
217 
218 		.min_keysize		= CHACHA_KEY_SIZE,
219 		.max_keysize		= CHACHA_KEY_SIZE,
220 		.ivsize			= XCHACHA_IV_SIZE,
221 		.chunksize		= CHACHA_BLOCK_SIZE,
222 		.setkey			= chacha20_setkey,
223 		.encrypt		= xchacha_arm,
224 		.decrypt		= xchacha_arm,
225 	}, {
226 		.base.cra_name		= "xchacha12",
227 		.base.cra_driver_name	= "xchacha12-arm",
228 		.base.cra_priority	= 200,
229 		.base.cra_blocksize	= 1,
230 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
231 		.base.cra_module	= THIS_MODULE,
232 
233 		.min_keysize		= CHACHA_KEY_SIZE,
234 		.max_keysize		= CHACHA_KEY_SIZE,
235 		.ivsize			= XCHACHA_IV_SIZE,
236 		.chunksize		= CHACHA_BLOCK_SIZE,
237 		.setkey			= chacha12_setkey,
238 		.encrypt		= xchacha_arm,
239 		.decrypt		= xchacha_arm,
240 	},
241 };
242 
243 static struct skcipher_alg neon_algs[] = {
244 	{
245 		.base.cra_name		= "chacha20",
246 		.base.cra_driver_name	= "chacha20-neon",
247 		.base.cra_priority	= 300,
248 		.base.cra_blocksize	= 1,
249 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
250 		.base.cra_module	= THIS_MODULE,
251 
252 		.min_keysize		= CHACHA_KEY_SIZE,
253 		.max_keysize		= CHACHA_KEY_SIZE,
254 		.ivsize			= CHACHA_IV_SIZE,
255 		.chunksize		= CHACHA_BLOCK_SIZE,
256 		.walksize		= 4 * CHACHA_BLOCK_SIZE,
257 		.setkey			= chacha20_setkey,
258 		.encrypt		= chacha_neon,
259 		.decrypt		= chacha_neon,
260 	}, {
261 		.base.cra_name		= "xchacha20",
262 		.base.cra_driver_name	= "xchacha20-neon",
263 		.base.cra_priority	= 300,
264 		.base.cra_blocksize	= 1,
265 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
266 		.base.cra_module	= THIS_MODULE,
267 
268 		.min_keysize		= CHACHA_KEY_SIZE,
269 		.max_keysize		= CHACHA_KEY_SIZE,
270 		.ivsize			= XCHACHA_IV_SIZE,
271 		.chunksize		= CHACHA_BLOCK_SIZE,
272 		.walksize		= 4 * CHACHA_BLOCK_SIZE,
273 		.setkey			= chacha20_setkey,
274 		.encrypt		= xchacha_neon,
275 		.decrypt		= xchacha_neon,
276 	}, {
277 		.base.cra_name		= "xchacha12",
278 		.base.cra_driver_name	= "xchacha12-neon",
279 		.base.cra_priority	= 300,
280 		.base.cra_blocksize	= 1,
281 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
282 		.base.cra_module	= THIS_MODULE,
283 
284 		.min_keysize		= CHACHA_KEY_SIZE,
285 		.max_keysize		= CHACHA_KEY_SIZE,
286 		.ivsize			= XCHACHA_IV_SIZE,
287 		.chunksize		= CHACHA_BLOCK_SIZE,
288 		.walksize		= 4 * CHACHA_BLOCK_SIZE,
289 		.setkey			= chacha12_setkey,
290 		.encrypt		= xchacha_neon,
291 		.decrypt		= xchacha_neon,
292 	}
293 };
294 
295 static int __init chacha_simd_mod_init(void)
296 {
297 	int err = 0;
298 
299 	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
300 		err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
301 		if (err)
302 			return err;
303 	}
304 
305 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
306 		int i;
307 
308 		switch (read_cpuid_part()) {
309 		case ARM_CPU_PART_CORTEX_A7:
310 		case ARM_CPU_PART_CORTEX_A5:
311 			/*
312 			 * The Cortex-A7 and Cortex-A5 do not perform well with
313 			 * the NEON implementation but do incredibly with the
314 			 * scalar one and use less power.
315 			 */
316 			for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
317 				neon_algs[i].base.cra_priority = 0;
318 			break;
319 		default:
320 			static_branch_enable(&use_neon);
321 		}
322 
323 		if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
324 			err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
325 			if (err)
326 				crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
327 		}
328 	}
329 	return err;
330 }
331 
332 static void __exit chacha_simd_mod_fini(void)
333 {
334 	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
335 		crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
336 		if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
337 			crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
338 	}
339 }
340 
341 module_init(chacha_simd_mod_init);
342 module_exit(chacha_simd_mod_fini);
343 
344 MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
345 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
346 MODULE_LICENSE("GPL v2");
347 MODULE_ALIAS_CRYPTO("chacha20");
348 MODULE_ALIAS_CRYPTO("chacha20-arm");
349 MODULE_ALIAS_CRYPTO("xchacha20");
350 MODULE_ALIAS_CRYPTO("xchacha20-arm");
351 MODULE_ALIAS_CRYPTO("xchacha12");
352 MODULE_ALIAS_CRYPTO("xchacha12-arm");
353 #ifdef CONFIG_KERNEL_MODE_NEON
354 MODULE_ALIAS_CRYPTO("chacha20-neon");
355 MODULE_ALIAS_CRYPTO("xchacha20-neon");
356 MODULE_ALIAS_CRYPTO("xchacha12-neon");
357 #endif
358