1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * SM4 Cipher Algorithm, AES-NI/AVX optimized.
4  * as specified in
5  * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
6  *
7  * Copyright (c) 2021, Alibaba Group.
8  * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
9  */
10 
11 #include <linux/module.h>
12 #include <linux/crypto.h>
13 #include <linux/kernel.h>
14 #include <asm/simd.h>
15 #include <crypto/internal/simd.h>
16 #include <crypto/internal/skcipher.h>
17 #include <crypto/sm4.h>
18 #include "sm4-avx.h"
19 
20 #define SM4_CRYPT8_BLOCK_SIZE	(SM4_BLOCK_SIZE * 8)
21 
/*
 * Prototypes for the SIMD assembly routines. Every call site in this
 * file wraps them in kernel_fpu_begin()/kernel_fpu_end(), so callers
 * must hold FPU/SIMD context.
 */
/* ECB-crypt up to 4 blocks with round keys @rk. */
asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
				const u8 *src, int nblocks);
/* ECB-crypt up to 8 blocks with round keys @rk. */
asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
				const u8 *src, int nblocks);
/* CTR encrypt 8 blocks; @iv is the counter block (asm presumably
 * advances it -- confirm against the .S implementation). */
asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
				const u8 *src, u8 *iv);
/* CBC decrypt 8 blocks, chaining through @iv. */
asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
				const u8 *src, u8 *iv);
/* CFB decrypt 8 blocks, chaining through @iv. */
asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
				const u8 *src, u8 *iv);
32 
33 static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
34 			unsigned int key_len)
35 {
36 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
37 
38 	return sm4_expandkey(ctx, key, key_len);
39 }
40 
41 static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
42 {
43 	struct skcipher_walk walk;
44 	unsigned int nbytes;
45 	int err;
46 
47 	err = skcipher_walk_virt(&walk, req, false);
48 
49 	while ((nbytes = walk.nbytes) > 0) {
50 		const u8 *src = walk.src.virt.addr;
51 		u8 *dst = walk.dst.virt.addr;
52 
53 		kernel_fpu_begin();
54 		while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) {
55 			sm4_aesni_avx_crypt8(rkey, dst, src, 8);
56 			dst += SM4_CRYPT8_BLOCK_SIZE;
57 			src += SM4_CRYPT8_BLOCK_SIZE;
58 			nbytes -= SM4_CRYPT8_BLOCK_SIZE;
59 		}
60 		while (nbytes >= SM4_BLOCK_SIZE) {
61 			unsigned int nblocks = min(nbytes >> 4, 4u);
62 			sm4_aesni_avx_crypt4(rkey, dst, src, nblocks);
63 			dst += nblocks * SM4_BLOCK_SIZE;
64 			src += nblocks * SM4_BLOCK_SIZE;
65 			nbytes -= nblocks * SM4_BLOCK_SIZE;
66 		}
67 		kernel_fpu_end();
68 
69 		err = skcipher_walk_done(&walk, nbytes);
70 	}
71 
72 	return err;
73 }
74 
75 int sm4_avx_ecb_encrypt(struct skcipher_request *req)
76 {
77 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
78 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
79 
80 	return ecb_do_crypt(req, ctx->rkey_enc);
81 }
82 EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt);
83 
84 int sm4_avx_ecb_decrypt(struct skcipher_request *req)
85 {
86 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
87 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
88 
89 	return ecb_do_crypt(req, ctx->rkey_dec);
90 }
91 EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt);
92 
93 int sm4_cbc_encrypt(struct skcipher_request *req)
94 {
95 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
96 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
97 	struct skcipher_walk walk;
98 	unsigned int nbytes;
99 	int err;
100 
101 	err = skcipher_walk_virt(&walk, req, false);
102 
103 	while ((nbytes = walk.nbytes) > 0) {
104 		const u8 *iv = walk.iv;
105 		const u8 *src = walk.src.virt.addr;
106 		u8 *dst = walk.dst.virt.addr;
107 
108 		while (nbytes >= SM4_BLOCK_SIZE) {
109 			crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
110 			sm4_crypt_block(ctx->rkey_enc, dst, dst);
111 			iv = dst;
112 			src += SM4_BLOCK_SIZE;
113 			dst += SM4_BLOCK_SIZE;
114 			nbytes -= SM4_BLOCK_SIZE;
115 		}
116 		if (iv != walk.iv)
117 			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
118 
119 		err = skcipher_walk_done(&walk, nbytes);
120 	}
121 
122 	return err;
123 }
124 EXPORT_SYMBOL_GPL(sm4_cbc_encrypt);
125 
/*
 * sm4_avx_cbc_decrypt() - CBC decryption using a SIMD bulk routine.
 * @req:   skcipher request (ciphertext in, plaintext out).
 * @bsize: byte count handled by one call to @func (a multiple of
 *         SM4_BLOCK_SIZE).
 * @func:  SIMD assembly routine for @bsize-byte chunks; it is passed
 *         walk.iv and is expected to keep it chained (asm contract not
 *         visible here -- confirm against the .S implementation).
 *
 * Full @bsize chunks go through @func; any leftover run of full blocks
 * is handled inline via sm4_aesni_avx_crypt8(). CBC's blocksize equals
 * SM4_BLOCK_SIZE, so no partial final block can occur here.
 */
int sm4_avx_cbc_decrypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		kernel_fpu_begin();

		/* Bulk path: whole @bsize chunks through the asm routine. */
		while (nbytes >= bsize) {
			func(ctx->rkey_dec, dst, src, walk.iv);
			dst += bsize;
			src += bsize;
			nbytes -= bsize;
		}

		/* Leftover full blocks, up to 8 at a time. */
		while (nbytes >= SM4_BLOCK_SIZE) {
			u8 keystream[SM4_BLOCK_SIZE * 8];
			u8 iv[SM4_BLOCK_SIZE];
			unsigned int nblocks = min(nbytes >> 4, 8u);
			int i;

			/* Raw block decryption of C_0..C_{nblocks-1}. */
			sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream,
						src, nblocks);

			/*
			 * Point src at ciphertext block nblocks-2 (the XOR
			 * input for the last output block) and dst at output
			 * block nblocks-1. Blocks are combined back-to-front
			 * so in-place requests (dst aliasing src) presumably
			 * stay correct -- each C_{i-1} is read before the
			 * store to position i-1 happens. For nblocks == 1,
			 * src transiently points one block before the buffer;
			 * only src + SM4_BLOCK_SIZE is dereferenced.
			 */
			src += ((int)nblocks - 2) * SM4_BLOCK_SIZE;
			dst += (nblocks - 1) * SM4_BLOCK_SIZE;
			/* Last ciphertext block becomes the next chain IV;
			 * save it before any store can clobber it. */
			memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);

			for (i = nblocks - 1; i > 0; i--) {
				/* P_i = D(C_i) ^ C_{i-1} */
				crypto_xor_cpy(dst, src,
					&keystream[i * SM4_BLOCK_SIZE],
					SM4_BLOCK_SIZE);
				src -= SM4_BLOCK_SIZE;
				dst -= SM4_BLOCK_SIZE;
			}
			/* First block chains off the incoming IV. */
			crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE);
			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
			dst += nblocks * SM4_BLOCK_SIZE;
			src += (nblocks + 1) * SM4_BLOCK_SIZE;
			nbytes -= nblocks * SM4_BLOCK_SIZE;
		}

		kernel_fpu_end();
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt);
184 
185 static int cbc_decrypt(struct skcipher_request *req)
186 {
187 	return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
188 				sm4_aesni_avx_cbc_dec_blk8);
189 }
190 
191 int sm4_cfb_encrypt(struct skcipher_request *req)
192 {
193 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
194 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
195 	struct skcipher_walk walk;
196 	unsigned int nbytes;
197 	int err;
198 
199 	err = skcipher_walk_virt(&walk, req, false);
200 
201 	while ((nbytes = walk.nbytes) > 0) {
202 		u8 keystream[SM4_BLOCK_SIZE];
203 		const u8 *iv = walk.iv;
204 		const u8 *src = walk.src.virt.addr;
205 		u8 *dst = walk.dst.virt.addr;
206 
207 		while (nbytes >= SM4_BLOCK_SIZE) {
208 			sm4_crypt_block(ctx->rkey_enc, keystream, iv);
209 			crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE);
210 			iv = dst;
211 			src += SM4_BLOCK_SIZE;
212 			dst += SM4_BLOCK_SIZE;
213 			nbytes -= SM4_BLOCK_SIZE;
214 		}
215 		if (iv != walk.iv)
216 			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
217 
218 		/* tail */
219 		if (walk.nbytes == walk.total && nbytes > 0) {
220 			sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
221 			crypto_xor_cpy(dst, src, keystream, nbytes);
222 			nbytes = 0;
223 		}
224 
225 		err = skcipher_walk_done(&walk, nbytes);
226 	}
227 
228 	return err;
229 }
230 EXPORT_SYMBOL_GPL(sm4_cfb_encrypt);
231 
/*
 * sm4_avx_cfb_decrypt() - CFB decryption using a SIMD bulk routine.
 * @req:   skcipher request (ciphertext in, plaintext out).
 * @bsize: byte count handled by one call to @func (a multiple of
 *         SM4_BLOCK_SIZE).
 * @func:  SIMD assembly routine for @bsize-byte chunks; it is passed
 *         walk.iv and is expected to keep it chained (asm contract not
 *         visible here -- confirm against the .S implementation).
 *
 * CFB decryption is parallel: P_i = E(C_{i-1}) ^ C_i with C_{-1} = IV.
 * Note only the encryption schedule (rkey_enc) is ever used.
 */
int sm4_avx_cfb_decrypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		kernel_fpu_begin();

		/* Bulk path: whole @bsize chunks through the asm routine. */
		while (nbytes >= bsize) {
			func(ctx->rkey_enc, dst, src, walk.iv);
			dst += bsize;
			src += bsize;
			nbytes -= bsize;
		}

		/* Leftover full blocks, up to 8 at a time. */
		while (nbytes >= SM4_BLOCK_SIZE) {
			u8 keystream[SM4_BLOCK_SIZE * 8];
			unsigned int nblocks = min(nbytes >> 4, 8u);

			/*
			 * Keystream input is [IV | C_0 .. C_{nblocks-2}]:
			 * each output block XORs with the encryption of the
			 * block that precedes it.
			 */
			memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
			if (nblocks > 1)
				memcpy(&keystream[SM4_BLOCK_SIZE], src,
					(nblocks - 1) * SM4_BLOCK_SIZE);
			/*
			 * Advance the chain IV to the last ciphertext block
			 * now, before crypto_xor_cpy() below may overwrite
			 * src (dst can alias src for in-place requests).
			 */
			memcpy(walk.iv, src + (nblocks - 1) * SM4_BLOCK_SIZE,
				SM4_BLOCK_SIZE);

			/* Encrypt the keystream buffer in place. */
			sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
						keystream, nblocks);

			/* P_i = keystream_i ^ C_i */
			crypto_xor_cpy(dst, src, keystream,
					nblocks * SM4_BLOCK_SIZE);
			dst += nblocks * SM4_BLOCK_SIZE;
			src += nblocks * SM4_BLOCK_SIZE;
			nbytes -= nblocks * SM4_BLOCK_SIZE;
		}

		kernel_fpu_end();

		/* Final partial block: stream-cipher the tail bytes. */
		if (walk.nbytes == walk.total && nbytes > 0) {
			u8 keystream[SM4_BLOCK_SIZE];

			sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
			crypto_xor_cpy(dst, src, keystream, nbytes);
			nbytes = 0;
		}

		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt);
294 
295 static int cfb_decrypt(struct skcipher_request *req)
296 {
297 	return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
298 				sm4_aesni_avx_cfb_dec_blk8);
299 }
300 
301 int sm4_avx_ctr_crypt(struct skcipher_request *req,
302 			unsigned int bsize, sm4_crypt_func func)
303 {
304 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
305 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
306 	struct skcipher_walk walk;
307 	unsigned int nbytes;
308 	int err;
309 
310 	err = skcipher_walk_virt(&walk, req, false);
311 
312 	while ((nbytes = walk.nbytes) > 0) {
313 		const u8 *src = walk.src.virt.addr;
314 		u8 *dst = walk.dst.virt.addr;
315 
316 		kernel_fpu_begin();
317 
318 		while (nbytes >= bsize) {
319 			func(ctx->rkey_enc, dst, src, walk.iv);
320 			dst += bsize;
321 			src += bsize;
322 			nbytes -= bsize;
323 		}
324 
325 		while (nbytes >= SM4_BLOCK_SIZE) {
326 			u8 keystream[SM4_BLOCK_SIZE * 8];
327 			unsigned int nblocks = min(nbytes >> 4, 8u);
328 			int i;
329 
330 			for (i = 0; i < nblocks; i++) {
331 				memcpy(&keystream[i * SM4_BLOCK_SIZE],
332 					walk.iv, SM4_BLOCK_SIZE);
333 				crypto_inc(walk.iv, SM4_BLOCK_SIZE);
334 			}
335 			sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
336 					keystream, nblocks);
337 
338 			crypto_xor_cpy(dst, src, keystream,
339 					nblocks * SM4_BLOCK_SIZE);
340 			dst += nblocks * SM4_BLOCK_SIZE;
341 			src += nblocks * SM4_BLOCK_SIZE;
342 			nbytes -= nblocks * SM4_BLOCK_SIZE;
343 		}
344 
345 		kernel_fpu_end();
346 
347 		/* tail */
348 		if (walk.nbytes == walk.total && nbytes > 0) {
349 			u8 keystream[SM4_BLOCK_SIZE];
350 
351 			memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
352 			crypto_inc(walk.iv, SM4_BLOCK_SIZE);
353 
354 			sm4_crypt_block(ctx->rkey_enc, keystream, keystream);
355 
356 			crypto_xor_cpy(dst, src, keystream, nbytes);
357 			dst += nbytes;
358 			src += nbytes;
359 			nbytes = 0;
360 		}
361 
362 		err = skcipher_walk_done(&walk, nbytes);
363 	}
364 
365 	return err;
366 }
367 EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt);
368 
369 static int ctr_crypt(struct skcipher_request *req)
370 {
371 	return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE,
372 				sm4_aesni_avx_ctr_enc_blk8);
373 }
374 
/*
 * Internal-only (CRYPTO_ALG_INTERNAL) algorithm definitions; they are
 * registered through simd_register_skciphers_compat() in sm4_init(),
 * which handles the SIMD-usability wrapping.
 */
static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
	{
		/* ECB: block-aligned input only (cra_blocksize = 16). */
		.base = {
			.cra_name		= "__ecb(sm4)",
			.cra_driver_name	= "__ecb-sm4-aesni-avx",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= SM4_BLOCK_SIZE,
			.cra_ctxsize		= sizeof(struct sm4_ctx),
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= SM4_KEY_SIZE,
		.max_keysize	= SM4_KEY_SIZE,
		/* Walk in 8-block steps to feed the wide SIMD path. */
		.walksize	= 8 * SM4_BLOCK_SIZE,
		.setkey		= sm4_skcipher_setkey,
		.encrypt	= sm4_avx_ecb_encrypt,
		.decrypt	= sm4_avx_ecb_decrypt,
	}, {
		/* CBC: scalar encrypt, SIMD decrypt. */
		.base = {
			.cra_name		= "__cbc(sm4)",
			.cra_driver_name	= "__cbc-sm4-aesni-avx",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= SM4_BLOCK_SIZE,
			.cra_ctxsize		= sizeof(struct sm4_ctx),
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= SM4_KEY_SIZE,
		.max_keysize	= SM4_KEY_SIZE,
		.ivsize		= SM4_BLOCK_SIZE,
		.walksize	= 8 * SM4_BLOCK_SIZE,
		.setkey		= sm4_skcipher_setkey,
		.encrypt	= sm4_cbc_encrypt,
		.decrypt	= cbc_decrypt,
	}, {
		/* CFB: stream mode (cra_blocksize = 1), scalar encrypt,
		 * SIMD decrypt. */
		.base = {
			.cra_name		= "__cfb(sm4)",
			.cra_driver_name	= "__cfb-sm4-aesni-avx",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= 1,
			.cra_ctxsize		= sizeof(struct sm4_ctx),
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= SM4_KEY_SIZE,
		.max_keysize	= SM4_KEY_SIZE,
		.ivsize		= SM4_BLOCK_SIZE,
		.chunksize	= SM4_BLOCK_SIZE,
		.walksize	= 8 * SM4_BLOCK_SIZE,
		.setkey		= sm4_skcipher_setkey,
		.encrypt	= sm4_cfb_encrypt,
		.decrypt	= cfb_decrypt,
	}, {
		/* CTR: stream mode, one routine for both directions. */
		.base = {
			.cra_name		= "__ctr(sm4)",
			.cra_driver_name	= "__ctr-sm4-aesni-avx",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= 1,
			.cra_ctxsize		= sizeof(struct sm4_ctx),
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= SM4_KEY_SIZE,
		.max_keysize	= SM4_KEY_SIZE,
		.ivsize		= SM4_BLOCK_SIZE,
		.chunksize	= SM4_BLOCK_SIZE,
		.walksize	= 8 * SM4_BLOCK_SIZE,
		.setkey		= sm4_skcipher_setkey,
		.encrypt	= ctr_crypt,
		.decrypt	= ctr_crypt,
	}
};
447 
/* SIMD wrapper handles, filled in by simd_register_skciphers_compat(). */
static struct simd_skcipher_alg *
simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
450 
451 static int __init sm4_init(void)
452 {
453 	const char *feature_name;
454 
455 	if (!boot_cpu_has(X86_FEATURE_AVX) ||
456 	    !boot_cpu_has(X86_FEATURE_AES) ||
457 	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
458 		pr_info("AVX or AES-NI instructions are not detected.\n");
459 		return -ENODEV;
460 	}
461 
462 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
463 				&feature_name)) {
464 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
465 		return -ENODEV;
466 	}
467 
468 	return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
469 					ARRAY_SIZE(sm4_aesni_avx_skciphers),
470 					simd_sm4_aesni_avx_skciphers);
471 }
472 
473 static void __exit sm4_exit(void)
474 {
475 	simd_unregister_skciphers(sm4_aesni_avx_skciphers,
476 					ARRAY_SIZE(sm4_aesni_avx_skciphers),
477 					simd_sm4_aesni_avx_skciphers);
478 }
479 
480 module_init(sm4_init);
481 module_exit(sm4_exit);
482 
483 MODULE_LICENSE("GPL v2");
484 MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
485 MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized");
486 MODULE_ALIAS_CRYPTO("sm4");
487 MODULE_ALIAS_CRYPTO("sm4-aesni-avx");
488