1*5b2efa2bSTianjia Zhang /* SPDX-License-Identifier: GPL-2.0-or-later */
2*5b2efa2bSTianjia Zhang /*
3*5b2efa2bSTianjia Zhang * SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
4*5b2efa2bSTianjia Zhang * as specified in
5*5b2efa2bSTianjia Zhang * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
6*5b2efa2bSTianjia Zhang *
7*5b2efa2bSTianjia Zhang * Copyright (c) 2021, Alibaba Group.
8*5b2efa2bSTianjia Zhang * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
9*5b2efa2bSTianjia Zhang */
10*5b2efa2bSTianjia Zhang
11*5b2efa2bSTianjia Zhang #include <linux/module.h>
12*5b2efa2bSTianjia Zhang #include <linux/crypto.h>
13*5b2efa2bSTianjia Zhang #include <linux/kernel.h>
14*5b2efa2bSTianjia Zhang #include <asm/simd.h>
15*5b2efa2bSTianjia Zhang #include <crypto/internal/simd.h>
16*5b2efa2bSTianjia Zhang #include <crypto/internal/skcipher.h>
17*5b2efa2bSTianjia Zhang #include <crypto/sm4.h>
18*5b2efa2bSTianjia Zhang #include "sm4-avx.h"
19*5b2efa2bSTianjia Zhang
20*5b2efa2bSTianjia Zhang #define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16)
21*5b2efa2bSTianjia Zhang
22*5b2efa2bSTianjia Zhang asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
23*5b2efa2bSTianjia Zhang const u8 *src, u8 *iv);
24*5b2efa2bSTianjia Zhang asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
25*5b2efa2bSTianjia Zhang const u8 *src, u8 *iv);
26*5b2efa2bSTianjia Zhang asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
27*5b2efa2bSTianjia Zhang const u8 *src, u8 *iv);
28*5b2efa2bSTianjia Zhang
/*
 * .setkey callback used by every algorithm entry in this file.
 *
 * Looks up the transform's private context with crypto_skcipher_ctx()
 * and lets sm4_expandkey() fill it from @key / @key_len.  The return
 * value of sm4_expandkey() is passed straight back to the caller.
 */
static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
			       unsigned int key_len)
{
	return sm4_expandkey(crypto_skcipher_ctx(tfm), key, key_len);
}
36*5b2efa2bSTianjia Zhang
cbc_decrypt(struct skcipher_request * req)37*5b2efa2bSTianjia Zhang static int cbc_decrypt(struct skcipher_request *req)
38*5b2efa2bSTianjia Zhang {
39*5b2efa2bSTianjia Zhang return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
40*5b2efa2bSTianjia Zhang sm4_aesni_avx2_cbc_dec_blk16);
41*5b2efa2bSTianjia Zhang }
42*5b2efa2bSTianjia Zhang
43*5b2efa2bSTianjia Zhang
cfb_decrypt(struct skcipher_request * req)44*5b2efa2bSTianjia Zhang static int cfb_decrypt(struct skcipher_request *req)
45*5b2efa2bSTianjia Zhang {
46*5b2efa2bSTianjia Zhang return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
47*5b2efa2bSTianjia Zhang sm4_aesni_avx2_cfb_dec_blk16);
48*5b2efa2bSTianjia Zhang }
49*5b2efa2bSTianjia Zhang
ctr_crypt(struct skcipher_request * req)50*5b2efa2bSTianjia Zhang static int ctr_crypt(struct skcipher_request *req)
51*5b2efa2bSTianjia Zhang {
52*5b2efa2bSTianjia Zhang return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE,
53*5b2efa2bSTianjia Zhang sm4_aesni_avx2_ctr_enc_blk16);
54*5b2efa2bSTianjia Zhang }
55*5b2efa2bSTianjia Zhang
56*5b2efa2bSTianjia Zhang static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
57*5b2efa2bSTianjia Zhang {
58*5b2efa2bSTianjia Zhang .base = {
59*5b2efa2bSTianjia Zhang .cra_name = "__ecb(sm4)",
60*5b2efa2bSTianjia Zhang .cra_driver_name = "__ecb-sm4-aesni-avx2",
61*5b2efa2bSTianjia Zhang .cra_priority = 500,
62*5b2efa2bSTianjia Zhang .cra_flags = CRYPTO_ALG_INTERNAL,
63*5b2efa2bSTianjia Zhang .cra_blocksize = SM4_BLOCK_SIZE,
64*5b2efa2bSTianjia Zhang .cra_ctxsize = sizeof(struct sm4_ctx),
65*5b2efa2bSTianjia Zhang .cra_module = THIS_MODULE,
66*5b2efa2bSTianjia Zhang },
67*5b2efa2bSTianjia Zhang .min_keysize = SM4_KEY_SIZE,
68*5b2efa2bSTianjia Zhang .max_keysize = SM4_KEY_SIZE,
69*5b2efa2bSTianjia Zhang .walksize = 16 * SM4_BLOCK_SIZE,
70*5b2efa2bSTianjia Zhang .setkey = sm4_skcipher_setkey,
71*5b2efa2bSTianjia Zhang .encrypt = sm4_avx_ecb_encrypt,
72*5b2efa2bSTianjia Zhang .decrypt = sm4_avx_ecb_decrypt,
73*5b2efa2bSTianjia Zhang }, {
74*5b2efa2bSTianjia Zhang .base = {
75*5b2efa2bSTianjia Zhang .cra_name = "__cbc(sm4)",
76*5b2efa2bSTianjia Zhang .cra_driver_name = "__cbc-sm4-aesni-avx2",
77*5b2efa2bSTianjia Zhang .cra_priority = 500,
78*5b2efa2bSTianjia Zhang .cra_flags = CRYPTO_ALG_INTERNAL,
79*5b2efa2bSTianjia Zhang .cra_blocksize = SM4_BLOCK_SIZE,
80*5b2efa2bSTianjia Zhang .cra_ctxsize = sizeof(struct sm4_ctx),
81*5b2efa2bSTianjia Zhang .cra_module = THIS_MODULE,
82*5b2efa2bSTianjia Zhang },
83*5b2efa2bSTianjia Zhang .min_keysize = SM4_KEY_SIZE,
84*5b2efa2bSTianjia Zhang .max_keysize = SM4_KEY_SIZE,
85*5b2efa2bSTianjia Zhang .ivsize = SM4_BLOCK_SIZE,
86*5b2efa2bSTianjia Zhang .walksize = 16 * SM4_BLOCK_SIZE,
87*5b2efa2bSTianjia Zhang .setkey = sm4_skcipher_setkey,
88*5b2efa2bSTianjia Zhang .encrypt = sm4_cbc_encrypt,
89*5b2efa2bSTianjia Zhang .decrypt = cbc_decrypt,
90*5b2efa2bSTianjia Zhang }, {
91*5b2efa2bSTianjia Zhang .base = {
92*5b2efa2bSTianjia Zhang .cra_name = "__cfb(sm4)",
93*5b2efa2bSTianjia Zhang .cra_driver_name = "__cfb-sm4-aesni-avx2",
94*5b2efa2bSTianjia Zhang .cra_priority = 500,
95*5b2efa2bSTianjia Zhang .cra_flags = CRYPTO_ALG_INTERNAL,
96*5b2efa2bSTianjia Zhang .cra_blocksize = 1,
97*5b2efa2bSTianjia Zhang .cra_ctxsize = sizeof(struct sm4_ctx),
98*5b2efa2bSTianjia Zhang .cra_module = THIS_MODULE,
99*5b2efa2bSTianjia Zhang },
100*5b2efa2bSTianjia Zhang .min_keysize = SM4_KEY_SIZE,
101*5b2efa2bSTianjia Zhang .max_keysize = SM4_KEY_SIZE,
102*5b2efa2bSTianjia Zhang .ivsize = SM4_BLOCK_SIZE,
103*5b2efa2bSTianjia Zhang .chunksize = SM4_BLOCK_SIZE,
104*5b2efa2bSTianjia Zhang .walksize = 16 * SM4_BLOCK_SIZE,
105*5b2efa2bSTianjia Zhang .setkey = sm4_skcipher_setkey,
106*5b2efa2bSTianjia Zhang .encrypt = sm4_cfb_encrypt,
107*5b2efa2bSTianjia Zhang .decrypt = cfb_decrypt,
108*5b2efa2bSTianjia Zhang }, {
109*5b2efa2bSTianjia Zhang .base = {
110*5b2efa2bSTianjia Zhang .cra_name = "__ctr(sm4)",
111*5b2efa2bSTianjia Zhang .cra_driver_name = "__ctr-sm4-aesni-avx2",
112*5b2efa2bSTianjia Zhang .cra_priority = 500,
113*5b2efa2bSTianjia Zhang .cra_flags = CRYPTO_ALG_INTERNAL,
114*5b2efa2bSTianjia Zhang .cra_blocksize = 1,
115*5b2efa2bSTianjia Zhang .cra_ctxsize = sizeof(struct sm4_ctx),
116*5b2efa2bSTianjia Zhang .cra_module = THIS_MODULE,
117*5b2efa2bSTianjia Zhang },
118*5b2efa2bSTianjia Zhang .min_keysize = SM4_KEY_SIZE,
119*5b2efa2bSTianjia Zhang .max_keysize = SM4_KEY_SIZE,
120*5b2efa2bSTianjia Zhang .ivsize = SM4_BLOCK_SIZE,
121*5b2efa2bSTianjia Zhang .chunksize = SM4_BLOCK_SIZE,
122*5b2efa2bSTianjia Zhang .walksize = 16 * SM4_BLOCK_SIZE,
123*5b2efa2bSTianjia Zhang .setkey = sm4_skcipher_setkey,
124*5b2efa2bSTianjia Zhang .encrypt = ctr_crypt,
125*5b2efa2bSTianjia Zhang .decrypt = ctr_crypt,
126*5b2efa2bSTianjia Zhang }
127*5b2efa2bSTianjia Zhang };
128*5b2efa2bSTianjia Zhang
129*5b2efa2bSTianjia Zhang static struct simd_skcipher_alg *
130*5b2efa2bSTianjia Zhang simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)];
131*5b2efa2bSTianjia Zhang
sm4_init(void)132*5b2efa2bSTianjia Zhang static int __init sm4_init(void)
133*5b2efa2bSTianjia Zhang {
134*5b2efa2bSTianjia Zhang const char *feature_name;
135*5b2efa2bSTianjia Zhang
136*5b2efa2bSTianjia Zhang if (!boot_cpu_has(X86_FEATURE_AVX) ||
137*5b2efa2bSTianjia Zhang !boot_cpu_has(X86_FEATURE_AVX2) ||
138*5b2efa2bSTianjia Zhang !boot_cpu_has(X86_FEATURE_AES) ||
139*5b2efa2bSTianjia Zhang !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
140*5b2efa2bSTianjia Zhang pr_info("AVX2 or AES-NI instructions are not detected.\n");
141*5b2efa2bSTianjia Zhang return -ENODEV;
142*5b2efa2bSTianjia Zhang }
143*5b2efa2bSTianjia Zhang
144*5b2efa2bSTianjia Zhang if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
145*5b2efa2bSTianjia Zhang &feature_name)) {
146*5b2efa2bSTianjia Zhang pr_info("CPU feature '%s' is not supported.\n", feature_name);
147*5b2efa2bSTianjia Zhang return -ENODEV;
148*5b2efa2bSTianjia Zhang }
149*5b2efa2bSTianjia Zhang
150*5b2efa2bSTianjia Zhang return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers,
151*5b2efa2bSTianjia Zhang ARRAY_SIZE(sm4_aesni_avx2_skciphers),
152*5b2efa2bSTianjia Zhang simd_sm4_aesni_avx2_skciphers);
153*5b2efa2bSTianjia Zhang }
154*5b2efa2bSTianjia Zhang
sm4_exit(void)155*5b2efa2bSTianjia Zhang static void __exit sm4_exit(void)
156*5b2efa2bSTianjia Zhang {
157*5b2efa2bSTianjia Zhang simd_unregister_skciphers(sm4_aesni_avx2_skciphers,
158*5b2efa2bSTianjia Zhang ARRAY_SIZE(sm4_aesni_avx2_skciphers),
159*5b2efa2bSTianjia Zhang simd_sm4_aesni_avx2_skciphers);
160*5b2efa2bSTianjia Zhang }
161*5b2efa2bSTianjia Zhang
162*5b2efa2bSTianjia Zhang module_init(sm4_init);
163*5b2efa2bSTianjia Zhang module_exit(sm4_exit);
164*5b2efa2bSTianjia Zhang
165*5b2efa2bSTianjia Zhang MODULE_LICENSE("GPL v2");
166*5b2efa2bSTianjia Zhang MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
167*5b2efa2bSTianjia Zhang MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized");
168*5b2efa2bSTianjia Zhang MODULE_ALIAS_CRYPTO("sm4");
169*5b2efa2bSTianjia Zhang MODULE_ALIAS_CRYPTO("sm4-aesni-avx2");
170