1 /*
2  * Glue Code for x86_64/AVX2 assembler optimized version of Serpent
3  *
4  * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  */
12 
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/crypto.h>
16 #include <linux/err.h>
17 #include <crypto/algapi.h>
18 #include <crypto/internal/simd.h>
19 #include <crypto/serpent.h>
20 #include <crypto/xts.h>
21 #include <asm/crypto/glue_helper.h>
22 #include <asm/crypto/serpent-avx.h>
23 
24 #define SERPENT_AVX2_PARALLEL_BLOCKS 16
25 
26 /* 16-way AVX2 parallel cipher functions */
27 asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst,
28 				      const u8 *src);
29 asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst,
30 				      const u8 *src);
31 asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
32 
33 asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src,
34 				  le128 *iv);
35 asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
36 				      const u8 *src, le128 *iv);
37 asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
38 				      const u8 *src, le128 *iv);
39 
40 static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
41 				   const u8 *key, unsigned int keylen)
42 {
43 	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
44 }
45 
46 static const struct common_glue_ctx serpent_enc = {
47 	.num_funcs = 3,
48 	.fpu_blocks_limit = 8,
49 
50 	.funcs = { {
51 		.num_blocks = 16,
52 		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) }
53 	}, {
54 		.num_blocks = 8,
55 		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
56 	}, {
57 		.num_blocks = 1,
58 		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
59 	} }
60 };
61 
62 static const struct common_glue_ctx serpent_ctr = {
63 	.num_funcs = 3,
64 	.fpu_blocks_limit = 8,
65 
66 	.funcs = { {
67 		.num_blocks = 16,
68 		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) }
69 	},  {
70 		.num_blocks = 8,
71 		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
72 	}, {
73 		.num_blocks = 1,
74 		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
75 	} }
76 };
77 
78 static const struct common_glue_ctx serpent_enc_xts = {
79 	.num_funcs = 3,
80 	.fpu_blocks_limit = 8,
81 
82 	.funcs = { {
83 		.num_blocks = 16,
84 		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) }
85 	}, {
86 		.num_blocks = 8,
87 		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
88 	}, {
89 		.num_blocks = 1,
90 		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
91 	} }
92 };
93 
94 static const struct common_glue_ctx serpent_dec = {
95 	.num_funcs = 3,
96 	.fpu_blocks_limit = 8,
97 
98 	.funcs = { {
99 		.num_blocks = 16,
100 		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) }
101 	}, {
102 		.num_blocks = 8,
103 		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
104 	}, {
105 		.num_blocks = 1,
106 		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
107 	} }
108 };
109 
110 static const struct common_glue_ctx serpent_dec_cbc = {
111 	.num_funcs = 3,
112 	.fpu_blocks_limit = 8,
113 
114 	.funcs = { {
115 		.num_blocks = 16,
116 		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) }
117 	}, {
118 		.num_blocks = 8,
119 		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
120 	}, {
121 		.num_blocks = 1,
122 		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
123 	} }
124 };
125 
126 static const struct common_glue_ctx serpent_dec_xts = {
127 	.num_funcs = 3,
128 	.fpu_blocks_limit = 8,
129 
130 	.funcs = { {
131 		.num_blocks = 16,
132 		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) }
133 	}, {
134 		.num_blocks = 8,
135 		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
136 	}, {
137 		.num_blocks = 1,
138 		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
139 	} }
140 };
141 
142 static int ecb_encrypt(struct skcipher_request *req)
143 {
144 	return glue_ecb_req_128bit(&serpent_enc, req);
145 }
146 
147 static int ecb_decrypt(struct skcipher_request *req)
148 {
149 	return glue_ecb_req_128bit(&serpent_dec, req);
150 }
151 
152 static int cbc_encrypt(struct skcipher_request *req)
153 {
154 	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
155 					   req);
156 }
157 
158 static int cbc_decrypt(struct skcipher_request *req)
159 {
160 	return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
161 }
162 
163 static int ctr_crypt(struct skcipher_request *req)
164 {
165 	return glue_ctr_req_128bit(&serpent_ctr, req);
166 }
167 
168 static int xts_encrypt(struct skcipher_request *req)
169 {
170 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
171 	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
172 
173 	return glue_xts_req_128bit(&serpent_enc_xts, req,
174 				   XTS_TWEAK_CAST(__serpent_encrypt),
175 				   &ctx->tweak_ctx, &ctx->crypt_ctx);
176 }
177 
178 static int xts_decrypt(struct skcipher_request *req)
179 {
180 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
181 	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
182 
183 	return glue_xts_req_128bit(&serpent_dec_xts, req,
184 				   XTS_TWEAK_CAST(__serpent_encrypt),
185 				   &ctx->tweak_ctx, &ctx->crypt_ctx);
186 }
187 
188 static struct skcipher_alg serpent_algs[] = {
189 	{
190 		.base.cra_name		= "__ecb(serpent)",
191 		.base.cra_driver_name	= "__ecb-serpent-avx2",
192 		.base.cra_priority	= 600,
193 		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
194 		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
195 		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
196 		.base.cra_module	= THIS_MODULE,
197 		.min_keysize		= SERPENT_MIN_KEY_SIZE,
198 		.max_keysize		= SERPENT_MAX_KEY_SIZE,
199 		.setkey			= serpent_setkey_skcipher,
200 		.encrypt		= ecb_encrypt,
201 		.decrypt		= ecb_decrypt,
202 	}, {
203 		.base.cra_name		= "__cbc(serpent)",
204 		.base.cra_driver_name	= "__cbc-serpent-avx2",
205 		.base.cra_priority	= 600,
206 		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
207 		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
208 		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
209 		.base.cra_module	= THIS_MODULE,
210 		.min_keysize		= SERPENT_MIN_KEY_SIZE,
211 		.max_keysize		= SERPENT_MAX_KEY_SIZE,
212 		.ivsize			= SERPENT_BLOCK_SIZE,
213 		.setkey			= serpent_setkey_skcipher,
214 		.encrypt		= cbc_encrypt,
215 		.decrypt		= cbc_decrypt,
216 	}, {
217 		.base.cra_name		= "__ctr(serpent)",
218 		.base.cra_driver_name	= "__ctr-serpent-avx2",
219 		.base.cra_priority	= 600,
220 		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
221 		.base.cra_blocksize	= 1,
222 		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
223 		.base.cra_module	= THIS_MODULE,
224 		.min_keysize		= SERPENT_MIN_KEY_SIZE,
225 		.max_keysize		= SERPENT_MAX_KEY_SIZE,
226 		.ivsize			= SERPENT_BLOCK_SIZE,
227 		.chunksize		= SERPENT_BLOCK_SIZE,
228 		.setkey			= serpent_setkey_skcipher,
229 		.encrypt		= ctr_crypt,
230 		.decrypt		= ctr_crypt,
231 	}, {
232 		.base.cra_name		= "__xts(serpent)",
233 		.base.cra_driver_name	= "__xts-serpent-avx2",
234 		.base.cra_priority	= 600,
235 		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
236 		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
237 		.base.cra_ctxsize	= sizeof(struct serpent_xts_ctx),
238 		.base.cra_module	= THIS_MODULE,
239 		.min_keysize		= 2 * SERPENT_MIN_KEY_SIZE,
240 		.max_keysize		= 2 * SERPENT_MAX_KEY_SIZE,
241 		.ivsize			= SERPENT_BLOCK_SIZE,
242 		.setkey			= xts_serpent_setkey,
243 		.encrypt		= xts_encrypt,
244 		.decrypt		= xts_decrypt,
245 	},
246 };
247 
248 static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
249 
250 static int __init init(void)
251 {
252 	const char *feature_name;
253 
254 	if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
255 		pr_info("AVX2 instructions are not detected.\n");
256 		return -ENODEV;
257 	}
258 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
259 				&feature_name)) {
260 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
261 		return -ENODEV;
262 	}
263 
264 	return simd_register_skciphers_compat(serpent_algs,
265 					      ARRAY_SIZE(serpent_algs),
266 					      serpent_simd_algs);
267 }
268 
269 static void __exit fini(void)
270 {
271 	simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
272 				  serpent_simd_algs);
273 }
274 
275 module_init(init);
276 module_exit(fini);
277 
278 MODULE_LICENSE("GPL");
279 MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized");
280 MODULE_ALIAS_CRYPTO("serpent");
281 MODULE_ALIAS_CRYPTO("serpent-asm");
282