1 /*
2  * Glue Code for 3-way parallel assembler optimized version of Twofish
3  *
4  * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
19  * USA
20  *
21  */
22 
23 #include <asm/crypto/glue_helper.h>
24 #include <asm/crypto/twofish.h>
25 #include <crypto/algapi.h>
26 #include <crypto/b128ops.h>
27 #include <crypto/internal/skcipher.h>
28 #include <crypto/twofish.h>
29 #include <linux/crypto.h>
30 #include <linux/init.h>
31 #include <linux/module.h>
32 #include <linux/types.h>
33 
/*
 * Make the 3-way block primitives available to other GPL-licensed modules.
 * Neither routine is defined in the code of this file; they are only used
 * and re-exported here.
 */
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
36 
37 static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
38 				   const u8 *key, unsigned int keylen)
39 {
40 	return twofish_setkey(&tfm->base, key, keylen);
41 }
42 
43 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
44 					const u8 *src)
45 {
46 	__twofish_enc_blk_3way(ctx, dst, src, false);
47 }
48 
49 static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
50 					    const u8 *src)
51 {
52 	__twofish_enc_blk_3way(ctx, dst, src, true);
53 }
54 
55 void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
56 {
57 	u128 ivs[2];
58 
59 	ivs[0] = src[0];
60 	ivs[1] = src[1];
61 
62 	twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
63 
64 	u128_xor(&dst[1], &dst[1], &ivs[0]);
65 	u128_xor(&dst[2], &dst[2], &ivs[1]);
66 }
67 EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
68 
69 void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
70 {
71 	be128 ctrblk;
72 
73 	if (dst != src)
74 		*dst = *src;
75 
76 	le128_to_be128(&ctrblk, iv);
77 	le128_inc(iv);
78 
79 	twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
80 	u128_xor(dst, dst, (u128 *)&ctrblk);
81 }
82 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
83 
84 void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
85 			      le128 *iv)
86 {
87 	be128 ctrblks[3];
88 
89 	if (dst != src) {
90 		dst[0] = src[0];
91 		dst[1] = src[1];
92 		dst[2] = src[2];
93 	}
94 
95 	le128_to_be128(&ctrblks[0], iv);
96 	le128_inc(iv);
97 	le128_to_be128(&ctrblks[1], iv);
98 	le128_inc(iv);
99 	le128_to_be128(&ctrblks[2], iv);
100 	le128_inc(iv);
101 
102 	twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
103 }
104 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
105 
106 static const struct common_glue_ctx twofish_enc = {
107 	.num_funcs = 2,
108 	.fpu_blocks_limit = -1,
109 
110 	.funcs = { {
111 		.num_blocks = 3,
112 		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
113 	}, {
114 		.num_blocks = 1,
115 		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
116 	} }
117 };
118 
119 static const struct common_glue_ctx twofish_ctr = {
120 	.num_funcs = 2,
121 	.fpu_blocks_limit = -1,
122 
123 	.funcs = { {
124 		.num_blocks = 3,
125 		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) }
126 	}, {
127 		.num_blocks = 1,
128 		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) }
129 	} }
130 };
131 
132 static const struct common_glue_ctx twofish_dec = {
133 	.num_funcs = 2,
134 	.fpu_blocks_limit = -1,
135 
136 	.funcs = { {
137 		.num_blocks = 3,
138 		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
139 	}, {
140 		.num_blocks = 1,
141 		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
142 	} }
143 };
144 
145 static const struct common_glue_ctx twofish_dec_cbc = {
146 	.num_funcs = 2,
147 	.fpu_blocks_limit = -1,
148 
149 	.funcs = { {
150 		.num_blocks = 3,
151 		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
152 	}, {
153 		.num_blocks = 1,
154 		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
155 	} }
156 };
157 
158 static int ecb_encrypt(struct skcipher_request *req)
159 {
160 	return glue_ecb_req_128bit(&twofish_enc, req);
161 }
162 
163 static int ecb_decrypt(struct skcipher_request *req)
164 {
165 	return glue_ecb_req_128bit(&twofish_dec, req);
166 }
167 
168 static int cbc_encrypt(struct skcipher_request *req)
169 {
170 	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
171 					   req);
172 }
173 
174 static int cbc_decrypt(struct skcipher_request *req)
175 {
176 	return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
177 }
178 
179 static int ctr_crypt(struct skcipher_request *req)
180 {
181 	return glue_ctr_req_128bit(&twofish_ctr, req);
182 }
183 
184 static struct skcipher_alg tf_skciphers[] = {
185 	{
186 		.base.cra_name		= "ecb(twofish)",
187 		.base.cra_driver_name	= "ecb-twofish-3way",
188 		.base.cra_priority	= 300,
189 		.base.cra_blocksize	= TF_BLOCK_SIZE,
190 		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
191 		.base.cra_module	= THIS_MODULE,
192 		.min_keysize		= TF_MIN_KEY_SIZE,
193 		.max_keysize		= TF_MAX_KEY_SIZE,
194 		.setkey			= twofish_setkey_skcipher,
195 		.encrypt		= ecb_encrypt,
196 		.decrypt		= ecb_decrypt,
197 	}, {
198 		.base.cra_name		= "cbc(twofish)",
199 		.base.cra_driver_name	= "cbc-twofish-3way",
200 		.base.cra_priority	= 300,
201 		.base.cra_blocksize	= TF_BLOCK_SIZE,
202 		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
203 		.base.cra_module	= THIS_MODULE,
204 		.min_keysize		= TF_MIN_KEY_SIZE,
205 		.max_keysize		= TF_MAX_KEY_SIZE,
206 		.ivsize			= TF_BLOCK_SIZE,
207 		.setkey			= twofish_setkey_skcipher,
208 		.encrypt		= cbc_encrypt,
209 		.decrypt		= cbc_decrypt,
210 	}, {
211 		.base.cra_name		= "ctr(twofish)",
212 		.base.cra_driver_name	= "ctr-twofish-3way",
213 		.base.cra_priority	= 300,
214 		.base.cra_blocksize	= 1,
215 		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
216 		.base.cra_module	= THIS_MODULE,
217 		.min_keysize		= TF_MIN_KEY_SIZE,
218 		.max_keysize		= TF_MAX_KEY_SIZE,
219 		.ivsize			= TF_BLOCK_SIZE,
220 		.chunksize		= TF_BLOCK_SIZE,
221 		.setkey			= twofish_setkey_skcipher,
222 		.encrypt		= ctr_crypt,
223 		.decrypt		= ctr_crypt,
224 	},
225 };
226 
227 static bool is_blacklisted_cpu(void)
228 {
229 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
230 		return false;
231 
232 	if (boot_cpu_data.x86 == 0x06 &&
233 		(boot_cpu_data.x86_model == 0x1c ||
234 		 boot_cpu_data.x86_model == 0x26 ||
235 		 boot_cpu_data.x86_model == 0x36)) {
236 		/*
237 		 * On Atom, twofish-3way is slower than original assembler
238 		 * implementation. Twofish-3way trades off some performance in
239 		 * storing blocks in 64bit registers to allow three blocks to
240 		 * be processed parallel. Parallel operation then allows gaining
241 		 * more performance than was trade off, on out-of-order CPUs.
242 		 * However Atom does not benefit from this parallellism and
243 		 * should be blacklisted.
244 		 */
245 		return true;
246 	}
247 
248 	if (boot_cpu_data.x86 == 0x0f) {
249 		/*
250 		 * On Pentium 4, twofish-3way is slower than original assembler
251 		 * implementation because excessive uses of 64bit rotate and
252 		 * left-shifts (which are really slow on P4) needed to store and
253 		 * handle 128bit block in two 64bit registers.
254 		 */
255 		return true;
256 	}
257 
258 	return false;
259 }
260 
261 static int force;
262 module_param(force, int, 0);
263 MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
264 
265 static int __init init(void)
266 {
267 	if (!force && is_blacklisted_cpu()) {
268 		printk(KERN_INFO
269 			"twofish-x86_64-3way: performance on this CPU "
270 			"would be suboptimal: disabling "
271 			"twofish-x86_64-3way.\n");
272 		return -ENODEV;
273 	}
274 
275 	return crypto_register_skciphers(tf_skciphers,
276 					 ARRAY_SIZE(tf_skciphers));
277 }
278 
279 static void __exit fini(void)
280 {
281 	crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
282 }
283 
284 module_init(init);
285 module_exit(fini);
286 
287 MODULE_LICENSE("GPL");
288 MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
289 MODULE_ALIAS_CRYPTO("twofish");
290 MODULE_ALIAS_CRYPTO("twofish-asm");
291