1 /*
2  * Cryptographic API.
3  *
4  * Glue code for the SHA256 Secure Hash Algorithm assembler
5  * implementation using supplemental SSE3 / AVX / AVX2 instructions.
6  *
7  * This file is based on sha256_generic.c
8  *
9  * Copyright (C) 2013 Intel Corporation.
10  *
11  * Author:
12  *     Tim Chen <tim.c.chen@linux.intel.com>
13  *
14  * This program is free software; you can redistribute it and/or modify it
15  * under the terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 2 of the License, or (at your option)
17  * any later version.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
23  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
24  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26  * SOFTWARE.
27  */
28 
29 
30 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
31 
32 #include <crypto/internal/hash.h>
33 #include <linux/init.h>
34 #include <linux/module.h>
35 #include <linux/mm.h>
36 #include <linux/cryptohash.h>
37 #include <linux/types.h>
38 #include <crypto/sha.h>
39 #include <asm/byteorder.h>
40 #include <asm/i387.h>
41 #include <asm/xcr.h>
42 #include <asm/xsave.h>
43 #include <linux/string.h>
44 
/*
 * Assembler block-transform routines, one per instruction-set variant.
 * As used in this file, each is handed the input bytes, the hash state
 * words to update, and the number of SHA256_BLOCK_SIZE-byte blocks to
 * process ("rounds").
 */
asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest,
				     u64 rounds);
#ifdef CONFIG_AS_AVX
asmlinkage void sha256_transform_avx(const char *data, u32 *digest,
				     u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
				     u64 rounds);
#endif

/*
 * Transform variant chosen by sha256_ssse3_mod_init(). It is left NULL when
 * no supported instruction set is usable, in which case module init fails
 * before any algorithm is registered.
 */
static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);
57 
58 
59 static int sha256_ssse3_init(struct shash_desc *desc)
60 {
61 	struct sha256_state *sctx = shash_desc_ctx(desc);
62 
63 	sctx->state[0] = SHA256_H0;
64 	sctx->state[1] = SHA256_H1;
65 	sctx->state[2] = SHA256_H2;
66 	sctx->state[3] = SHA256_H3;
67 	sctx->state[4] = SHA256_H4;
68 	sctx->state[5] = SHA256_H5;
69 	sctx->state[6] = SHA256_H6;
70 	sctx->state[7] = SHA256_H7;
71 	sctx->count = 0;
72 
73 	return 0;
74 }
75 
76 static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
77 			       unsigned int len, unsigned int partial)
78 {
79 	struct sha256_state *sctx = shash_desc_ctx(desc);
80 	unsigned int done = 0;
81 
82 	sctx->count += len;
83 
84 	if (partial) {
85 		done = SHA256_BLOCK_SIZE - partial;
86 		memcpy(sctx->buf + partial, data, done);
87 		sha256_transform_asm(sctx->buf, sctx->state, 1);
88 	}
89 
90 	if (len - done >= SHA256_BLOCK_SIZE) {
91 		const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
92 
93 		sha256_transform_asm(data + done, sctx->state, (u64) rounds);
94 
95 		done += rounds * SHA256_BLOCK_SIZE;
96 	}
97 
98 	memcpy(sctx->buf, data + done, len - done);
99 
100 	return 0;
101 }
102 
103 static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
104 			     unsigned int len)
105 {
106 	struct sha256_state *sctx = shash_desc_ctx(desc);
107 	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
108 	int res;
109 
110 	/* Handle the fast case right here */
111 	if (partial + len < SHA256_BLOCK_SIZE) {
112 		sctx->count += len;
113 		memcpy(sctx->buf + partial, data, len);
114 
115 		return 0;
116 	}
117 
118 	if (!irq_fpu_usable()) {
119 		res = crypto_sha256_update(desc, data, len);
120 	} else {
121 		kernel_fpu_begin();
122 		res = __sha256_ssse3_update(desc, data, len, partial);
123 		kernel_fpu_end();
124 	}
125 
126 	return res;
127 }
128 
129 
130 /* Add padding and return the message digest. */
131 static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
132 {
133 	struct sha256_state *sctx = shash_desc_ctx(desc);
134 	unsigned int i, index, padlen;
135 	__be32 *dst = (__be32 *)out;
136 	__be64 bits;
137 	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
138 
139 	bits = cpu_to_be64(sctx->count << 3);
140 
141 	/* Pad out to 56 mod 64 and append length */
142 	index = sctx->count % SHA256_BLOCK_SIZE;
143 	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
144 
145 	if (!irq_fpu_usable()) {
146 		crypto_sha256_update(desc, padding, padlen);
147 		crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
148 	} else {
149 		kernel_fpu_begin();
150 		/* We need to fill a whole block for __sha256_ssse3_update() */
151 		if (padlen <= 56) {
152 			sctx->count += padlen;
153 			memcpy(sctx->buf + index, padding, padlen);
154 		} else {
155 			__sha256_ssse3_update(desc, padding, padlen, index);
156 		}
157 		__sha256_ssse3_update(desc, (const u8 *)&bits,
158 					sizeof(bits), 56);
159 		kernel_fpu_end();
160 	}
161 
162 	/* Store state in digest */
163 	for (i = 0; i < 8; i++)
164 		dst[i] = cpu_to_be32(sctx->state[i]);
165 
166 	/* Wipe context */
167 	memset(sctx, 0, sizeof(*sctx));
168 
169 	return 0;
170 }
171 
172 static int sha256_ssse3_export(struct shash_desc *desc, void *out)
173 {
174 	struct sha256_state *sctx = shash_desc_ctx(desc);
175 
176 	memcpy(out, sctx, sizeof(*sctx));
177 
178 	return 0;
179 }
180 
181 static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
182 {
183 	struct sha256_state *sctx = shash_desc_ctx(desc);
184 
185 	memcpy(sctx, in, sizeof(*sctx));
186 
187 	return 0;
188 }
189 
190 static int sha224_ssse3_init(struct shash_desc *desc)
191 {
192 	struct sha256_state *sctx = shash_desc_ctx(desc);
193 
194 	sctx->state[0] = SHA224_H0;
195 	sctx->state[1] = SHA224_H1;
196 	sctx->state[2] = SHA224_H2;
197 	sctx->state[3] = SHA224_H3;
198 	sctx->state[4] = SHA224_H4;
199 	sctx->state[5] = SHA224_H5;
200 	sctx->state[6] = SHA224_H6;
201 	sctx->state[7] = SHA224_H7;
202 	sctx->count = 0;
203 
204 	return 0;
205 }
206 
207 static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
208 {
209 	u8 D[SHA256_DIGEST_SIZE];
210 
211 	sha256_ssse3_final(desc, D);
212 
213 	memcpy(hash, D, SHA224_DIGEST_SIZE);
214 	memset(D, 0, SHA256_DIGEST_SIZE);
215 
216 	return 0;
217 }
218 
219 static struct shash_alg algs[] = { {
220 	.digestsize	=	SHA256_DIGEST_SIZE,
221 	.init		=	sha256_ssse3_init,
222 	.update		=	sha256_ssse3_update,
223 	.final		=	sha256_ssse3_final,
224 	.export		=	sha256_ssse3_export,
225 	.import		=	sha256_ssse3_import,
226 	.descsize	=	sizeof(struct sha256_state),
227 	.statesize	=	sizeof(struct sha256_state),
228 	.base		=	{
229 		.cra_name	=	"sha256",
230 		.cra_driver_name =	"sha256-ssse3",
231 		.cra_priority	=	150,
232 		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
233 		.cra_blocksize	=	SHA256_BLOCK_SIZE,
234 		.cra_module	=	THIS_MODULE,
235 	}
236 }, {
237 	.digestsize	=	SHA224_DIGEST_SIZE,
238 	.init		=	sha224_ssse3_init,
239 	.update		=	sha256_ssse3_update,
240 	.final		=	sha224_ssse3_final,
241 	.export		=	sha256_ssse3_export,
242 	.import		=	sha256_ssse3_import,
243 	.descsize	=	sizeof(struct sha256_state),
244 	.statesize	=	sizeof(struct sha256_state),
245 	.base		=	{
246 		.cra_name	=	"sha224",
247 		.cra_driver_name =	"sha224-ssse3",
248 		.cra_priority	=	150,
249 		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
250 		.cra_blocksize	=	SHA224_BLOCK_SIZE,
251 		.cra_module	=	THIS_MODULE,
252 	}
253 } };
254 
#ifdef CONFIG_AS_AVX
/*
 * Report whether the AVX transform may be used: the CPU must advertise AVX
 * and OSXSAVE, and both the SSE and YMM state bits must be set in the
 * XFEATURE_ENABLED_MASK extended control register.
 */
static bool __init avx_usable(void)
{
	const u64 required = XSTATE_SSE | XSTATE_YMM;
	u64 enabled;

	if (!(cpu_has_avx && cpu_has_osxsave))
		return false;

	enabled = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((enabled & required) == required)
		return true;

	pr_info("AVX detected but unusable.\n");

	return false;
}
#endif
273 
274 static int __init sha256_ssse3_mod_init(void)
275 {
276 	/* test for SSSE3 first */
277 	if (cpu_has_ssse3)
278 		sha256_transform_asm = sha256_transform_ssse3;
279 
280 #ifdef CONFIG_AS_AVX
281 	/* allow AVX to override SSSE3, it's a little faster */
282 	if (avx_usable()) {
283 #ifdef CONFIG_AS_AVX2
284 		if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2))
285 			sha256_transform_asm = sha256_transform_rorx;
286 		else
287 #endif
288 			sha256_transform_asm = sha256_transform_avx;
289 	}
290 #endif
291 
292 	if (sha256_transform_asm) {
293 #ifdef CONFIG_AS_AVX
294 		if (sha256_transform_asm == sha256_transform_avx)
295 			pr_info("Using AVX optimized SHA-256 implementation\n");
296 #ifdef CONFIG_AS_AVX2
297 		else if (sha256_transform_asm == sha256_transform_rorx)
298 			pr_info("Using AVX2 optimized SHA-256 implementation\n");
299 #endif
300 		else
301 #endif
302 			pr_info("Using SSSE3 optimized SHA-256 implementation\n");
303 		return crypto_register_shashes(algs, ARRAY_SIZE(algs));
304 	}
305 	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
306 
307 	return -ENODEV;
308 }
309 
310 static void __exit sha256_ssse3_mod_fini(void)
311 {
312 	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
313 }
314 
/* Hook the init and exit handlers defined above into the module loader. */
module_init(sha256_ssse3_mod_init);
module_exit(sha256_ssse3_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");

/* Module aliases equal to the cra_name values registered in algs[]. */
MODULE_ALIAS("sha256");
MODULE_ALIAS("sha224");
323