1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
4 * instructions. This file contains glue code.
5 *
6 * Copyright (c) 2009 Intel Corp.
7 * Author: Huang Ying <ying.huang@intel.com>
8 */
9
10 #include <linux/err.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/kernel.h>
14 #include <linux/crypto.h>
15 #include <crypto/algapi.h>
16 #include <crypto/cryptd.h>
17 #include <crypto/gf128mul.h>
18 #include <crypto/internal/hash.h>
19 #include <crypto/internal/simd.h>
20 #include <asm/cpu_device_id.h>
21 #include <asm/simd.h>
22 #include <asm/unaligned.h>
23
/* GHASH input block length and digest length, both in bytes. */
#define GHASH_BLOCK_SIZE	16
#define GHASH_DIGEST_SIZE	16
26
27 void clmul_ghash_mul(char *dst, const le128 *shash);
28
29 void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
30 const le128 *shash);
31
/*
 * Per-transform context of the asynchronous "ghash" algorithm: just a handle
 * to the cryptd instance allocated in ghash_async_init_tfm() and released in
 * ghash_async_exit_tfm().
 */
struct ghash_async_ctx {
	struct cryptd_ahash *cryptd_tfm;
};
35
36 struct ghash_ctx {
37 le128 shash;
38 };
39
40 struct ghash_desc_ctx {
41 u8 buffer[GHASH_BLOCK_SIZE];
42 u32 bytes;
43 };
44
ghash_init(struct shash_desc * desc)45 static int ghash_init(struct shash_desc *desc)
46 {
47 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
48
49 memset(dctx, 0, sizeof(*dctx));
50
51 return 0;
52 }
53
ghash_setkey(struct crypto_shash * tfm,const u8 * key,unsigned int keylen)54 static int ghash_setkey(struct crypto_shash *tfm,
55 const u8 *key, unsigned int keylen)
56 {
57 struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
58 u64 a, b;
59
60 if (keylen != GHASH_BLOCK_SIZE)
61 return -EINVAL;
62
63 /*
64 * GHASH maps bits to polynomial coefficients backwards, which makes it
65 * hard to implement. But it can be shown that the GHASH multiplication
66 *
67 * D * K (mod x^128 + x^7 + x^2 + x + 1)
68 *
69 * (where D is a data block and K is the key) is equivalent to:
70 *
71 * bitreflect(D) * bitreflect(K) * x^(-127)
72 * (mod x^128 + x^127 + x^126 + x^121 + 1)
73 *
74 * So, the code below precomputes:
75 *
76 * bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1)
77 *
78 * ... but in Montgomery form (so that Montgomery multiplication can be
79 * used), i.e. with an extra x^128 factor, which means actually:
80 *
81 * bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1)
82 *
83 * The within-a-byte part of bitreflect() cancels out GHASH's built-in
84 * reflection, and thus bitreflect() is actually a byteswap.
85 */
86 a = get_unaligned_be64(key);
87 b = get_unaligned_be64(key + 8);
88 ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63));
89 ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63));
90 if (a >> 63)
91 ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56);
92 return 0;
93 }
94
ghash_update(struct shash_desc * desc,const u8 * src,unsigned int srclen)95 static int ghash_update(struct shash_desc *desc,
96 const u8 *src, unsigned int srclen)
97 {
98 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
99 struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
100 u8 *dst = dctx->buffer;
101
102 kernel_fpu_begin();
103 if (dctx->bytes) {
104 int n = min(srclen, dctx->bytes);
105 u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
106
107 dctx->bytes -= n;
108 srclen -= n;
109
110 while (n--)
111 *pos++ ^= *src++;
112
113 if (!dctx->bytes)
114 clmul_ghash_mul(dst, &ctx->shash);
115 }
116
117 clmul_ghash_update(dst, src, srclen, &ctx->shash);
118 kernel_fpu_end();
119
120 if (srclen & 0xf) {
121 src += srclen - (srclen & 0xf);
122 srclen &= 0xf;
123 dctx->bytes = GHASH_BLOCK_SIZE - srclen;
124 while (srclen--)
125 *dst++ ^= *src++;
126 }
127
128 return 0;
129 }
130
ghash_flush(struct ghash_ctx * ctx,struct ghash_desc_ctx * dctx)131 static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
132 {
133 u8 *dst = dctx->buffer;
134
135 if (dctx->bytes) {
136 u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
137
138 while (dctx->bytes--)
139 *tmp++ ^= 0;
140
141 kernel_fpu_begin();
142 clmul_ghash_mul(dst, &ctx->shash);
143 kernel_fpu_end();
144 }
145
146 dctx->bytes = 0;
147 }
148
/*
 * Complete the computation: flush any pending partial block and copy the
 * 16-byte accumulator to @dst.  Always returns 0.
 */
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);

	ghash_flush(crypto_shash_ctx(desc->tfm), dctx);
	memcpy(dst, dctx->buffer, GHASH_BLOCK_SIZE);

	return 0;
}
160
161 static struct shash_alg ghash_alg = {
162 .digestsize = GHASH_DIGEST_SIZE,
163 .init = ghash_init,
164 .update = ghash_update,
165 .final = ghash_final,
166 .setkey = ghash_setkey,
167 .descsize = sizeof(struct ghash_desc_ctx),
168 .base = {
169 .cra_name = "__ghash",
170 .cra_driver_name = "__ghash-pclmulqdqni",
171 .cra_priority = 0,
172 .cra_flags = CRYPTO_ALG_INTERNAL,
173 .cra_blocksize = GHASH_BLOCK_SIZE,
174 .cra_ctxsize = sizeof(struct ghash_ctx),
175 .cra_module = THIS_MODULE,
176 },
177 };
178
ghash_async_init(struct ahash_request * req)179 static int ghash_async_init(struct ahash_request *req)
180 {
181 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
182 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
183 struct ahash_request *cryptd_req = ahash_request_ctx(req);
184 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
185 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
186 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
187
188 desc->tfm = child;
189 return crypto_shash_init(desc);
190 }
191
ghash_async_update(struct ahash_request * req)192 static int ghash_async_update(struct ahash_request *req)
193 {
194 struct ahash_request *cryptd_req = ahash_request_ctx(req);
195 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
196 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
197 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
198
199 if (!crypto_simd_usable() ||
200 (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
201 memcpy(cryptd_req, req, sizeof(*req));
202 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
203 return crypto_ahash_update(cryptd_req);
204 } else {
205 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
206 return shash_ahash_update(req, desc);
207 }
208 }
209
ghash_async_final(struct ahash_request * req)210 static int ghash_async_final(struct ahash_request *req)
211 {
212 struct ahash_request *cryptd_req = ahash_request_ctx(req);
213 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
214 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
215 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
216
217 if (!crypto_simd_usable() ||
218 (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
219 memcpy(cryptd_req, req, sizeof(*req));
220 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
221 return crypto_ahash_final(cryptd_req);
222 } else {
223 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
224 return crypto_shash_final(desc, req->result);
225 }
226 }
227
ghash_async_import(struct ahash_request * req,const void * in)228 static int ghash_async_import(struct ahash_request *req, const void *in)
229 {
230 struct ahash_request *cryptd_req = ahash_request_ctx(req);
231 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
232 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
233
234 ghash_async_init(req);
235 memcpy(dctx, in, sizeof(*dctx));
236 return 0;
237
238 }
239
ghash_async_export(struct ahash_request * req,void * out)240 static int ghash_async_export(struct ahash_request *req, void *out)
241 {
242 struct ahash_request *cryptd_req = ahash_request_ctx(req);
243 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
244 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
245
246 memcpy(out, dctx, sizeof(*dctx));
247 return 0;
248
249 }
250
ghash_async_digest(struct ahash_request * req)251 static int ghash_async_digest(struct ahash_request *req)
252 {
253 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
254 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
255 struct ahash_request *cryptd_req = ahash_request_ctx(req);
256 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
257
258 if (!crypto_simd_usable() ||
259 (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
260 memcpy(cryptd_req, req, sizeof(*req));
261 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
262 return crypto_ahash_digest(cryptd_req);
263 } else {
264 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
265 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
266
267 desc->tfm = child;
268 return shash_ahash_digest(req, desc);
269 }
270 }
271
ghash_async_setkey(struct crypto_ahash * tfm,const u8 * key,unsigned int keylen)272 static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
273 unsigned int keylen)
274 {
275 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
276 struct crypto_ahash *child = &ctx->cryptd_tfm->base;
277
278 crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
279 crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
280 & CRYPTO_TFM_REQ_MASK);
281 return crypto_ahash_setkey(child, key, keylen);
282 }
283
ghash_async_init_tfm(struct crypto_tfm * tfm)284 static int ghash_async_init_tfm(struct crypto_tfm *tfm)
285 {
286 struct cryptd_ahash *cryptd_tfm;
287 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
288
289 cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni",
290 CRYPTO_ALG_INTERNAL,
291 CRYPTO_ALG_INTERNAL);
292 if (IS_ERR(cryptd_tfm))
293 return PTR_ERR(cryptd_tfm);
294 ctx->cryptd_tfm = cryptd_tfm;
295 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
296 sizeof(struct ahash_request) +
297 crypto_ahash_reqsize(&cryptd_tfm->base));
298
299 return 0;
300 }
301
ghash_async_exit_tfm(struct crypto_tfm * tfm)302 static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
303 {
304 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
305
306 cryptd_free_ahash(ctx->cryptd_tfm);
307 }
308
309 static struct ahash_alg ghash_async_alg = {
310 .init = ghash_async_init,
311 .update = ghash_async_update,
312 .final = ghash_async_final,
313 .setkey = ghash_async_setkey,
314 .digest = ghash_async_digest,
315 .export = ghash_async_export,
316 .import = ghash_async_import,
317 .halg = {
318 .digestsize = GHASH_DIGEST_SIZE,
319 .statesize = sizeof(struct ghash_desc_ctx),
320 .base = {
321 .cra_name = "ghash",
322 .cra_driver_name = "ghash-clmulni",
323 .cra_priority = 400,
324 .cra_ctxsize = sizeof(struct ghash_async_ctx),
325 .cra_flags = CRYPTO_ALG_ASYNC,
326 .cra_blocksize = GHASH_BLOCK_SIZE,
327 .cra_module = THIS_MODULE,
328 .cra_init = ghash_async_init_tfm,
329 .cra_exit = ghash_async_exit_tfm,
330 },
331 },
332 };
333
334 static const struct x86_cpu_id pcmul_cpu_id[] = {
335 X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */
336 {}
337 };
338 MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
339
ghash_pclmulqdqni_mod_init(void)340 static int __init ghash_pclmulqdqni_mod_init(void)
341 {
342 int err;
343
344 if (!x86_match_cpu(pcmul_cpu_id))
345 return -ENODEV;
346
347 err = crypto_register_shash(&ghash_alg);
348 if (err)
349 goto err_out;
350 err = crypto_register_ahash(&ghash_async_alg);
351 if (err)
352 goto err_shash;
353
354 return 0;
355
356 err_shash:
357 crypto_unregister_shash(&ghash_alg);
358 err_out:
359 return err;
360 }
361
ghash_pclmulqdqni_mod_exit(void)362 static void __exit ghash_pclmulqdqni_mod_exit(void)
363 {
364 crypto_unregister_ahash(&ghash_async_alg);
365 crypto_unregister_shash(&ghash_alg);
366 }
367
368 module_init(ghash_pclmulqdqni_mod_init);
369 module_exit(ghash_pclmulqdqni_mod_exit);
370
371 MODULE_LICENSE("GPL");
372 MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI");
373 MODULE_ALIAS_CRYPTO("ghash");
374