// SPDX-License-Identifier: GPL-2.0-only
/*
 * Glue code for POLYVAL using PCLMULQDQ-NI
 *
 * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 * Copyright 2021 Google LLC
 */

/*
 * Glue code based on ghash-clmulni-intel_glue.c.
 *
 * This implementation of POLYVAL uses Montgomery multiplication
 * accelerated by PCLMULQDQ-NI to implement the finite field
 * operations.
 */

#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/polyval.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>

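/*
 * The key powers must be 16-byte aligned for the PCLMULQDQ routines, but
 * crypto_shash_ctx() only guarantees CRYPTO_MINALIGN alignment.  Reserve
 * POLYVAL_ALIGN_EXTRA extra bytes so the context pointer can be aligned up
 * at runtime.
 */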
#define POLYVAL_ALIGN	16
#define POLYVAL_ALIGN_ATTR __aligned(POLYVAL_ALIGN)
#define POLYVAL_ALIGN_EXTRA ((POLYVAL_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
#define POLYVAL_CTX_SIZE (sizeof(struct polyval_tfm_ctx) + POLYVAL_ALIGN_EXTRA)
#define NUM_KEY_POWERS	8

struct polyval_tfm_ctx {
	/*
	 * These powers must be in the order h^8, ..., h^1.
	 */
	u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE] POLYVAL_ALIGN_ATTR;
};

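/*
 * Per-request state: 'buffer' holds the running POLYVAL accumulator and is
 * also used to stage a partial block; 'bytes' counts how many bytes are
 * still needed to complete the current block.
 */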
struct polyval_desc_ctx {
	u8 buffer[POLYVAL_BLOCK_SIZE];
	u32 bytes;
};

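/*
 * Routines provided by the accompanying assembly implementation:
 * clmul_polyval_update() folds 'nblocks' full blocks of 'in' into the
 * accumulator using the precomputed key powers, and clmul_polyval_mul()
 * computes op1 = op1 * op2 in the POLYVAL field.
 */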
asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
	const u8 *in, size_t nblocks, u8 *accumulator);
asmlinkage void clmul_polyval_mul(u8 *op1, const u8 *op2);

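/* Return the tfm context, aligned up to POLYVAL_ALIGN. */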
static inline struct polyval_tfm_ctx *polyval_tfm_ctx(struct crypto_shash *tfm)
{
	return PTR_ALIGN(crypto_shash_ctx(tfm), POLYVAL_ALIGN);
}

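/*
 * Process full blocks with the PCLMULQDQ routine when the FPU is usable;
 * otherwise fall back to the generic helper, which only needs h^1.
 */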
static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
	const u8 *in, size_t nblocks, u8 *accumulator)
{
	if (likely(crypto_simd_usable())) {
		kernel_fpu_begin();
		clmul_polyval_update(keys, in, nblocks, accumulator);
		kernel_fpu_end();
	} else {
		polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
			nblocks, accumulator);
	}
}

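/* Multiply op1 by op2 in the POLYVAL field, with a non-SIMD fallback. */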
static void internal_polyval_mul(u8 *op1, const u8 *op2)
{
	if (likely(crypto_simd_usable())) {
		kernel_fpu_begin();
		clmul_polyval_mul(op1, op2);
		kernel_fpu_end();
	} else {
		polyval_mul_non4k(op1, op2);
	}
}

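/*
 * Store h^1 in the last slot, then repeatedly multiply by h so that
 * key_powers[] ends up as h^8, h^7, ..., h^1.
 */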
static int polyval_x86_setkey(struct crypto_shash *tfm,
			const u8 *key, unsigned int keylen)
{
	struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(tfm);
	int i;

	if (keylen != POLYVAL_BLOCK_SIZE)
		return -EINVAL;

	memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE);

	for (i = NUM_KEY_POWERS-2; i >= 0; i--) {
		memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE);
		internal_polyval_mul(tctx->key_powers[i],
				     tctx->key_powers[i+1]);
	}

	return 0;
}

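/* Start a new digest with a zeroed accumulator and no buffered bytes. */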
static int polyval_x86_init(struct shash_desc *desc)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);

	memset(dctx, 0, sizeof(*dctx));

	return 0;
}

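/*
 * Absorb 'srclen' bytes: finish any partially filled block first, then hash
 * full blocks in chunks of at most 4 KiB so kernel_fpu_begin() sections stay
 * short, and finally buffer any trailing partial block.
 */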
static int polyval_x86_update(struct shash_desc *desc,
			 const u8 *src, unsigned int srclen)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
	const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm);
	u8 *pos;
	unsigned int nblocks;
	unsigned int n;

	if (dctx->bytes) {
		n = min(srclen, dctx->bytes);
		pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;

		dctx->bytes -= n;
		srclen -= n;

		while (n--)
			*pos++ ^= *src++;

		if (!dctx->bytes)
			internal_polyval_mul(dctx->buffer,
					    tctx->key_powers[NUM_KEY_POWERS-1]);
	}

	while (srclen >= POLYVAL_BLOCK_SIZE) {
		/* Allow rescheduling every 4K bytes. */
		nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
		internal_polyval_update(tctx, src, nblocks, dctx->buffer);
		srclen -= nblocks * POLYVAL_BLOCK_SIZE;
		src += nblocks * POLYVAL_BLOCK_SIZE;
	}

	if (srclen) {
		dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
		pos = dctx->buffer;
		while (srclen--)
			*pos++ ^= *src++;
	}

	return 0;
}

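/* Fold in a final partial block, if any, then output the accumulator. */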
static int polyval_x86_final(struct shash_desc *desc, u8 *dst)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
	const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm);

	if (dctx->bytes) {
		internal_polyval_mul(dctx->buffer,
				     tctx->key_powers[NUM_KEY_POWERS-1]);
	}

	memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE);

	return 0;
}

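/* cra_priority 200 makes this driver preferred over the generic polyval. */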
static struct shash_alg polyval_alg = {
	.digestsize	= POLYVAL_DIGEST_SIZE,
	.init		= polyval_x86_init,
	.update		= polyval_x86_update,
	.final		= polyval_x86_final,
	.setkey		= polyval_x86_setkey,
	.descsize	= sizeof(struct polyval_desc_ctx),
	.base		= {
		.cra_name		= "polyval",
		.cra_driver_name	= "polyval-clmulni",
		.cra_priority		= 200,
		.cra_blocksize		= POLYVAL_BLOCK_SIZE,
		.cra_ctxsize		= POLYVAL_CTX_SIZE,
		.cra_module		= THIS_MODULE,
	},
};

__maybe_unused static const struct x86_cpu_id pcmul_cpu_id[] = {
	X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);

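/*
 * Registration requires both PCLMULQDQ and AVX, since the assembly uses the
 * VEX-encoded forms of the carryless multiply instructions.
 */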
static int __init polyval_clmulni_mod_init(void)
{
	if (!x86_match_cpu(pcmul_cpu_id))
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_AVX))
		return -ENODEV;

	return crypto_register_shash(&polyval_alg);
}

static void __exit polyval_clmulni_mod_exit(void)
{
	crypto_unregister_shash(&polyval_alg);
}

module_init(polyval_clmulni_mod_init);
module_exit(polyval_clmulni_mod_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("POLYVAL hash function accelerated by PCLMULQDQ-NI");
MODULE_ALIAS_CRYPTO("polyval");
MODULE_ALIAS_CRYPTO("polyval-clmulni");