xref: /openbmc/linux/arch/x86/crypto/poly1305_glue.c (revision 5a158981)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Poly1305 authenticator algorithm, RFC7539, SIMD glue code
4  *
5  * Copyright (C) 2015 Martin Willi
6  */
7 
8 #include <crypto/algapi.h>
9 #include <crypto/internal/hash.h>
10 #include <crypto/internal/poly1305.h>
11 #include <crypto/internal/simd.h>
12 #include <linux/crypto.h>
13 #include <linux/jump_label.h>
14 #include <linux/kernel.h>
15 #include <linux/module.h>
16 #include <asm/simd.h>
17 
/*
 * Block functions implemented in assembly.  'h' is the 5x26-bit
 * accumulator limbs, 'src' the message bytes, 'r' the key limbs and
 * 'blocks' the number of block groups to process.  The multi-block
 * variants additionally take 'u', a higher power of the key (the key
 * powers are stored contiguously in dctx->r[], see
 * poly1305_simd_blocks() below).
 */
asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
				    const u32 *r, unsigned int blocks);
asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
				     unsigned int blocks, const u32 *u);
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
				     unsigned int blocks, const u32 *u);

/* CPU-feature gates; enabled once at module init, read-only afterwards. */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
27 
28 static void poly1305_simd_mult(u32 *a, const u32 *b)
29 {
30 	u8 m[POLY1305_BLOCK_SIZE];
31 
32 	memset(m, 0, sizeof(m));
33 	/* The poly1305 block function adds a hi-bit to the accumulator which
34 	 * we don't need for key multiplication; compensate for it. */
35 	a[4] -= 1 << 24;
36 	poly1305_block_sse2(a, m, b, 1);
37 }
38 
39 static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
40 					   const u8 *src, unsigned int srclen)
41 {
42 	unsigned int datalen;
43 
44 	if (unlikely(!dctx->sset)) {
45 		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
46 		src += srclen - datalen;
47 		srclen = datalen;
48 	}
49 	if (srclen >= POLY1305_BLOCK_SIZE) {
50 		poly1305_core_blocks(&dctx->h, dctx->r, src,
51 				     srclen / POLY1305_BLOCK_SIZE, 1);
52 		srclen %= POLY1305_BLOCK_SIZE;
53 	}
54 	return srclen;
55 }
56 
/*
 * SIMD bulk path: process as many whole blocks as possible, preferring
 * the widest available routine (4-way AVX2, then 2-way SSE2, then
 * single-block SSE2).  Returns the number of leftover bytes
 * (< POLY1305_BLOCK_SIZE) for the caller to buffer.
 *
 * Callers wrap this in kernel_fpu_begin()/kernel_fpu_end() (see
 * poly1305_update_arch()), so vector registers may be used freely here.
 */
static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
					 const u8 *src, unsigned int srclen)
{
	unsigned int blocks, datalen;

	/* Key not fully set: take r/s from the front of the data stream. */
	if (unlikely(!dctx->sset)) {
		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
		src += srclen - datalen;
		srclen = datalen;
	}

	if (IS_ENABLED(CONFIG_AS_AVX2) &&
	    static_branch_likely(&poly1305_use_avx2) &&
	    srclen >= POLY1305_BLOCK_SIZE * 4) {
		/*
		 * The 4-way routine needs the key powers r..r^4 in
		 * dctx->r[0..3].  Derive each missing power by one extra
		 * multiplication by r; the derivation order matters, so
		 * each step builds on the previous entry.
		 */
		if (unlikely(dctx->rset < 4)) {
			if (dctx->rset < 2) {
				dctx->r[1] = dctx->r[0];
				poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
			}
			dctx->r[2] = dctx->r[1];
			poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
			dctx->r[3] = dctx->r[2];
			poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
			dctx->rset = 4;
		}
		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
		poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
				     dctx->r[1].r);
		src += POLY1305_BLOCK_SIZE * 4 * blocks;
		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
	}

	/* 2-way SSE2 path needs r and r^2 (dctx->r[0..1]). */
	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
		if (unlikely(dctx->rset < 2)) {
			dctx->r[1] = dctx->r[0];
			poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
			dctx->rset = 2;
		}
		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
		poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
				     blocks, dctx->r[1].r);
		src += POLY1305_BLOCK_SIZE * 2 * blocks;
		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
	}
	/* At most one odd block can remain after the 2-way pass. */
	if (srclen >= POLY1305_BLOCK_SIZE) {
		poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
		srclen -= POLY1305_BLOCK_SIZE;
	}
	return srclen;
}
107 
/*
 * Library-API init entry point: this x86 glue needs no arch-specific
 * setup, so defer entirely to the generic implementation.
 */
void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
{
	poly1305_init_generic(desc, key);
}
EXPORT_SYMBOL(poly1305_init_arch);
113 
114 void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
115 			  unsigned int srclen)
116 {
117 	unsigned int bytes;
118 
119 	if (unlikely(dctx->buflen)) {
120 		bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
121 		memcpy(dctx->buf + dctx->buflen, src, bytes);
122 		src += bytes;
123 		srclen -= bytes;
124 		dctx->buflen += bytes;
125 
126 		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
127 			if (static_branch_likely(&poly1305_use_simd) &&
128 			    likely(crypto_simd_usable())) {
129 				kernel_fpu_begin();
130 				poly1305_simd_blocks(dctx, dctx->buf,
131 						     POLY1305_BLOCK_SIZE);
132 				kernel_fpu_end();
133 			} else {
134 				poly1305_scalar_blocks(dctx, dctx->buf,
135 						       POLY1305_BLOCK_SIZE);
136 			}
137 			dctx->buflen = 0;
138 		}
139 	}
140 
141 	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
142 		if (static_branch_likely(&poly1305_use_simd) &&
143 		    likely(crypto_simd_usable())) {
144 			kernel_fpu_begin();
145 			bytes = poly1305_simd_blocks(dctx, src, srclen);
146 			kernel_fpu_end();
147 		} else {
148 			bytes = poly1305_scalar_blocks(dctx, src, srclen);
149 		}
150 		src += srclen - bytes;
151 		srclen = bytes;
152 	}
153 
154 	if (unlikely(srclen)) {
155 		dctx->buflen = srclen;
156 		memcpy(dctx->buf, src, srclen);
157 	}
158 }
159 EXPORT_SYMBOL(poly1305_update_arch);
160 
/*
 * Library-API finalization entry point: no arch-specific work needed,
 * defer to the generic implementation.
 */
void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest)
{
	poly1305_final_generic(desc, digest);
}
EXPORT_SYMBOL(poly1305_final_arch);
166 
167 static int crypto_poly1305_init(struct shash_desc *desc)
168 {
169 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
170 
171 	poly1305_core_init(&dctx->h);
172 	dctx->buflen = 0;
173 	dctx->rset = 0;
174 	dctx->sset = false;
175 
176 	return 0;
177 }
178 
179 static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
180 {
181 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
182 
183 	if (unlikely(!dctx->sset))
184 		return -ENOKEY;
185 
186 	poly1305_final_generic(dctx, dst);
187 	return 0;
188 }
189 
/* shash .update hook: thin adapter onto the library update routine. */
static int poly1305_simd_update(struct shash_desc *desc,
				const u8 *src, unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	poly1305_update_arch(dctx, src, srclen);
	return 0;
}
198 
/*
 * shash algorithm descriptor for the crypto API; higher priority (300)
 * than the generic C poly1305 so this SIMD version is preferred.
 */
static struct shash_alg alg = {
	.digestsize	= POLY1305_DIGEST_SIZE,
	.init		= crypto_poly1305_init,
	.update		= poly1305_simd_update,
	.final		= crypto_poly1305_final,
	.descsize	= sizeof(struct poly1305_desc_ctx),
	.base		= {
		.cra_name		= "poly1305",
		.cra_driver_name	= "poly1305-simd",
		.cra_priority		= 300,
		.cra_blocksize		= POLY1305_BLOCK_SIZE,
		.cra_module		= THIS_MODULE,
	},
};
213 
214 static int __init poly1305_simd_mod_init(void)
215 {
216 	if (!boot_cpu_has(X86_FEATURE_XMM2))
217 		return 0;
218 
219 	static_branch_enable(&poly1305_use_simd);
220 
221 	if (IS_ENABLED(CONFIG_AS_AVX2) &&
222 	    boot_cpu_has(X86_FEATURE_AVX) &&
223 	    boot_cpu_has(X86_FEATURE_AVX2) &&
224 	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
225 		static_branch_enable(&poly1305_use_avx2);
226 
227 	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0;
228 }
229 
/* Module exit: unregister the shash if it was registered at init. */
static void __exit poly1305_simd_mod_exit(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		crypto_unregister_shash(&alg);
}
235 
/* Module registration and metadata. */
module_init(poly1305_simd_mod_init);
module_exit(poly1305_simd_mod_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
MODULE_DESCRIPTION("Poly1305 authenticator");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-simd");
244