xref: /openbmc/linux/drivers/crypto/nx/nx-842.c (revision ff148d8a)
1 /*
2  * Cryptographic API for the NX-842 hardware compression.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * Copyright (C) IBM Corporation, 2011-2015
15  *
16  * Designer of the Power data compression engine:
17  *   Bulent Abali <abali@us.ibm.com>
18  *
19  * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
20  *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
21  *
22  * Rewrite: Dan Streetman <ddstreet@ieee.org>
23  *
24  * This is an interface to the NX-842 compression hardware in PowerPC
25  * processors.  Most of the complexity of this driver is due to the fact that
26  * the NX-842 compression hardware requires the input and output data buffers
27  * to be specifically aligned, to be a specific multiple in length, and within
28  * specific minimum and maximum lengths.  Those restrictions, provided by the
29  * nx-842 driver via nx842_constraints, mean this driver must use bounce
30  * buffers and headers to correct misaligned in or out buffers, and to split
31  * input buffers that are too large.
32  *
33  * This driver will fall back to software decompression if the hardware
34  * decompression fails, so this driver's decompression should never fail as
35  * long as the provided compressed buffer is valid.  Any compressed buffer
36  * created by this driver will have a header (except ones where the input
37  * perfectly matches the constraints); so users of this driver cannot simply
38  * pass a compressed buffer created by this driver over to the 842 software
39  * decompression library.  Instead, users must use this driver to decompress;
40  * if the hardware fails or is unavailable, the compressed buffer will be
41  * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
42  * software decompression library.
43  *
44  * This does not fall back to software compression, however, since the caller
45  * of this function is specifically requesting hardware compression; if the
46  * hardware compression fails, the caller can fall back to software
47  * compression, and the raw 842 compressed buffer that the software compressor
48  * creates can be passed to this driver for hardware decompression; any
49  * buffer without our specific header magic is assumed to be a raw 842 buffer
50  * and passed directly to the hardware.  Note that the software compression
51  * library will produce a compressed buffer that is incompatible with the
52  * hardware decompressor if the original input buffer length is not a multiple
53  * of 8; if such a compressed buffer is passed to this driver for
54  * decompression, the hardware will reject it and this driver will then pass
55  * it over to the software library for decompression.
56  */
57 
58 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
59 
60 #include <linux/vmalloc.h>
61 #include <linux/sw842.h>
62 #include <linux/spinlock.h>
63 
64 #include "nx-842.h"
65 
66 /* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
67  * template (see lib/842/842.h), so this magic number will never appear at
68  * the start of a raw 842 compressed buffer.  That is important, as any buffer
69  * passed to us without this magic is assumed to be a raw 842 compressed
70  * buffer, and passed directly to the hardware to decompress.
71  */
72 #define NX842_CRYPTO_MAGIC	(0xf842)
73 #define NX842_CRYPTO_HEADER_SIZE(g)				\
74 	(sizeof(struct nx842_crypto_header) +			\
75 	 sizeof(struct nx842_crypto_header_group) * (g))
76 #define NX842_CRYPTO_HEADER_MAX_SIZE				\
77 	NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
78 
79 /* bounce buffer size */
80 #define BOUNCE_BUFFER_ORDER	(2)
81 #define BOUNCE_BUFFER_SIZE					\
82 	((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
83 
84 /* try longer on comp because we can fallback to sw decomp if hw is busy */
85 #define COMP_BUSY_TIMEOUT	(250) /* ms */
86 #define DECOMP_BUSY_TIMEOUT	(50) /* ms */
87 
/* Cursor state tracking progress through the caller's input and output
 * buffers as successive compressed/decompressed groups are processed.
 */
struct nx842_crypto_param {
	u8 *in;			/* current read position in the input buffer */
	unsigned int iremain;	/* input bytes remaining from *in */
	u8 *out;		/* current write position in the output buffer */
	unsigned int oremain;	/* output space remaining at *out */
	unsigned int ototal;	/* total output bytes produced so far */
};
95 
96 static int update_param(struct nx842_crypto_param *p,
97 			unsigned int slen, unsigned int dlen)
98 {
99 	if (p->iremain < slen)
100 		return -EOVERFLOW;
101 	if (p->oremain < dlen)
102 		return -ENOSPC;
103 
104 	p->in += slen;
105 	p->iremain -= slen;
106 	p->out += dlen;
107 	p->oremain -= dlen;
108 	p->ototal += dlen;
109 
110 	return 0;
111 }
112 
113 int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
114 {
115 	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
116 
117 	spin_lock_init(&ctx->lock);
118 	ctx->driver = driver;
119 	ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
120 	ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
121 	ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
122 	if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
123 		kfree(ctx->wmem);
124 		free_page((unsigned long)ctx->sbounce);
125 		free_page((unsigned long)ctx->dbounce);
126 		return -ENOMEM;
127 	}
128 
129 	return 0;
130 }
131 EXPORT_SYMBOL_GPL(nx842_crypto_init);
132 
133 void nx842_crypto_exit(struct crypto_tfm *tfm)
134 {
135 	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
136 
137 	kfree(ctx->wmem);
138 	free_page((unsigned long)ctx->sbounce);
139 	free_page((unsigned long)ctx->dbounce);
140 }
141 EXPORT_SYMBOL_GPL(nx842_crypto_exit);
142 
143 static void check_constraints(struct nx842_constraints *c)
144 {
145 	/* limit maximum, to always have enough bounce buffer to decompress */
146 	if (c->maximum > BOUNCE_BUFFER_SIZE)
147 		c->maximum = BOUNCE_BUFFER_SIZE;
148 }
149 
150 static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
151 {
152 	int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
153 
154 	/* compress should have added space for header */
155 	if (s > be16_to_cpu(hdr->group[0].padding)) {
156 		pr_err("Internal error: no space for header\n");
157 		return -EINVAL;
158 	}
159 
160 	memcpy(buf, hdr, s);
161 
162 	print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
163 
164 	return 0;
165 }
166 
/* Compress one group of input with the hardware driver.
 *
 * @ctx:     per-tfm context (driver, workmem, bounce buffers)
 * @p:       input/output cursor state; advanced on success
 * @g:       header group entry filled with this group's padding/lengths
 * @c:       hardware constraints (already clamped by check_constraints())
 * @ignore:  set to the number of zero bytes appended to the input when it
 *           had to be padded up to meet the hw multiple/minimum
 * @hdrsize: bytes reserved at the start of the output for the header
 *           (nonzero only for the first group when a header is needed)
 *
 * Stages input/output through the sbounce/dbounce buffers whenever the
 * caller's buffers violate the hardware alignment/multiple/min/max
 * requirements.  Returns 0 and advances *p on success, or a negative
 * errno (-EOVERFLOW on empty input, -ENOSPC when the output cannot hold
 * the header plus a minimum-size result, or the driver's error).
 */
static int compress(struct nx842_crypto_ctx *ctx,
		    struct nx842_crypto_param *p,
		    struct nx842_crypto_header_group *g,
		    struct nx842_constraints *c,
		    u16 *ignore,
		    unsigned int hdrsize)
{
	unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	int ret, dskip = 0;
	ktime_t timeout;

	if (p->iremain == 0)
		return -EOVERFLOW;

	if (p->oremain == 0 || hdrsize + c->minimum > dlen)
		return -ENOSPC;

	/* round the input length up to the hw multiple/minimum and cap it
	 * at the hw maximum; adj_slen > slen means padding is required
	 */
	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		adj_slen = slen = c->maximum;
	if (adj_slen > slen || (u64)src % c->alignment) {
		/* input needs padding or is misaligned: stage it (zero-padded
		 * to adj_slen) in the source bounce buffer
		 */
		adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
		slen = min(slen, BOUNCE_BUFFER_SIZE);
		if (adj_slen > slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		slen = adj_slen;
		pr_debug("using comp sbounce buffer, len %x\n", slen);
	}

	/* leave room for the header before the compressed data */
	dst += hdrsize;
	dlen -= hdrsize;

	if ((u64)dst % c->alignment) {
		/* dskip records bytes skipped to align the output */
		dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
		dst += dskip;
		dlen -= dskip;
	}
	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	if (dlen < c->minimum) {
nospc:
		/* output window too small/misaligned: compress into the
		 * destination bounce buffer and copy back afterwards
		 */
		dst = ctx->dbounce;
		dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
		dlen = round_down(dlen, c->multiple);
		dskip = 0;
		pr_debug("using comp dbounce buffer, len %x\n", dlen);
	}
	if (dlen > c->maximum)
		dlen = c->maximum;

	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
		/* possibly we should reduce the slen here, instead of
		 * retrying with the dbounce buffer?
		 */
		if (ret == -ENOSPC && dst != ctx->dbounce)
			goto nospc;
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret)
		return ret;

	/* dskip now counts header plus alignment bytes before the data */
	dskip += hdrsize;

	if (dst == ctx->dbounce)
		memcpy(p->out + dskip, dst, dlen);

	g->padding = cpu_to_be16(dskip);
	g->compressed_length = cpu_to_be32(dlen);
	g->uncompressed_length = cpu_to_be32(slen);

	if (p->iremain < slen) {
		/* input was padded up to slen; record the trailing bytes the
		 * decompressor must discard
		 */
		*ignore = slen - p->iremain;
		slen = p->iremain;
	}

	pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
		 slen, *ignore, dlen, dskip);

	return update_param(p, slen, dskip + dlen);
}
257 
258 int nx842_crypto_compress(struct crypto_tfm *tfm,
259 			  const u8 *src, unsigned int slen,
260 			  u8 *dst, unsigned int *dlen)
261 {
262 	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
263 	struct nx842_crypto_header *hdr = &ctx->header;
264 	struct nx842_crypto_param p;
265 	struct nx842_constraints c = *ctx->driver->constraints;
266 	unsigned int groups, hdrsize, h;
267 	int ret, n;
268 	bool add_header;
269 	u16 ignore = 0;
270 
271 	check_constraints(&c);
272 
273 	p.in = (u8 *)src;
274 	p.iremain = slen;
275 	p.out = dst;
276 	p.oremain = *dlen;
277 	p.ototal = 0;
278 
279 	*dlen = 0;
280 
281 	groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
282 		       DIV_ROUND_UP(p.iremain, c.maximum));
283 	hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
284 
285 	spin_lock_bh(&ctx->lock);
286 
287 	/* skip adding header if the buffers meet all constraints */
288 	add_header = (p.iremain % c.multiple	||
289 		      p.iremain < c.minimum	||
290 		      p.iremain > c.maximum	||
291 		      (u64)p.in % c.alignment	||
292 		      p.oremain % c.multiple	||
293 		      p.oremain < c.minimum	||
294 		      p.oremain > c.maximum	||
295 		      (u64)p.out % c.alignment);
296 
297 	hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
298 	hdr->groups = 0;
299 	hdr->ignore = 0;
300 
301 	while (p.iremain > 0) {
302 		n = hdr->groups++;
303 		ret = -ENOSPC;
304 		if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
305 			goto unlock;
306 
307 		/* header goes before first group */
308 		h = !n && add_header ? hdrsize : 0;
309 
310 		if (ignore)
311 			pr_warn("internal error, ignore is set %x\n", ignore);
312 
313 		ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
314 		if (ret)
315 			goto unlock;
316 	}
317 
318 	if (!add_header && hdr->groups > 1) {
319 		pr_err("Internal error: No header but multiple groups\n");
320 		ret = -EINVAL;
321 		goto unlock;
322 	}
323 
324 	/* ignore indicates the input stream needed to be padded */
325 	hdr->ignore = cpu_to_be16(ignore);
326 	if (ignore)
327 		pr_debug("marked %d bytes as ignore\n", ignore);
328 
329 	if (add_header)
330 		ret = nx842_crypto_add_header(hdr, dst);
331 	if (ret)
332 		goto unlock;
333 
334 	*dlen = p.ototal;
335 
336 	pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
337 
338 unlock:
339 	spin_unlock_bh(&ctx->lock);
340 	return ret;
341 }
342 EXPORT_SYMBOL_GPL(nx842_crypto_compress);
343 
/* Decompress one group, falling back to the software 842 library if the
 * hardware rejects the buffer or is unavailable.
 *
 * @ctx:    per-tfm context (driver, workmem, bounce buffers)
 * @p:      input/output cursor state; advanced on success
 * @g:      header group entry describing this group's padding/lengths
 * @c:      hardware constraints (already clamped by check_constraints())
 * @ignore: trailing decompressed bytes to discard (input was padded at
 *          compress time); nonzero only for the last group
 *
 * Stages data through the sbounce/dbounce buffers when the raw buffers
 * violate the hardware constraints.  Returns 0 and advances *p on
 * success, or a negative errno.
 */
static int decompress(struct nx842_crypto_ctx *ctx,
		      struct nx842_crypto_param *p,
		      struct nx842_crypto_header_group *g,
		      struct nx842_constraints *c,
		      u16 ignore)
{
	unsigned int slen = be32_to_cpu(g->compressed_length);
	unsigned int required_len = be32_to_cpu(g->uncompressed_length);
	unsigned int dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	u16 padding = be16_to_cpu(g->padding);
	int ret, spadding = 0;
	ktime_t timeout;

	if (!slen || !required_len)
		return -EINVAL;

	if (p->iremain <= 0 || padding + slen > p->iremain)
		return -EOVERFLOW;

	if (p->oremain <= 0 || required_len - ignore > p->oremain)
		return -ENOSPC;

	/* the group's compressed data starts after its padding bytes */
	src += padding;

	/* round the input length up to the hw multiple/minimum; inputs
	 * larger than the hw maximum go straight to the sw fallback
	 */
	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		goto usesw;
	if (slen < adj_slen || (u64)src % c->alignment) {
		/* we can append padding bytes because the 842 format defines
		 * an "end" template (see lib/842/842_decompress.c) and will
		 * ignore any bytes following it.
		 */
		if (slen < adj_slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		spadding = adj_slen - slen;
		slen = adj_slen;
		pr_debug("using decomp sbounce buffer, len %x\n", slen);
	}

	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	if (dlen < required_len || (u64)dst % c->alignment) {
		/* output too small/misaligned: decompress into dbounce and
		 * copy back afterwards
		 */
		dst = ctx->dbounce;
		dlen = min(required_len, BOUNCE_BUFFER_SIZE);
		pr_debug("using decomp dbounce buffer, len %x\n", dlen);
	}
	if (dlen < c->minimum)
		goto usesw;
	if (dlen > c->maximum)
		dlen = c->maximum;

	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret) {
usesw:
		/* reset everything, sw doesn't have constraints */
		src = p->in + padding;
		slen = be32_to_cpu(g->compressed_length);
		spadding = 0;
		dst = p->out;
		dlen = p->oremain;
		if (dlen < required_len) { /* have ignore bytes */
			dst = ctx->dbounce;
			dlen = BOUNCE_BUFFER_SIZE;
		}
		pr_info_ratelimited("using software 842 decompression\n");
		ret = sw842_decompress(src, slen, dst, &dlen);
	}
	if (ret)
		return ret;

	/* don't count the zero padding we appended as consumed input */
	slen -= spadding;

	dlen -= ignore;
	if (ignore)
		pr_debug("ignoring last %x bytes\n", ignore);

	if (dst == ctx->dbounce)
		memcpy(p->out, dst, dlen);

	pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
		 slen, padding, dlen, ignore);

	return update_param(p, slen + padding, dlen);
}
440 
/**
 * nx842_crypto_decompress - decompress a buffer using the NX-842 hardware
 * @tfm:  the crypto transform
 * @src:  compressed input buffer
 * @slen: input length in bytes
 * @dst:  output buffer
 * @dlen: in: output buffer size; out: bytes of decompressed output written
 *
 * A buffer beginning with NX842_CRYPTO_MAGIC is parsed as one produced by
 * nx842_crypto_compress(): its header is read and each described group is
 * decompressed in turn.  Any other buffer is treated as raw 842 data and
 * handed to decompress() as a single group.
 *
 * Return: 0 on success, or a negative errno.
 */
int nx842_crypto_decompress(struct crypto_tfm *tfm,
			    const u8 *src, unsigned int slen,
			    u8 *dst, unsigned int *dlen)
{
	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
	struct nx842_crypto_header *hdr;
	struct nx842_crypto_param p;
	struct nx842_constraints c = *ctx->driver->constraints;
	int n, ret, hdr_len;
	u16 ignore = 0;

	check_constraints(&c);

	p.in = (u8 *)src;
	p.iremain = slen;
	p.out = dst;
	p.oremain = *dlen;
	p.ototal = 0;

	*dlen = 0;

	/* NOTE(review): hdr->magic is read below before slen is checked
	 * against sizeof(hdr->magic); a source buffer smaller than 2 bytes
	 * would be over-read — confirm the crypto API guarantees a minimum
	 * input length here.
	 */
	hdr = (struct nx842_crypto_header *)src;

	spin_lock_bh(&ctx->lock);

	/* If it doesn't start with our header magic number, assume it's a raw
	 * 842 compressed buffer and pass it directly to the hardware driver
	 */
	if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
		/* synthesize a single group covering the whole buffer */
		struct nx842_crypto_header_group g = {
			.padding =		0,
			.compressed_length =	cpu_to_be32(p.iremain),
			.uncompressed_length =	cpu_to_be32(p.oremain),
		};

		ret = decompress(ctx, &p, &g, &c, 0);
		if (ret)
			goto unlock;

		goto success;
	}

	if (!hdr->groups) {
		pr_err("header has no groups\n");
		ret = -EINVAL;
		goto unlock;
	}
	if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
		pr_err("header has too many groups %x, max %x\n",
		       hdr->groups, NX842_CRYPTO_GROUP_MAX);
		ret = -EINVAL;
		goto unlock;
	}

	hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
	if (hdr_len > slen) {
		ret = -EOVERFLOW;
		goto unlock;
	}

	/* copy the header out of the input stream, so that decompressing
	 * in-place (dst overlapping src) cannot corrupt it mid-loop
	 */
	memcpy(&ctx->header, src, hdr_len);
	hdr = &ctx->header;

	for (n = 0; n < hdr->groups; n++) {
		/* ignore applies to last group */
		if (n + 1 == hdr->groups)
			ignore = be16_to_cpu(hdr->ignore);

		ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
		if (ret)
			goto unlock;
	}

success:
	*dlen = p.ototal;

	pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);

	ret = 0;

unlock:
	spin_unlock_bh(&ctx->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(nx842_crypto_decompress);
527 
528 MODULE_LICENSE("GPL");
529 MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
530 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
531