1 /* 2 * Cryptographic API for the NX-842 hardware compression. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * Copyright (C) IBM Corporation, 2011-2015 15 * 16 * Designer of the Power data compression engine: 17 * Bulent Abali <abali@us.ibm.com> 18 * 19 * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com> 20 * Seth Jennings <sjenning@linux.vnet.ibm.com> 21 * 22 * Rewrite: Dan Streetman <ddstreet@ieee.org> 23 * 24 * This is an interface to the NX-842 compression hardware in PowerPC 25 * processors. Most of the complexity of this drvier is due to the fact that 26 * the NX-842 compression hardware requires the input and output data buffers 27 * to be specifically aligned, to be a specific multiple in length, and within 28 * specific minimum and maximum lengths. Those restrictions, provided by the 29 * nx-842 driver via nx842_constraints, mean this driver must use bounce 30 * buffers and headers to correct misaligned in or out buffers, and to split 31 * input buffers that are too large. 32 * 33 * This driver will fall back to software decompression if the hardware 34 * decompression fails, so this driver's decompression should never fail as 35 * long as the provided compressed buffer is valid. Any compressed buffer 36 * created by this driver will have a header (except ones where the input 37 * perfectly matches the constraints); so users of this driver cannot simply 38 * pass a compressed buffer created by this driver over to the 842 software 39 * decompression library. Instead, users must use this driver to decompress; 40 * if the hardware fails or is unavailable, the compressed buffer will be 41 * parsed and the header removed, and the raw 842 buffer(s) passed to the 842 42 * software decompression library. 43 * 44 * This does not fall back to software compression, however, since the caller 45 * of this function is specifically requesting hardware compression; if the 46 * hardware compression fails, the caller can fall back to software 47 * compression, and the raw 842 compressed buffer that the software compressor 48 * creates can be passed to this driver for hardware decompression; any 49 * buffer without our specific header magic is assumed to be a raw 842 buffer 50 * and passed directly to the hardware. Note that the software compression 51 * library will produce a compressed buffer that is incompatible with the 52 * hardware decompressor if the original input buffer length is not a multiple 53 * of 8; if such a compressed buffer is passed to this driver for 54 * decompression, the hardware will reject it and this driver will then pass 55 * it over to the software library for decompression. 56 */ 57 58 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 59 60 #include <linux/vmalloc.h> 61 #include <linux/sw842.h> 62 #include <linux/spinlock.h> 63 64 #include "nx-842.h" 65 66 /* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit 67 * template (see lib/842/842.h), so this magic number will never appear at 68 * the start of a raw 842 compressed buffer. That is important, as any buffer 69 * passed to us without this magic is assumed to be a raw 842 compressed 70 * buffer, and passed directly to the hardware to decompress. 71 */ 72 #define NX842_CRYPTO_MAGIC (0xf842) 73 #define NX842_CRYPTO_HEADER_SIZE(g) \ 74 (sizeof(struct nx842_crypto_header) + \ 75 sizeof(struct nx842_crypto_header_group) * (g)) 76 #define NX842_CRYPTO_HEADER_MAX_SIZE \ 77 NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX) 78 79 /* bounce buffer size */ 80 #define BOUNCE_BUFFER_ORDER (2) 81 #define BOUNCE_BUFFER_SIZE \ 82 ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER)) 83 84 /* try longer on comp because we can fallback to sw decomp if hw is busy */ 85 #define COMP_BUSY_TIMEOUT (250) /* ms */ 86 #define DECOMP_BUSY_TIMEOUT (50) /* ms */ 87 88 struct nx842_crypto_param { 89 u8 *in; 90 unsigned int iremain; 91 u8 *out; 92 unsigned int oremain; 93 unsigned int ototal; 94 }; 95 96 static int update_param(struct nx842_crypto_param *p, 97 unsigned int slen, unsigned int dlen) 98 { 99 if (p->iremain < slen) 100 return -EOVERFLOW; 101 if (p->oremain < dlen) 102 return -ENOSPC; 103 104 p->in += slen; 105 p->iremain -= slen; 106 p->out += dlen; 107 p->oremain -= dlen; 108 p->ototal += dlen; 109 110 return 0; 111 } 112 113 int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver) 114 { 115 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); 116 117 spin_lock_init(&ctx->lock); 118 ctx->driver = driver; 119 ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL); 120 ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); 121 ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); 122 if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) { 123 kfree(ctx->wmem); 124 free_page((unsigned long)ctx->sbounce); 125 free_page((unsigned long)ctx->dbounce); 126 return -ENOMEM; 127 } 128 129 return 0; 130 } 131 EXPORT_SYMBOL_GPL(nx842_crypto_init); 132 133 void nx842_crypto_exit(struct crypto_tfm *tfm) 134 { 135 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); 136 137 kfree(ctx->wmem); 138 free_page((unsigned long)ctx->sbounce); 139 free_page((unsigned long)ctx->dbounce); 140 } 141 EXPORT_SYMBOL_GPL(nx842_crypto_exit); 142 143 static void check_constraints(struct nx842_constraints *c) 144 { 145 /* limit maximum, to always have enough bounce buffer to decompress */ 146 if (c->maximum > BOUNCE_BUFFER_SIZE) 147 c->maximum = BOUNCE_BUFFER_SIZE; 148 } 149 150 static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf) 151 { 152 int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups); 153 154 /* compress should have added space for header */ 155 if (s > be16_to_cpu(hdr->group[0].padding)) { 156 pr_err("Internal error: no space for header\n"); 157 return -EINVAL; 158 } 159 160 memcpy(buf, hdr, s); 161 162 print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0); 163 164 return 0; 165 } 166 167 static int compress(struct nx842_crypto_ctx *ctx, 168 struct nx842_crypto_param *p, 169 struct nx842_crypto_header_group *g, 170 struct nx842_constraints *c, 171 u16 *ignore, 172 unsigned int hdrsize) 173 { 174 unsigned int slen = p->iremain, dlen = p->oremain, tmplen; 175 unsigned int adj_slen = slen; 176 u8 *src = p->in, *dst = p->out; 177 int ret, dskip = 0; 178 ktime_t timeout; 179 180 if (p->iremain == 0) 181 return -EOVERFLOW; 182 183 if (p->oremain == 0 || hdrsize + c->minimum > dlen) 184 return -ENOSPC; 185 186 if (slen % c->multiple) 187 adj_slen = round_up(slen, c->multiple); 188 if (slen < c->minimum) 189 adj_slen = c->minimum; 190 if (slen > c->maximum) 191 adj_slen = slen = c->maximum; 192 if (adj_slen > slen || (u64)src % c->alignment) { 193 adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE); 194 slen = min(slen, BOUNCE_BUFFER_SIZE); 195 if (adj_slen > slen) 196 memset(ctx->sbounce + slen, 0, adj_slen - slen); 197 memcpy(ctx->sbounce, src, slen); 198 src = ctx->sbounce; 199 slen = adj_slen; 200 pr_debug("using comp sbounce buffer, len %x\n", slen); 201 } 202 203 dst += hdrsize; 204 dlen -= hdrsize; 205 206 if ((u64)dst % c->alignment) { 207 dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst); 208 dst += dskip; 209 dlen -= dskip; 210 } 211 if (dlen % c->multiple) 212 dlen = round_down(dlen, c->multiple); 213 if (dlen < c->minimum) { 214 nospc: 215 dst = ctx->dbounce; 216 dlen = min(p->oremain, BOUNCE_BUFFER_SIZE); 217 dlen = round_down(dlen, c->multiple); 218 dskip = 0; 219 pr_debug("using comp dbounce buffer, len %x\n", dlen); 220 } 221 if (dlen > c->maximum) 222 dlen = c->maximum; 223 224 tmplen = dlen; 225 timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT); 226 do { 227 dlen = tmplen; /* reset dlen, if we're retrying */ 228 ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem); 229 /* possibly we should reduce the slen here, instead of 230 * retrying with the dbounce buffer? 231 */ 232 if (ret == -ENOSPC && dst != ctx->dbounce) 233 goto nospc; 234 } while (ret == -EBUSY && ktime_before(ktime_get(), timeout)); 235 if (ret) 236 return ret; 237 238 dskip += hdrsize; 239 240 if (dst == ctx->dbounce) 241 memcpy(p->out + dskip, dst, dlen); 242 243 g->padding = cpu_to_be16(dskip); 244 g->compressed_length = cpu_to_be32(dlen); 245 g->uncompressed_length = cpu_to_be32(slen); 246 247 if (p->iremain < slen) { 248 *ignore = slen - p->iremain; 249 slen = p->iremain; 250 } 251 252 pr_debug("compress slen %x ignore %x dlen %x padding %x\n", 253 slen, *ignore, dlen, dskip); 254 255 return update_param(p, slen, dskip + dlen); 256 } 257 258 int nx842_crypto_compress(struct crypto_tfm *tfm, 259 const u8 *src, unsigned int slen, 260 u8 *dst, unsigned int *dlen) 261 { 262 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); 263 struct nx842_crypto_header *hdr = &ctx->header; 264 struct nx842_crypto_param p; 265 struct nx842_constraints c = *ctx->driver->constraints; 266 unsigned int groups, hdrsize, h; 267 int ret, n; 268 bool add_header; 269 u16 ignore = 0; 270 271 check_constraints(&c); 272 273 p.in = (u8 *)src; 274 p.iremain = slen; 275 p.out = dst; 276 p.oremain = *dlen; 277 p.ototal = 0; 278 279 *dlen = 0; 280 281 groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX, 282 DIV_ROUND_UP(p.iremain, c.maximum)); 283 hdrsize = NX842_CRYPTO_HEADER_SIZE(groups); 284 285 spin_lock_bh(&ctx->lock); 286 287 /* skip adding header if the buffers meet all constraints */ 288 add_header = (p.iremain % c.multiple || 289 p.iremain < c.minimum || 290 p.iremain > c.maximum || 291 (u64)p.in % c.alignment || 292 p.oremain % c.multiple || 293 p.oremain < c.minimum || 294 p.oremain > c.maximum || 295 (u64)p.out % c.alignment); 296 297 hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC); 298 hdr->groups = 0; 299 hdr->ignore = 0; 300 301 while (p.iremain > 0) { 302 n = hdr->groups++; 303 ret = -ENOSPC; 304 if (hdr->groups > NX842_CRYPTO_GROUP_MAX) 305 goto unlock; 306 307 /* header goes before first group */ 308 h = !n && add_header ? hdrsize : 0; 309 310 if (ignore) 311 pr_warn("internal error, ignore is set %x\n", ignore); 312 313 ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h); 314 if (ret) 315 goto unlock; 316 } 317 318 if (!add_header && hdr->groups > 1) { 319 pr_err("Internal error: No header but multiple groups\n"); 320 ret = -EINVAL; 321 goto unlock; 322 } 323 324 /* ignore indicates the input stream needed to be padded */ 325 hdr->ignore = cpu_to_be16(ignore); 326 if (ignore) 327 pr_debug("marked %d bytes as ignore\n", ignore); 328 329 if (add_header) 330 ret = nx842_crypto_add_header(hdr, dst); 331 if (ret) 332 goto unlock; 333 334 *dlen = p.ototal; 335 336 pr_debug("compress total slen %x dlen %x\n", slen, *dlen); 337 338 unlock: 339 spin_unlock_bh(&ctx->lock); 340 return ret; 341 } 342 EXPORT_SYMBOL_GPL(nx842_crypto_compress); 343 344 static int decompress(struct nx842_crypto_ctx *ctx, 345 struct nx842_crypto_param *p, 346 struct nx842_crypto_header_group *g, 347 struct nx842_constraints *c, 348 u16 ignore) 349 { 350 unsigned int slen = be32_to_cpu(g->compressed_length); 351 unsigned int required_len = be32_to_cpu(g->uncompressed_length); 352 unsigned int dlen = p->oremain, tmplen; 353 unsigned int adj_slen = slen; 354 u8 *src = p->in, *dst = p->out; 355 u16 padding = be16_to_cpu(g->padding); 356 int ret, spadding = 0; 357 ktime_t timeout; 358 359 if (!slen || !required_len) 360 return -EINVAL; 361 362 if (p->iremain <= 0 || padding + slen > p->iremain) 363 return -EOVERFLOW; 364 365 if (p->oremain <= 0 || required_len - ignore > p->oremain) 366 return -ENOSPC; 367 368 src += padding; 369 370 if (slen % c->multiple) 371 adj_slen = round_up(slen, c->multiple); 372 if (slen < c->minimum) 373 adj_slen = c->minimum; 374 if (slen > c->maximum) 375 goto usesw; 376 if (slen < adj_slen || (u64)src % c->alignment) { 377 /* we can append padding bytes because the 842 format defines 378 * an "end" template (see lib/842/842_decompress.c) and will 379 * ignore any bytes following it. 380 */ 381 if (slen < adj_slen) 382 memset(ctx->sbounce + slen, 0, adj_slen - slen); 383 memcpy(ctx->sbounce, src, slen); 384 src = ctx->sbounce; 385 spadding = adj_slen - slen; 386 slen = adj_slen; 387 pr_debug("using decomp sbounce buffer, len %x\n", slen); 388 } 389 390 if (dlen % c->multiple) 391 dlen = round_down(dlen, c->multiple); 392 if (dlen < required_len || (u64)dst % c->alignment) { 393 dst = ctx->dbounce; 394 dlen = min(required_len, BOUNCE_BUFFER_SIZE); 395 pr_debug("using decomp dbounce buffer, len %x\n", dlen); 396 } 397 if (dlen < c->minimum) 398 goto usesw; 399 if (dlen > c->maximum) 400 dlen = c->maximum; 401 402 tmplen = dlen; 403 timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT); 404 do { 405 dlen = tmplen; /* reset dlen, if we're retrying */ 406 ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem); 407 } while (ret == -EBUSY && ktime_before(ktime_get(), timeout)); 408 if (ret) { 409 usesw: 410 /* reset everything, sw doesn't have constraints */ 411 src = p->in + padding; 412 slen = be32_to_cpu(g->compressed_length); 413 spadding = 0; 414 dst = p->out; 415 dlen = p->oremain; 416 if (dlen < required_len) { /* have ignore bytes */ 417 dst = ctx->dbounce; 418 dlen = BOUNCE_BUFFER_SIZE; 419 } 420 pr_info_ratelimited("using software 842 decompression\n"); 421 ret = sw842_decompress(src, slen, dst, &dlen); 422 } 423 if (ret) 424 return ret; 425 426 slen -= spadding; 427 428 dlen -= ignore; 429 if (ignore) 430 pr_debug("ignoring last %x bytes\n", ignore); 431 432 if (dst == ctx->dbounce) 433 memcpy(p->out, dst, dlen); 434 435 pr_debug("decompress slen %x padding %x dlen %x ignore %x\n", 436 slen, padding, dlen, ignore); 437 438 return update_param(p, slen + padding, dlen); 439 } 440 441 int nx842_crypto_decompress(struct crypto_tfm *tfm, 442 const u8 *src, unsigned int slen, 443 u8 *dst, unsigned int *dlen) 444 { 445 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); 446 struct nx842_crypto_header *hdr; 447 struct nx842_crypto_param p; 448 struct nx842_constraints c = *ctx->driver->constraints; 449 int n, ret, hdr_len; 450 u16 ignore = 0; 451 452 check_constraints(&c); 453 454 p.in = (u8 *)src; 455 p.iremain = slen; 456 p.out = dst; 457 p.oremain = *dlen; 458 p.ototal = 0; 459 460 *dlen = 0; 461 462 hdr = (struct nx842_crypto_header *)src; 463 464 spin_lock_bh(&ctx->lock); 465 466 /* If it doesn't start with our header magic number, assume it's a raw 467 * 842 compressed buffer and pass it directly to the hardware driver 468 */ 469 if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) { 470 struct nx842_crypto_header_group g = { 471 .padding = 0, 472 .compressed_length = cpu_to_be32(p.iremain), 473 .uncompressed_length = cpu_to_be32(p.oremain), 474 }; 475 476 ret = decompress(ctx, &p, &g, &c, 0); 477 if (ret) 478 goto unlock; 479 480 goto success; 481 } 482 483 if (!hdr->groups) { 484 pr_err("header has no groups\n"); 485 ret = -EINVAL; 486 goto unlock; 487 } 488 if (hdr->groups > NX842_CRYPTO_GROUP_MAX) { 489 pr_err("header has too many groups %x, max %x\n", 490 hdr->groups, NX842_CRYPTO_GROUP_MAX); 491 ret = -EINVAL; 492 goto unlock; 493 } 494 495 hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups); 496 if (hdr_len > slen) { 497 ret = -EOVERFLOW; 498 goto unlock; 499 } 500 501 memcpy(&ctx->header, src, hdr_len); 502 hdr = &ctx->header; 503 504 for (n = 0; n < hdr->groups; n++) { 505 /* ignore applies to last group */ 506 if (n + 1 == hdr->groups) 507 ignore = be16_to_cpu(hdr->ignore); 508 509 ret = decompress(ctx, &p, &hdr->group[n], &c, ignore); 510 if (ret) 511 goto unlock; 512 } 513 514 success: 515 *dlen = p.ototal; 516 517 pr_debug("decompress total slen %x dlen %x\n", slen, *dlen); 518 519 ret = 0; 520 521 unlock: 522 spin_unlock_bh(&ctx->lock); 523 524 return ret; 525 } 526 EXPORT_SYMBOL_GPL(nx842_crypto_decompress); 527 528 MODULE_LICENSE("GPL"); 529 MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver"); 530 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); 531