1 /* 2 * Driver for IBM Power 842 compression accelerator 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 * 18 * Copyright (C) IBM Corporation, 2012 19 * 20 * Authors: Robert Jennings <rcj@linux.vnet.ibm.com> 21 * Seth Jennings <sjenning@linux.vnet.ibm.com> 22 */ 23 24 #include <linux/kernel.h> 25 #include <linux/module.h> 26 #include <linux/nx842.h> 27 #include <linux/of.h> 28 #include <linux/slab.h> 29 30 #include <asm/page.h> 31 #include <asm/vio.h> 32 33 #include "nx_csbcpb.h" /* struct nx_csbcpb */ 34 35 #define MODULE_NAME "nx-compress" 36 MODULE_LICENSE("GPL"); 37 MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>"); 38 MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); 39 40 #define SHIFT_4K 12 41 #define SHIFT_64K 16 42 #define SIZE_4K (1UL << SHIFT_4K) 43 #define SIZE_64K (1UL << SHIFT_64K) 44 45 /* IO buffer must be 128 byte aligned */ 46 #define IO_BUFFER_ALIGN 128 47 48 struct nx842_header { 49 int blocks_nr; /* number of compressed blocks */ 50 int offset; /* offset of the first block (from beginning of header) */ 51 int sizes[0]; /* size of compressed blocks */ 52 }; 53 54 static inline int nx842_header_size(const struct nx842_header *hdr) 55 { 56 return sizeof(struct nx842_header) + 57 hdr->blocks_nr * sizeof(hdr->sizes[0]); 58 } 59 60 /* Macros for fields within nx_csbcpb */ 61 /* Check the valid bit within the csbcpb valid field */ 62 #define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7)) 63 64 /* CE macros operate on the completion_extension field bits in the csbcpb. 65 * CE0 0=full completion, 1=partial completion 66 * CE1 0=CE0 indicates completion, 1=termination (output may be modified) 67 * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */ 68 #define NX842_CSBCPB_CE0(x) (x & BIT_MASK(7)) 69 #define NX842_CSBCPB_CE1(x) (x & BIT_MASK(6)) 70 #define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5)) 71 72 /* The NX unit accepts data only on 4K page boundaries */ 73 #define NX842_HW_PAGE_SHIFT SHIFT_4K 74 #define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT) 75 #define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1)) 76 77 enum nx842_status { 78 UNAVAILABLE, 79 AVAILABLE 80 }; 81 82 struct ibm_nx842_counters { 83 atomic64_t comp_complete; 84 atomic64_t comp_failed; 85 atomic64_t decomp_complete; 86 atomic64_t decomp_failed; 87 atomic64_t swdecomp; 88 atomic64_t comp_times[32]; 89 atomic64_t decomp_times[32]; 90 }; 91 92 static struct nx842_devdata { 93 struct vio_dev *vdev; 94 struct device *dev; 95 struct ibm_nx842_counters *counters; 96 unsigned int max_sg_len; 97 unsigned int max_sync_size; 98 unsigned int max_sync_sg; 99 enum nx842_status status; 100 } __rcu *devdata; 101 static DEFINE_SPINLOCK(devdata_mutex); 102 103 #define NX842_COUNTER_INC(_x) \ 104 static inline void nx842_inc_##_x( \ 105 const struct nx842_devdata *dev) { \ 106 if (dev) \ 107 atomic64_inc(&dev->counters->_x); \ 108 } 109 NX842_COUNTER_INC(comp_complete); 110 NX842_COUNTER_INC(comp_failed); 111 NX842_COUNTER_INC(decomp_complete); 112 NX842_COUNTER_INC(decomp_failed); 113 NX842_COUNTER_INC(swdecomp); 114 115 #define NX842_HIST_SLOTS 16 116 117 static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time) 118 { 119 int bucket = fls(time); 120 121 if (bucket) 122 bucket = min((NX842_HIST_SLOTS - 1), bucket - 1); 123 124 atomic64_inc(×[bucket]); 125 } 126 127 /* NX unit operation flags */ 128 #define NX842_OP_COMPRESS 0x0 129 #define NX842_OP_CRC 0x1 130 #define NX842_OP_DECOMPRESS 0x2 131 #define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC) 132 #define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC) 133 #define NX842_OP_ASYNC (1<<23) 134 #define NX842_OP_NOTIFY (1<<22) 135 #define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8) 136 137 static unsigned long nx842_get_desired_dma(struct vio_dev *viodev) 138 { 139 /* No use of DMA mappings within the driver. */ 140 return 0; 141 } 142 143 struct nx842_slentry { 144 unsigned long ptr; /* Real address (use __pa()) */ 145 unsigned long len; 146 }; 147 148 /* pHyp scatterlist entry */ 149 struct nx842_scatterlist { 150 int entry_nr; /* number of slentries */ 151 struct nx842_slentry *entries; /* ptr to array of slentries */ 152 }; 153 154 /* Does not include sizeof(entry_nr) in the size */ 155 static inline unsigned long nx842_get_scatterlist_size( 156 struct nx842_scatterlist *sl) 157 { 158 return sl->entry_nr * sizeof(struct nx842_slentry); 159 } 160 161 static int nx842_build_scatterlist(unsigned long buf, int len, 162 struct nx842_scatterlist *sl) 163 { 164 unsigned long nextpage; 165 struct nx842_slentry *entry; 166 167 sl->entry_nr = 0; 168 169 entry = sl->entries; 170 while (len) { 171 entry->ptr = __pa(buf); 172 nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE); 173 if (nextpage < buf + len) { 174 /* we aren't at the end yet */ 175 if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE)) 176 /* we are in the middle (or beginning) */ 177 entry->len = NX842_HW_PAGE_SIZE; 178 else 179 /* we are at the beginning */ 180 entry->len = nextpage - buf; 181 } else { 182 /* at the end */ 183 entry->len = len; 184 } 185 186 len -= entry->len; 187 buf += entry->len; 188 sl->entry_nr++; 189 entry++; 190 } 191 192 return 0; 193 } 194 195 /* 196 * Working memory for software decompression 197 */ 198 struct sw842_fifo { 199 union { 200 char f8[256][8]; 201 char f4[512][4]; 202 }; 203 char f2[256][2]; 204 unsigned char f84_full; 205 unsigned char f2_full; 206 unsigned char f8_count; 207 unsigned char f2_count; 208 unsigned int f4_count; 209 }; 210 211 /* 212 * Working memory for crypto API 213 */ 214 struct nx842_workmem { 215 char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */ 216 union { 217 /* hardware working memory */ 218 struct { 219 /* scatterlist */ 220 char slin[SIZE_4K]; 221 char slout[SIZE_4K]; 222 /* coprocessor status/parameter block */ 223 struct nx_csbcpb csbcpb; 224 }; 225 /* software working memory */ 226 struct sw842_fifo swfifo; /* software decompression fifo */ 227 }; 228 }; 229 230 int nx842_get_workmem_size(void) 231 { 232 return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE; 233 } 234 EXPORT_SYMBOL_GPL(nx842_get_workmem_size); 235 236 int nx842_get_workmem_size_aligned(void) 237 { 238 return sizeof(struct nx842_workmem); 239 } 240 EXPORT_SYMBOL_GPL(nx842_get_workmem_size_aligned); 241 242 static int nx842_validate_result(struct device *dev, 243 struct cop_status_block *csb) 244 { 245 /* The csb must be valid after returning from vio_h_cop_sync */ 246 if (!NX842_CSBCBP_VALID_CHK(csb->valid)) { 247 dev_err(dev, "%s: cspcbp not valid upon completion.\n", 248 __func__); 249 dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n", 250 csb->valid, 251 csb->crb_seq_number, 252 csb->completion_code, 253 csb->completion_extension); 254 dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n", 255 csb->processed_byte_count, 256 (unsigned long)csb->address); 257 return -EIO; 258 } 259 260 /* Check return values from the hardware in the CSB */ 261 switch (csb->completion_code) { 262 case 0: /* Completed without error */ 263 break; 264 case 64: /* Target bytes > Source bytes during compression */ 265 case 13: /* Output buffer too small */ 266 dev_dbg(dev, "%s: Compression output larger than input\n", 267 __func__); 268 return -ENOSPC; 269 case 66: /* Input data contains an illegal template field */ 270 case 67: /* Template indicates data past the end of the input stream */ 271 dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n", 272 __func__, csb->completion_code); 273 return -EINVAL; 274 default: 275 dev_dbg(dev, "%s: Unspecified error (code:%d)\n", 276 __func__, csb->completion_code); 277 return -EIO; 278 } 279 280 /* Hardware sanity check */ 281 if (!NX842_CSBCPB_CE2(csb->completion_extension)) { 282 dev_err(dev, "%s: No error returned by hardware, but " 283 "data returned is unusable, contact support.\n" 284 "(Additional info: csbcbp->processed bytes " 285 "does not specify processed bytes for the " 286 "target buffer.)\n", __func__); 287 return -EIO; 288 } 289 290 return 0; 291 } 292 293 /** 294 * nx842_compress - Compress data using the 842 algorithm 295 * 296 * Compression provide by the NX842 coprocessor on IBM Power systems. 297 * The input buffer is compressed and the result is stored in the 298 * provided output buffer. 299 * 300 * Upon return from this function @outlen contains the length of the 301 * compressed data. If there is an error then @outlen will be 0 and an 302 * error will be specified by the return code from this function. 303 * 304 * @in: Pointer to input buffer, must be page aligned 305 * @inlen: Length of input buffer, must be PAGE_SIZE 306 * @out: Pointer to output buffer 307 * @outlen: Length of output buffer 308 * @wrkmem: ptr to buffer for working memory, size determined by 309 * nx842_get_workmem_size() 310 * 311 * Returns: 312 * 0 Success, output of length @outlen stored in the buffer at @out 313 * -ENOMEM Unable to allocate internal buffers 314 * -ENOSPC Output buffer is to small 315 * -EMSGSIZE XXX Difficult to describe this limitation 316 * -EIO Internal error 317 * -ENODEV Hardware unavailable 318 */ 319 int nx842_compress(const unsigned char *in, unsigned int inlen, 320 unsigned char *out, unsigned int *outlen, void *wmem) 321 { 322 struct nx842_header *hdr; 323 struct nx842_devdata *local_devdata; 324 struct device *dev = NULL; 325 struct nx842_workmem *workmem; 326 struct nx842_scatterlist slin, slout; 327 struct nx_csbcpb *csbcpb; 328 int ret = 0, max_sync_size, i, bytesleft, size, hdrsize; 329 unsigned long inbuf, outbuf, padding; 330 struct vio_pfo_op op = { 331 .done = NULL, 332 .handle = 0, 333 .timeout = 0, 334 }; 335 unsigned long start_time = get_tb(); 336 337 /* 338 * Make sure input buffer is 64k page aligned. This is assumed since 339 * this driver is designed for page compression only (for now). This 340 * is very nice since we can now use direct DDE(s) for the input and 341 * the alignment is guaranteed. 342 */ 343 inbuf = (unsigned long)in; 344 if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE) 345 return -EINVAL; 346 347 rcu_read_lock(); 348 local_devdata = rcu_dereference(devdata); 349 if (!local_devdata || !local_devdata->dev) { 350 rcu_read_unlock(); 351 return -ENODEV; 352 } 353 max_sync_size = local_devdata->max_sync_size; 354 dev = local_devdata->dev; 355 356 /* Create the header */ 357 hdr = (struct nx842_header *)out; 358 hdr->blocks_nr = PAGE_SIZE / max_sync_size; 359 hdrsize = nx842_header_size(hdr); 360 outbuf = (unsigned long)out + hdrsize; 361 bytesleft = *outlen - hdrsize; 362 363 /* Init scatterlist */ 364 workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, 365 NX842_HW_PAGE_SIZE); 366 slin.entries = (struct nx842_slentry *)workmem->slin; 367 slout.entries = (struct nx842_slentry *)workmem->slout; 368 369 /* Init operation */ 370 op.flags = NX842_OP_COMPRESS; 371 csbcpb = &workmem->csbcpb; 372 memset(csbcpb, 0, sizeof(*csbcpb)); 373 op.csbcpb = __pa(csbcpb); 374 op.out = __pa(slout.entries); 375 376 for (i = 0; i < hdr->blocks_nr; i++) { 377 /* 378 * Aligning the output blocks to 128 bytes does waste space, 379 * but it prevents the need for bounce buffers and memory 380 * copies. It also simplifies the code a lot. In the worst 381 * case (64k page, 4k max_sync_size), you lose up to 382 * (128*16)/64k = ~3% the compression factor. For 64k 383 * max_sync_size, the loss would be at most 128/64k = ~0.2%. 384 */ 385 padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf; 386 outbuf += padding; 387 bytesleft -= padding; 388 if (i == 0) 389 /* save offset into first block in header */ 390 hdr->offset = padding + hdrsize; 391 392 if (bytesleft <= 0) { 393 ret = -ENOSPC; 394 goto unlock; 395 } 396 397 /* 398 * NOTE: If the default max_sync_size is changed from 4k 399 * to 64k, remove the "likely" case below, since a 400 * scatterlist will always be needed. 401 */ 402 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { 403 /* Create direct DDE */ 404 op.in = __pa(inbuf); 405 op.inlen = max_sync_size; 406 407 } else { 408 /* Create indirect DDE (scatterlist) */ 409 nx842_build_scatterlist(inbuf, max_sync_size, &slin); 410 op.in = __pa(slin.entries); 411 op.inlen = -nx842_get_scatterlist_size(&slin); 412 } 413 414 /* 415 * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect 416 * DDE is required for the outbuf. 417 * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must 418 * also be page aligned (1 in 128/4k=32 chance) in order 419 * to use a direct DDE. 420 * This is unlikely, just use an indirect DDE always. 421 */ 422 nx842_build_scatterlist(outbuf, 423 min(bytesleft, max_sync_size), &slout); 424 /* op.out set before loop */ 425 op.outlen = -nx842_get_scatterlist_size(&slout); 426 427 /* Send request to pHyp */ 428 ret = vio_h_cop_sync(local_devdata->vdev, &op); 429 430 /* Check for pHyp error */ 431 if (ret) { 432 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", 433 __func__, ret, op.hcall_err); 434 ret = -EIO; 435 goto unlock; 436 } 437 438 /* Check for hardware error */ 439 ret = nx842_validate_result(dev, &csbcpb->csb); 440 if (ret && ret != -ENOSPC) 441 goto unlock; 442 443 /* Handle incompressible data */ 444 if (unlikely(ret == -ENOSPC)) { 445 if (bytesleft < max_sync_size) { 446 /* 447 * Not enough space left in the output buffer 448 * to store uncompressed block 449 */ 450 goto unlock; 451 } else { 452 /* Store incompressible block */ 453 memcpy((void *)outbuf, (void *)inbuf, 454 max_sync_size); 455 hdr->sizes[i] = -max_sync_size; 456 outbuf += max_sync_size; 457 bytesleft -= max_sync_size; 458 /* Reset ret, incompressible data handled */ 459 ret = 0; 460 } 461 } else { 462 /* Normal case, compression was successful */ 463 size = csbcpb->csb.processed_byte_count; 464 dev_dbg(dev, "%s: processed_bytes=%d\n", 465 __func__, size); 466 hdr->sizes[i] = size; 467 outbuf += size; 468 bytesleft -= size; 469 } 470 471 inbuf += max_sync_size; 472 } 473 474 *outlen = (unsigned int)(outbuf - (unsigned long)out); 475 476 unlock: 477 if (ret) 478 nx842_inc_comp_failed(local_devdata); 479 else { 480 nx842_inc_comp_complete(local_devdata); 481 ibm_nx842_incr_hist(local_devdata->counters->comp_times, 482 (get_tb() - start_time) / tb_ticks_per_usec); 483 } 484 rcu_read_unlock(); 485 return ret; 486 } 487 EXPORT_SYMBOL_GPL(nx842_compress); 488 489 static int sw842_decompress(const unsigned char *, int, unsigned char *, int *, 490 const void *); 491 492 /** 493 * nx842_decompress - Decompress data using the 842 algorithm 494 * 495 * Decompression provide by the NX842 coprocessor on IBM Power systems. 496 * The input buffer is decompressed and the result is stored in the 497 * provided output buffer. The size allocated to the output buffer is 498 * provided by the caller of this function in @outlen. Upon return from 499 * this function @outlen contains the length of the decompressed data. 500 * If there is an error then @outlen will be 0 and an error will be 501 * specified by the return code from this function. 502 * 503 * @in: Pointer to input buffer, will use bounce buffer if not 128 byte 504 * aligned 505 * @inlen: Length of input buffer 506 * @out: Pointer to output buffer, must be page aligned 507 * @outlen: Length of output buffer, must be PAGE_SIZE 508 * @wrkmem: ptr to buffer for working memory, size determined by 509 * nx842_get_workmem_size() 510 * 511 * Returns: 512 * 0 Success, output of length @outlen stored in the buffer at @out 513 * -ENODEV Hardware decompression device is unavailable 514 * -ENOMEM Unable to allocate internal buffers 515 * -ENOSPC Output buffer is to small 516 * -EINVAL Bad input data encountered when attempting decompress 517 * -EIO Internal error 518 */ 519 int nx842_decompress(const unsigned char *in, unsigned int inlen, 520 unsigned char *out, unsigned int *outlen, void *wmem) 521 { 522 struct nx842_header *hdr; 523 struct nx842_devdata *local_devdata; 524 struct device *dev = NULL; 525 struct nx842_workmem *workmem; 526 struct nx842_scatterlist slin, slout; 527 struct nx_csbcpb *csbcpb; 528 int ret = 0, i, size, max_sync_size; 529 unsigned long inbuf, outbuf; 530 struct vio_pfo_op op = { 531 .done = NULL, 532 .handle = 0, 533 .timeout = 0, 534 }; 535 unsigned long start_time = get_tb(); 536 537 /* Ensure page alignment and size */ 538 outbuf = (unsigned long)out; 539 if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE) 540 return -EINVAL; 541 542 rcu_read_lock(); 543 local_devdata = rcu_dereference(devdata); 544 if (local_devdata) 545 dev = local_devdata->dev; 546 547 /* Get header */ 548 hdr = (struct nx842_header *)in; 549 550 workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, 551 NX842_HW_PAGE_SIZE); 552 553 inbuf = (unsigned long)in + hdr->offset; 554 if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) { 555 /* Copy block(s) into bounce buffer for alignment */ 556 memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset); 557 inbuf = (unsigned long)workmem->bounce; 558 } 559 560 /* Init scatterlist */ 561 slin.entries = (struct nx842_slentry *)workmem->slin; 562 slout.entries = (struct nx842_slentry *)workmem->slout; 563 564 /* Init operation */ 565 op.flags = NX842_OP_DECOMPRESS; 566 csbcpb = &workmem->csbcpb; 567 memset(csbcpb, 0, sizeof(*csbcpb)); 568 op.csbcpb = __pa(csbcpb); 569 570 /* 571 * max_sync_size may have changed since compression, 572 * so we can't read it from the device info. We need 573 * to derive it from hdr->blocks_nr. 574 */ 575 max_sync_size = PAGE_SIZE / hdr->blocks_nr; 576 577 for (i = 0; i < hdr->blocks_nr; i++) { 578 /* Skip padding */ 579 inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN); 580 581 if (hdr->sizes[i] < 0) { 582 /* Negative sizes indicate uncompressed data blocks */ 583 size = abs(hdr->sizes[i]); 584 memcpy((void *)outbuf, (void *)inbuf, size); 585 outbuf += size; 586 inbuf += size; 587 continue; 588 } 589 590 if (!dev) 591 goto sw; 592 593 /* 594 * The better the compression, the more likely the "likely" 595 * case becomes. 596 */ 597 if (likely((inbuf & NX842_HW_PAGE_MASK) == 598 ((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) { 599 /* Create direct DDE */ 600 op.in = __pa(inbuf); 601 op.inlen = hdr->sizes[i]; 602 } else { 603 /* Create indirect DDE (scatterlist) */ 604 nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin); 605 op.in = __pa(slin.entries); 606 op.inlen = -nx842_get_scatterlist_size(&slin); 607 } 608 609 /* 610 * NOTE: If the default max_sync_size is changed from 4k 611 * to 64k, remove the "likely" case below, since a 612 * scatterlist will always be needed. 613 */ 614 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { 615 /* Create direct DDE */ 616 op.out = __pa(outbuf); 617 op.outlen = max_sync_size; 618 } else { 619 /* Create indirect DDE (scatterlist) */ 620 nx842_build_scatterlist(outbuf, max_sync_size, &slout); 621 op.out = __pa(slout.entries); 622 op.outlen = -nx842_get_scatterlist_size(&slout); 623 } 624 625 /* Send request to pHyp */ 626 ret = vio_h_cop_sync(local_devdata->vdev, &op); 627 628 /* Check for pHyp error */ 629 if (ret) { 630 dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", 631 __func__, ret, op.hcall_err); 632 dev = NULL; 633 goto sw; 634 } 635 636 /* Check for hardware error */ 637 ret = nx842_validate_result(dev, &csbcpb->csb); 638 if (ret) { 639 dev = NULL; 640 goto sw; 641 } 642 643 /* HW decompression success */ 644 inbuf += hdr->sizes[i]; 645 outbuf += csbcpb->csb.processed_byte_count; 646 continue; 647 648 sw: 649 /* software decompression */ 650 size = max_sync_size; 651 ret = sw842_decompress( 652 (unsigned char *)inbuf, hdr->sizes[i], 653 (unsigned char *)outbuf, &size, wmem); 654 if (ret) 655 pr_debug("%s: sw842_decompress failed with %d\n", 656 __func__, ret); 657 658 if (ret) { 659 if (ret != -ENOSPC && ret != -EINVAL && 660 ret != -EMSGSIZE) 661 ret = -EIO; 662 goto unlock; 663 } 664 665 /* SW decompression success */ 666 inbuf += hdr->sizes[i]; 667 outbuf += size; 668 } 669 670 *outlen = (unsigned int)(outbuf - (unsigned long)out); 671 672 unlock: 673 if (ret) 674 /* decompress fail */ 675 nx842_inc_decomp_failed(local_devdata); 676 else { 677 if (!dev) 678 /* software decompress */ 679 nx842_inc_swdecomp(local_devdata); 680 nx842_inc_decomp_complete(local_devdata); 681 ibm_nx842_incr_hist(local_devdata->counters->decomp_times, 682 (get_tb() - start_time) / tb_ticks_per_usec); 683 } 684 685 rcu_read_unlock(); 686 return ret; 687 } 688 EXPORT_SYMBOL_GPL(nx842_decompress); 689 690 /** 691 * nx842_OF_set_defaults -- Set default (disabled) values for devdata 692 * 693 * @devdata - struct nx842_devdata to update 694 * 695 * Returns: 696 * 0 on success 697 * -ENOENT if @devdata ptr is NULL 698 */ 699 static int nx842_OF_set_defaults(struct nx842_devdata *devdata) 700 { 701 if (devdata) { 702 devdata->max_sync_size = 0; 703 devdata->max_sync_sg = 0; 704 devdata->max_sg_len = 0; 705 devdata->status = UNAVAILABLE; 706 return 0; 707 } else 708 return -ENOENT; 709 } 710 711 /** 712 * nx842_OF_upd_status -- Update the device info from OF status prop 713 * 714 * The status property indicates if the accelerator is enabled. If the 715 * device is in the OF tree it indicates that the hardware is present. 716 * The status field indicates if the device is enabled when the status 717 * is 'okay'. Otherwise the device driver will be disabled. 718 * 719 * @devdata - struct nx842_devdata to update 720 * @prop - struct property point containing the maxsyncop for the update 721 * 722 * Returns: 723 * 0 - Device is available 724 * -EINVAL - Device is not available 725 */ 726 static int nx842_OF_upd_status(struct nx842_devdata *devdata, 727 struct property *prop) { 728 int ret = 0; 729 const char *status = (const char *)prop->value; 730 731 if (!strncmp(status, "okay", (size_t)prop->length)) { 732 devdata->status = AVAILABLE; 733 } else { 734 dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n", 735 __func__, status); 736 devdata->status = UNAVAILABLE; 737 } 738 739 return ret; 740 } 741 742 /** 743 * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop 744 * 745 * Definition of the 'ibm,max-sg-len' OF property: 746 * This field indicates the maximum byte length of a scatter list 747 * for the platform facility. It is a single cell encoded as with encode-int. 748 * 749 * Example: 750 * # od -x ibm,max-sg-len 751 * 0000000 0000 0ff0 752 * 753 * In this example, the maximum byte length of a scatter list is 754 * 0x0ff0 (4,080). 755 * 756 * @devdata - struct nx842_devdata to update 757 * @prop - struct property point containing the maxsyncop for the update 758 * 759 * Returns: 760 * 0 on success 761 * -EINVAL on failure 762 */ 763 static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata, 764 struct property *prop) { 765 int ret = 0; 766 const int *maxsglen = prop->value; 767 768 if (prop->length != sizeof(*maxsglen)) { 769 dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__); 770 dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__, 771 prop->length, sizeof(*maxsglen)); 772 ret = -EINVAL; 773 } else { 774 devdata->max_sg_len = (unsigned int)min(*maxsglen, 775 (int)NX842_HW_PAGE_SIZE); 776 } 777 778 return ret; 779 } 780 781 /** 782 * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop 783 * 784 * Definition of the 'ibm,max-sync-cop' OF property: 785 * Two series of cells. The first series of cells represents the maximums 786 * that can be synchronously compressed. The second series of cells 787 * represents the maximums that can be synchronously decompressed. 788 * 1. The first cell in each series contains the count of the number of 789 * data length, scatter list elements pairs that follow – each being 790 * of the form 791 * a. One cell data byte length 792 * b. One cell total number of scatter list elements 793 * 794 * Example: 795 * # od -x ibm,max-sync-cop 796 * 0000000 0000 0001 0000 1000 0000 01fe 0000 0001 797 * 0000020 0000 1000 0000 01fe 798 * 799 * In this example, compression supports 0x1000 (4,096) data byte length 800 * and 0x1fe (510) total scatter list elements. Decompression supports 801 * 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list 802 * elements. 803 * 804 * @devdata - struct nx842_devdata to update 805 * @prop - struct property point containing the maxsyncop for the update 806 * 807 * Returns: 808 * 0 on success 809 * -EINVAL on failure 810 */ 811 static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata, 812 struct property *prop) { 813 int ret = 0; 814 const struct maxsynccop_t { 815 int comp_elements; 816 int comp_data_limit; 817 int comp_sg_limit; 818 int decomp_elements; 819 int decomp_data_limit; 820 int decomp_sg_limit; 821 } *maxsynccop; 822 823 if (prop->length != sizeof(*maxsynccop)) { 824 dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__); 825 dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length, 826 sizeof(*maxsynccop)); 827 ret = -EINVAL; 828 goto out; 829 } 830 831 maxsynccop = (const struct maxsynccop_t *)prop->value; 832 833 /* Use one limit rather than separate limits for compression and 834 * decompression. Set a maximum for this so as not to exceed the 835 * size that the header can support and round the value down to 836 * the hardware page size (4K) */ 837 devdata->max_sync_size = 838 (unsigned int)min(maxsynccop->comp_data_limit, 839 maxsynccop->decomp_data_limit); 840 841 devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size, 842 SIZE_64K); 843 844 if (devdata->max_sync_size < SIZE_4K) { 845 dev_err(devdata->dev, "%s: hardware max data size (%u) is " 846 "less than the driver minimum, unable to use " 847 "the hardware device\n", 848 __func__, devdata->max_sync_size); 849 ret = -EINVAL; 850 goto out; 851 } 852 853 devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit, 854 maxsynccop->decomp_sg_limit); 855 if (devdata->max_sync_sg < 1) { 856 dev_err(devdata->dev, "%s: hardware max sg size (%u) is " 857 "less than the driver minimum, unable to use " 858 "the hardware device\n", 859 __func__, devdata->max_sync_sg); 860 ret = -EINVAL; 861 goto out; 862 } 863 864 out: 865 return ret; 866 } 867 868 /** 869 * 870 * nx842_OF_upd -- Handle OF properties updates for the device. 871 * 872 * Set all properties from the OF tree. Optionally, a new property 873 * can be provided by the @new_prop pointer to overwrite an existing value. 874 * The device will remain disabled until all values are valid, this function 875 * will return an error for updates unless all values are valid. 876 * 877 * @new_prop: If not NULL, this property is being updated. If NULL, update 878 * all properties from the current values in the OF tree. 879 * 880 * Returns: 881 * 0 - Success 882 * -ENOMEM - Could not allocate memory for new devdata structure 883 * -EINVAL - property value not found, new_prop is not a recognized 884 * property for the device or property value is not valid. 885 * -ENODEV - Device is not available 886 */ 887 static int nx842_OF_upd(struct property *new_prop) 888 { 889 struct nx842_devdata *old_devdata = NULL; 890 struct nx842_devdata *new_devdata = NULL; 891 struct device_node *of_node = NULL; 892 struct property *status = NULL; 893 struct property *maxsglen = NULL; 894 struct property *maxsyncop = NULL; 895 int ret = 0; 896 unsigned long flags; 897 898 spin_lock_irqsave(&devdata_mutex, flags); 899 old_devdata = rcu_dereference_check(devdata, 900 lockdep_is_held(&devdata_mutex)); 901 if (old_devdata) 902 of_node = old_devdata->dev->of_node; 903 904 if (!old_devdata || !of_node) { 905 pr_err("%s: device is not available\n", __func__); 906 spin_unlock_irqrestore(&devdata_mutex, flags); 907 return -ENODEV; 908 } 909 910 new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); 911 if (!new_devdata) { 912 dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__); 913 ret = -ENOMEM; 914 goto error_out; 915 } 916 917 memcpy(new_devdata, old_devdata, sizeof(*old_devdata)); 918 new_devdata->counters = old_devdata->counters; 919 920 /* Set ptrs for existing properties */ 921 status = of_find_property(of_node, "status", NULL); 922 maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL); 923 maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL); 924 if (!status || !maxsglen || !maxsyncop) { 925 dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__); 926 ret = -EINVAL; 927 goto error_out; 928 } 929 930 /* Set ptr to new property if provided */ 931 if (new_prop) { 932 /* Single property */ 933 if (!strncmp(new_prop->name, "status", new_prop->length)) { 934 status = new_prop; 935 936 } else if (!strncmp(new_prop->name, "ibm,max-sg-len", 937 new_prop->length)) { 938 maxsglen = new_prop; 939 940 } else if (!strncmp(new_prop->name, "ibm,max-sync-cop", 941 new_prop->length)) { 942 maxsyncop = new_prop; 943 944 } else { 945 /* 946 * Skip the update, the property being updated 947 * has no impact. 948 */ 949 goto out; 950 } 951 } 952 953 /* Perform property updates */ 954 ret = nx842_OF_upd_status(new_devdata, status); 955 if (ret) 956 goto error_out; 957 958 ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen); 959 if (ret) 960 goto error_out; 961 962 ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop); 963 if (ret) 964 goto error_out; 965 966 out: 967 dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n", 968 __func__, new_devdata->max_sync_size, 969 old_devdata->max_sync_size); 970 dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n", 971 __func__, new_devdata->max_sync_sg, 972 old_devdata->max_sync_sg); 973 dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n", 974 __func__, new_devdata->max_sg_len, 975 old_devdata->max_sg_len); 976 977 rcu_assign_pointer(devdata, new_devdata); 978 spin_unlock_irqrestore(&devdata_mutex, flags); 979 synchronize_rcu(); 980 dev_set_drvdata(new_devdata->dev, new_devdata); 981 kfree(old_devdata); 982 return 0; 983 984 error_out: 985 if (new_devdata) { 986 dev_info(old_devdata->dev, "%s: device disabled\n", __func__); 987 nx842_OF_set_defaults(new_devdata); 988 rcu_assign_pointer(devdata, new_devdata); 989 spin_unlock_irqrestore(&devdata_mutex, flags); 990 synchronize_rcu(); 991 dev_set_drvdata(new_devdata->dev, new_devdata); 992 kfree(old_devdata); 993 } else { 994 dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__); 995 spin_unlock_irqrestore(&devdata_mutex, flags); 996 } 997 998 if (!ret) 999 ret = -EINVAL; 1000 return ret; 1001 } 1002 1003 /** 1004 * nx842_OF_notifier - Process updates to OF properties for the device 1005 * 1006 * @np: notifier block 1007 * @action: notifier action 1008 * @update: struct pSeries_reconfig_prop_update pointer if action is 1009 * PSERIES_UPDATE_PROPERTY 1010 * 1011 * Returns: 1012 * NOTIFY_OK on success 1013 * NOTIFY_BAD encoded with error number on failure, use 1014 * notifier_to_errno() to decode this value 1015 */ 1016 static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, 1017 void *update) 1018 { 1019 struct of_prop_reconfig *upd = update; 1020 struct nx842_devdata *local_devdata; 1021 struct device_node *node = NULL; 1022 1023 rcu_read_lock(); 1024 local_devdata = rcu_dereference(devdata); 1025 if (local_devdata) 1026 node = local_devdata->dev->of_node; 1027 1028 if (local_devdata && 1029 action == OF_RECONFIG_UPDATE_PROPERTY && 1030 !strcmp(upd->dn->name, node->name)) { 1031 rcu_read_unlock(); 1032 nx842_OF_upd(upd->prop); 1033 } else 1034 rcu_read_unlock(); 1035 1036 return NOTIFY_OK; 1037 } 1038 1039 static struct notifier_block nx842_of_nb = { 1040 .notifier_call = nx842_OF_notifier, 1041 }; 1042 1043 #define nx842_counter_read(_name) \ 1044 static ssize_t nx842_##_name##_show(struct device *dev, \ 1045 struct device_attribute *attr, \ 1046 char *buf) { \ 1047 struct nx842_devdata *local_devdata; \ 1048 int p = 0; \ 1049 rcu_read_lock(); \ 1050 local_devdata = rcu_dereference(devdata); \ 1051 if (local_devdata) \ 1052 p = snprintf(buf, PAGE_SIZE, "%ld\n", \ 1053 atomic64_read(&local_devdata->counters->_name)); \ 1054 rcu_read_unlock(); \ 1055 return p; \ 1056 } 1057 1058 #define NX842DEV_COUNTER_ATTR_RO(_name) \ 1059 nx842_counter_read(_name); \ 1060 static struct device_attribute dev_attr_##_name = __ATTR(_name, \ 1061 0444, \ 1062 nx842_##_name##_show,\ 1063 NULL); 1064 1065 NX842DEV_COUNTER_ATTR_RO(comp_complete); 1066 NX842DEV_COUNTER_ATTR_RO(comp_failed); 1067 NX842DEV_COUNTER_ATTR_RO(decomp_complete); 1068 NX842DEV_COUNTER_ATTR_RO(decomp_failed); 1069 NX842DEV_COUNTER_ATTR_RO(swdecomp); 1070 1071 static ssize_t nx842_timehist_show(struct device *, 1072 struct device_attribute *, char *); 1073 1074 static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444, 1075 nx842_timehist_show, NULL); 1076 static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times, 1077 0444, nx842_timehist_show, NULL); 1078 1079 static ssize_t nx842_timehist_show(struct device *dev, 1080 struct device_attribute *attr, char *buf) { 1081 char *p = buf; 1082 struct nx842_devdata *local_devdata; 1083 atomic64_t *times; 1084 int bytes_remain = PAGE_SIZE; 1085 int bytes; 1086 int i; 1087 1088 rcu_read_lock(); 1089 local_devdata = rcu_dereference(devdata); 1090 if (!local_devdata) { 1091 rcu_read_unlock(); 1092 return 0; 1093 } 1094 1095 if (attr == &dev_attr_comp_times) 1096 times = local_devdata->counters->comp_times; 1097 else if (attr == &dev_attr_decomp_times) 1098 times = local_devdata->counters->decomp_times; 1099 else { 1100 rcu_read_unlock(); 1101 return 0; 1102 } 1103 1104 for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) { 1105 bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n", 1106 i ? (2<<(i-1)) : 0, (2<<i)-1, 1107 atomic64_read(×[i])); 1108 bytes_remain -= bytes; 1109 p += bytes; 1110 } 1111 /* The last bucket holds everything over 1112 * 2<<(NX842_HIST_SLOTS - 2) us */ 1113 bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n", 1114 2<<(NX842_HIST_SLOTS - 2), 1115 atomic64_read(×[(NX842_HIST_SLOTS - 1)])); 1116 p += bytes; 1117 1118 rcu_read_unlock(); 1119 return p - buf; 1120 } 1121 1122 static struct attribute *nx842_sysfs_entries[] = { 1123 &dev_attr_comp_complete.attr, 1124 &dev_attr_comp_failed.attr, 1125 &dev_attr_decomp_complete.attr, 1126 &dev_attr_decomp_failed.attr, 1127 &dev_attr_swdecomp.attr, 1128 &dev_attr_comp_times.attr, 1129 &dev_attr_decomp_times.attr, 1130 NULL, 1131 }; 1132 1133 static struct attribute_group nx842_attribute_group = { 1134 .name = NULL, /* put in device directory */ 1135 .attrs = nx842_sysfs_entries, 1136 }; 1137 1138 static int __init nx842_probe(struct vio_dev *viodev, 1139 const struct vio_device_id *id) 1140 { 1141 struct nx842_devdata *old_devdata, *new_devdata = NULL; 1142 unsigned long flags; 1143 int ret = 0; 1144 1145 spin_lock_irqsave(&devdata_mutex, flags); 1146 old_devdata = rcu_dereference_check(devdata, 1147 lockdep_is_held(&devdata_mutex)); 1148 1149 if (old_devdata && old_devdata->vdev != NULL) { 1150 dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__); 1151 ret = -1; 1152 goto error_unlock; 1153 } 1154 1155 dev_set_drvdata(&viodev->dev, NULL); 1156 1157 new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); 1158 if (!new_devdata) { 1159 dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__); 1160 ret = -ENOMEM; 1161 goto error_unlock; 1162 } 1163 1164 new_devdata->counters = kzalloc(sizeof(*new_devdata->counters), 1165 GFP_NOFS); 1166 if (!new_devdata->counters) { 1167 dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__); 1168 ret = -ENOMEM; 1169 goto error_unlock; 1170 } 1171 1172 new_devdata->vdev = viodev; 1173 new_devdata->dev = &viodev->dev; 1174 nx842_OF_set_defaults(new_devdata); 1175 1176 rcu_assign_pointer(devdata, new_devdata); 1177 spin_unlock_irqrestore(&devdata_mutex, flags); 1178 synchronize_rcu(); 1179 kfree(old_devdata); 1180 1181 of_reconfig_notifier_register(&nx842_of_nb); 1182 1183 ret = nx842_OF_upd(NULL); 1184 if (ret && ret != -ENODEV) { 1185 dev_err(&viodev->dev, "could not parse device tree. %d\n", ret); 1186 ret = -1; 1187 goto error; 1188 } 1189 1190 rcu_read_lock(); 1191 if (dev_set_drvdata(&viodev->dev, rcu_dereference(devdata))) { 1192 rcu_read_unlock(); 1193 dev_err(&viodev->dev, "failed to set driver data for device\n"); 1194 ret = -1; 1195 goto error; 1196 } 1197 rcu_read_unlock(); 1198 1199 if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) { 1200 dev_err(&viodev->dev, "could not create sysfs device attributes\n"); 1201 ret = -1; 1202 goto error; 1203 } 1204 1205 return 0; 1206 1207 error_unlock: 1208 spin_unlock_irqrestore(&devdata_mutex, flags); 1209 if (new_devdata) 1210 kfree(new_devdata->counters); 1211 kfree(new_devdata); 1212 error: 1213 return ret; 1214 } 1215 1216 static int __exit nx842_remove(struct vio_dev *viodev) 1217 { 1218 struct nx842_devdata *old_devdata; 1219 unsigned long flags; 1220 1221 pr_info("Removing IBM Power 842 compression device\n"); 1222 sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); 1223 1224 spin_lock_irqsave(&devdata_mutex, flags); 1225 old_devdata = rcu_dereference_check(devdata, 1226 lockdep_is_held(&devdata_mutex)); 1227 of_reconfig_notifier_unregister(&nx842_of_nb); 1228 rcu_assign_pointer(devdata, NULL); 1229 spin_unlock_irqrestore(&devdata_mutex, flags); 1230 synchronize_rcu(); 1231 dev_set_drvdata(&viodev->dev, NULL); 1232 if (old_devdata) 1233 kfree(old_devdata->counters); 1234 kfree(old_devdata); 1235 return 0; 1236 } 1237 1238 static struct vio_device_id nx842_driver_ids[] = { 1239 {"ibm,compression-v1", "ibm,compression"}, 1240 {"", ""}, 1241 }; 1242 1243 static struct vio_driver nx842_driver = { 1244 .name = MODULE_NAME, 1245 .probe = nx842_probe, 1246 .remove = nx842_remove, 1247 .get_desired_dma = nx842_get_desired_dma, 1248 .id_table = nx842_driver_ids, 1249 }; 1250 1251 static int __init nx842_init(void) 1252 { 1253 struct nx842_devdata *new_devdata; 1254 pr_info("Registering IBM Power 842 compression driver\n"); 1255 1256 RCU_INIT_POINTER(devdata, NULL); 1257 new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL); 1258 if (!new_devdata) { 1259 pr_err("Could not allocate memory for device data\n"); 1260 return -ENOMEM; 1261 } 1262 new_devdata->status = UNAVAILABLE; 1263 RCU_INIT_POINTER(devdata, new_devdata); 1264 1265 return vio_register_driver(&nx842_driver); 1266 } 1267 1268 module_init(nx842_init); 1269 1270 static void __exit nx842_exit(void) 1271 { 1272 struct nx842_devdata *old_devdata; 1273 unsigned long flags; 1274 1275 pr_info("Exiting IBM Power 842 compression driver\n"); 1276 spin_lock_irqsave(&devdata_mutex, flags); 1277 old_devdata = rcu_dereference_check(devdata, 1278 lockdep_is_held(&devdata_mutex)); 1279 rcu_assign_pointer(devdata, NULL); 1280 spin_unlock_irqrestore(&devdata_mutex, flags); 1281 synchronize_rcu(); 1282 if (old_devdata) 1283 dev_set_drvdata(old_devdata->dev, NULL); 1284 kfree(old_devdata); 1285 vio_unregister_driver(&nx842_driver); 1286 } 1287 1288 module_exit(nx842_exit); 1289 1290 /********************************* 1291 * 842 software decompressor 1292 *********************************/ 1293 typedef int (*sw842_template_op)(const char **, int *, unsigned char **, 1294 struct sw842_fifo *); 1295 1296 static int sw842_data8(const char **, int *, unsigned char **, 1297 struct sw842_fifo *); 1298 static int sw842_data4(const char **, int *, unsigned char **, 1299 struct sw842_fifo *); 1300 static int sw842_data2(const char **, int *, unsigned char **, 1301 struct sw842_fifo *); 1302 static int sw842_ptr8(const char **, int *, unsigned char **, 1303 struct sw842_fifo *); 1304 static int sw842_ptr4(const char **, int *, unsigned char **, 1305 struct sw842_fifo *); 1306 static int sw842_ptr2(const char **, int *, unsigned char **, 1307 struct sw842_fifo *); 1308 1309 /* special templates */ 1310 #define SW842_TMPL_REPEAT 0x1B 1311 #define SW842_TMPL_ZEROS 0x1C 1312 #define SW842_TMPL_EOF 0x1E 1313 1314 static sw842_template_op sw842_tmpl_ops[26][4] = { 1315 { sw842_data8, NULL}, /* 0 (00000) */ 1316 { sw842_data4, sw842_data2, sw842_ptr2, NULL}, 1317 { sw842_data4, sw842_ptr2, sw842_data2, NULL}, 1318 { sw842_data4, sw842_ptr2, sw842_ptr2, NULL}, 1319 { sw842_data4, sw842_ptr4, NULL}, 1320 { sw842_data2, sw842_ptr2, sw842_data4, NULL}, 1321 { sw842_data2, sw842_ptr2, sw842_data2, sw842_ptr2}, 1322 { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_data2}, 1323 { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_ptr2,}, 1324 { sw842_data2, sw842_ptr2, sw842_ptr4, NULL}, 1325 { sw842_ptr2, sw842_data2, sw842_data4, NULL}, /* 10 (01010) */ 1326 { sw842_ptr2, sw842_data4, sw842_ptr2, NULL}, 1327 { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_data2}, 1328 { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_ptr2}, 1329 { sw842_ptr2, sw842_data2, sw842_ptr4, NULL}, 1330 { sw842_ptr2, sw842_ptr2, sw842_data4, NULL}, 1331 { sw842_ptr2, sw842_ptr2, sw842_data2, sw842_ptr2}, 1332 { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_data2}, 1333 { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_ptr2}, 1334 { sw842_ptr2, sw842_ptr2, sw842_ptr4, NULL}, 1335 { sw842_ptr4, sw842_data4, NULL}, /* 20 (10100) */ 1336 { sw842_ptr4, sw842_data2, sw842_ptr2, NULL}, 1337 { sw842_ptr4, sw842_ptr2, sw842_data2, NULL}, 1338 { sw842_ptr4, sw842_ptr2, sw842_ptr2, NULL}, 1339 { sw842_ptr4, sw842_ptr4, NULL}, 1340 { sw842_ptr8, NULL} 1341 }; 1342 1343 /* Software decompress helpers */ 1344 1345 static uint8_t sw842_get_byte(const char *buf, int bit) 1346 { 1347 uint8_t tmpl; 1348 uint16_t tmp; 1349 tmp = htons(*(uint16_t *)(buf)); 1350 tmp = (uint16_t)(tmp << bit); 1351 tmp = ntohs(tmp); 1352 memcpy(&tmpl, &tmp, 1); 1353 return tmpl; 1354 } 1355 1356 static uint8_t sw842_get_template(const char **buf, int *bit) 1357 { 1358 uint8_t byte; 1359 byte = sw842_get_byte(*buf, *bit); 1360 byte = byte >> 3; 1361 byte &= 0x1F; 1362 *buf += (*bit + 5) / 8; 1363 *bit = (*bit + 5) % 8; 1364 return byte; 1365 } 1366 1367 /* repeat_count happens to be 5-bit too (like the template) */ 1368 static uint8_t sw842_get_repeat_count(const char **buf, int *bit) 1369 { 1370 uint8_t byte; 1371 byte = sw842_get_byte(*buf, *bit); 1372 byte = byte >> 2; 1373 byte &= 0x3F; 1374 *buf += (*bit + 6) / 8; 1375 *bit = (*bit + 6) % 8; 1376 return byte; 1377 } 1378 1379 static uint8_t sw842_get_ptr2(const char **buf, int *bit) 1380 { 1381 uint8_t ptr; 1382 ptr = sw842_get_byte(*buf, *bit); 1383 (*buf)++; 1384 return ptr; 1385 } 1386 1387 static uint16_t sw842_get_ptr4(const char **buf, int *bit, 1388 struct sw842_fifo *fifo) 1389 { 1390 uint16_t ptr; 1391 ptr = htons(*(uint16_t *)(*buf)); 1392 ptr = (uint16_t)(ptr << *bit); 1393 ptr = ptr >> 7; 1394 ptr &= 0x01FF; 1395 *buf += (*bit + 9) / 8; 1396 *bit = (*bit + 9) % 8; 1397 return ptr; 1398 } 1399 1400 static uint8_t sw842_get_ptr8(const char **buf, int *bit, 1401 struct sw842_fifo *fifo) 1402 { 1403 return sw842_get_ptr2(buf, bit); 1404 } 1405 1406 /* Software decompress template ops */ 1407 1408 static int sw842_data8(const char **inbuf, int *inbit, 1409 unsigned char **outbuf, struct sw842_fifo *fifo) 1410 { 1411 int ret; 1412 1413 ret = sw842_data4(inbuf, inbit, outbuf, fifo); 1414 if (ret) 1415 return ret; 1416 ret = sw842_data4(inbuf, inbit, outbuf, fifo); 1417 return ret; 1418 } 1419 1420 static int sw842_data4(const char **inbuf, int *inbit, 1421 unsigned char **outbuf, struct sw842_fifo *fifo) 1422 { 1423 int ret; 1424 1425 ret = sw842_data2(inbuf, inbit, outbuf, fifo); 1426 if (ret) 1427 return ret; 1428 ret = sw842_data2(inbuf, inbit, outbuf, fifo); 1429 return ret; 1430 } 1431 1432 static int sw842_data2(const char **inbuf, int *inbit, 1433 unsigned char **outbuf, struct sw842_fifo *fifo) 1434 { 1435 **outbuf = sw842_get_byte(*inbuf, *inbit); 1436 (*inbuf)++; 1437 (*outbuf)++; 1438 **outbuf = sw842_get_byte(*inbuf, *inbit); 1439 (*inbuf)++; 1440 (*outbuf)++; 1441 return 0; 1442 } 1443 1444 static int sw842_ptr8(const char **inbuf, int *inbit, 1445 unsigned char **outbuf, struct sw842_fifo *fifo) 1446 { 1447 uint8_t ptr; 1448 ptr = sw842_get_ptr8(inbuf, inbit, fifo); 1449 if (!fifo->f84_full && (ptr >= fifo->f8_count)) 1450 return 1; 1451 memcpy(*outbuf, fifo->f8[ptr], 8); 1452 *outbuf += 8; 1453 return 0; 1454 } 1455 1456 static int sw842_ptr4(const char **inbuf, int *inbit, 1457 unsigned char **outbuf, struct sw842_fifo *fifo) 1458 { 1459 uint16_t ptr; 1460 ptr = sw842_get_ptr4(inbuf, inbit, fifo); 1461 if (!fifo->f84_full && (ptr >= fifo->f4_count)) 1462 return 1; 1463 memcpy(*outbuf, fifo->f4[ptr], 4); 1464 *outbuf += 4; 1465 return 0; 1466 } 1467 1468 static int sw842_ptr2(const char **inbuf, int *inbit, 1469 unsigned char **outbuf, struct sw842_fifo *fifo) 1470 { 1471 uint8_t ptr; 1472 ptr = sw842_get_ptr2(inbuf, inbit); 1473 if (!fifo->f2_full && (ptr >= fifo->f2_count)) 1474 return 1; 1475 memcpy(*outbuf, fifo->f2[ptr], 2); 1476 *outbuf += 2; 1477 return 0; 1478 } 1479 1480 static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo) 1481 { 1482 unsigned char initial_f2count = fifo->f2_count; 1483 1484 memcpy(fifo->f8[fifo->f8_count], buf, 8); 1485 fifo->f4_count += 2; 1486 fifo->f8_count += 1; 1487 1488 if (!fifo->f84_full && fifo->f4_count >= 512) { 1489 fifo->f84_full = 1; 1490 fifo->f4_count /= 512; 1491 } 1492 1493 memcpy(fifo->f2[fifo->f2_count++], buf, 2); 1494 memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2); 1495 memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2); 1496 memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2); 1497 if (fifo->f2_count < initial_f2count) 1498 fifo->f2_full = 1; 1499 } 1500 1501 static int sw842_decompress(const unsigned char *src, int srclen, 1502 unsigned char *dst, int *destlen, 1503 const void *wrkmem) 1504 { 1505 uint8_t tmpl; 1506 const char *inbuf; 1507 int inbit = 0; 1508 unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf; 1509 const char *inbuf_end; 1510 sw842_template_op op; 1511 int opindex; 1512 int i, repeat_count; 1513 struct sw842_fifo *fifo; 1514 int ret = 0; 1515 1516 fifo = &((struct nx842_workmem *)(wrkmem))->swfifo; 1517 memset(fifo, 0, sizeof(*fifo)); 1518 1519 origbuf = NULL; 1520 inbuf = src; 1521 inbuf_end = src + srclen; 1522 outbuf = dst; 1523 outbuf_end = dst + *destlen; 1524 1525 while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) { 1526 if (inbuf >= inbuf_end) { 1527 ret = -EINVAL; 1528 goto out; 1529 } 1530 1531 opindex = 0; 1532 prevbuf = origbuf; 1533 origbuf = outbuf; 1534 switch (tmpl) { 1535 case SW842_TMPL_REPEAT: 1536 if (prevbuf == NULL) { 1537 ret = -EINVAL; 1538 goto out; 1539 } 1540 1541 repeat_count = sw842_get_repeat_count(&inbuf, 1542 &inbit) + 1; 1543 1544 /* Did the repeat count advance past the end of input */ 1545 if (inbuf > inbuf_end) { 1546 ret = -EINVAL; 1547 goto out; 1548 } 1549 1550 for (i = 0; i < repeat_count; i++) { 1551 /* Would this overflow the output buffer */ 1552 if ((outbuf + 8) > outbuf_end) { 1553 ret = -ENOSPC; 1554 goto out; 1555 } 1556 1557 memcpy(outbuf, prevbuf, 8); 1558 sw842_copy_to_fifo(outbuf, fifo); 1559 outbuf += 8; 1560 } 1561 break; 1562 1563 case SW842_TMPL_ZEROS: 1564 /* Would this overflow the output buffer */ 1565 if ((outbuf + 8) > outbuf_end) { 1566 ret = -ENOSPC; 1567 goto out; 1568 } 1569 1570 memset(outbuf, 0, 8); 1571 sw842_copy_to_fifo(outbuf, fifo); 1572 outbuf += 8; 1573 break; 1574 1575 default: 1576 if (tmpl > 25) { 1577 ret = -EINVAL; 1578 goto out; 1579 } 1580 1581 /* Does this go past the end of the input buffer */ 1582 if ((inbuf + 2) > inbuf_end) { 1583 ret = -EINVAL; 1584 goto out; 1585 } 1586 1587 /* Would this overflow the output buffer */ 1588 if ((outbuf + 8) > outbuf_end) { 1589 ret = -ENOSPC; 1590 goto out; 1591 } 1592 1593 while (opindex < 4 && 1594 (op = sw842_tmpl_ops[tmpl][opindex++]) 1595 != NULL) { 1596 ret = (*op)(&inbuf, &inbit, &outbuf, fifo); 1597 if (ret) { 1598 ret = -EINVAL; 1599 goto out; 1600 } 1601 sw842_copy_to_fifo(origbuf, fifo); 1602 } 1603 } 1604 } 1605 1606 out: 1607 if (!ret) 1608 *destlen = (unsigned int)(outbuf - dst); 1609 else 1610 *destlen = 0; 1611 1612 return ret; 1613 } 1614