// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Driver for IBM PowerNV compression accelerator
 *
 * Copyright (C) 2015 Dan Streetman, IBM Corp
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "nx-842.h"

#include <linux/timer.h>

#include <asm/prom.h>
#include <asm/icswx.h>
#include <asm/vas.h>
#include <asm/reg.h>
#include <asm/opal-api.h>
#include <asm/opal.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
MODULE_DESCRIPTION("H/W Compression driver for IBM PowerNV processors");
MODULE_ALIAS_CRYPTO("842");
MODULE_ALIAS_CRYPTO("842-nx");

/* Alignment applied to the caller-supplied workmem pointer before use */
#define WORKMEM_ALIGN	(CRB_ALIGN)
/* How long wait_for_csb() polls for a valid CSB, counted from workmem->start */
#define CSB_WAIT_MAX	(5000) /* ms */
/* Number of additional copy/paste attempts nx842_exec_vas() makes on failure */
#define VAS_RETRIES	(10)

/*
 * Per-request scratch area. The exec functions receive a raw buffer and
 * align it up with PTR_ALIGN(workmem, WORKMEM_ALIGN) before treating it as
 * this structure; the trailing padding leaves room for that adjustment.
 */
struct nx842_workmem {
	/* Below fields must be properly aligned */
	struct coprocessor_request_block crb; /* CRB_ALIGN align */
	struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
	struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
	/* Above fields must be properly aligned */

	/* taken right before the request is submitted; timeout reference */
	ktime_t start;

	char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
} __packed __aligned(WORKMEM_ALIGN);

/* One NX coprocessor (or one VAS RxFIFO of it) found in the device tree */
struct nx_coproc {
	unsigned int chip_id;
	unsigned int ct;	/* Can be 842 or GZIP high/normal*/
	unsigned int ci;	/* Coprocessor instance, used with icswx */
	struct {
		struct vas_window *rxwin;	/* receive window, VAS path only */
		int id;				/* VAS instance id used to open windows */
	} vas;
	struct list_head list;	/* entry in nx_coprocs */
};

/*
 * Send the request to NX engine on the chip for the corresponding CPU
 * where the process is executing. Use with VAS function.
 */
static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);

/* no cpu hotplug on powernv, so this list never changes after init */
static LIST_HEAD(nx_coprocs);
static unsigned int nx842_ct;	/* used in icswx function */

/*
 * Using same values as in skiboot or coprocessor type representing
 * in NX workbook.
67 */ 68 #define NX_CT_GZIP (2) /* on P9 and later */ 69 #define NX_CT_842 (3) 70 71 static int (*nx842_powernv_exec)(const unsigned char *in, 72 unsigned int inlen, unsigned char *out, 73 unsigned int *outlenp, void *workmem, int fc); 74 75 /* 76 * setup_indirect_dde - Setup an indirect DDE 77 * 78 * The DDE is setup with the DDE count, byte count, and address of 79 * first direct DDE in the list. 80 */ 81 static void setup_indirect_dde(struct data_descriptor_entry *dde, 82 struct data_descriptor_entry *ddl, 83 unsigned int dde_count, unsigned int byte_count) 84 { 85 dde->flags = 0; 86 dde->count = dde_count; 87 dde->index = 0; 88 dde->length = cpu_to_be32(byte_count); 89 dde->address = cpu_to_be64(nx842_get_pa(ddl)); 90 } 91 92 /* 93 * setup_direct_dde - Setup single DDE from buffer 94 * 95 * The DDE is setup with the buffer and length. The buffer must be properly 96 * aligned. The used length is returned. 97 * Returns: 98 * N Successfully set up DDE with N bytes 99 */ 100 static unsigned int setup_direct_dde(struct data_descriptor_entry *dde, 101 unsigned long pa, unsigned int len) 102 { 103 unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa)); 104 105 dde->flags = 0; 106 dde->count = 0; 107 dde->index = 0; 108 dde->length = cpu_to_be32(l); 109 dde->address = cpu_to_be64(pa); 110 111 return l; 112 } 113 114 /* 115 * setup_ddl - Setup DDL from buffer 116 * 117 * Returns: 118 * 0 Successfully set up DDL 119 */ 120 static int setup_ddl(struct data_descriptor_entry *dde, 121 struct data_descriptor_entry *ddl, 122 unsigned char *buf, unsigned int len, 123 bool in) 124 { 125 unsigned long pa = nx842_get_pa(buf); 126 int i, ret, total_len = len; 127 128 if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) { 129 pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n", 130 in ? 
"input" : "output", pa, DDE_BUFFER_ALIGN); 131 return -EINVAL; 132 } 133 134 /* only need to check last mult; since buffer must be 135 * DDE_BUFFER_ALIGN aligned, and that is a multiple of 136 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers 137 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT. 138 */ 139 if (len % DDE_BUFFER_LAST_MULT) { 140 pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n", 141 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT); 142 if (in) 143 return -EINVAL; 144 len = round_down(len, DDE_BUFFER_LAST_MULT); 145 } 146 147 /* use a single direct DDE */ 148 if (len <= LEN_ON_PAGE(pa)) { 149 ret = setup_direct_dde(dde, pa, len); 150 WARN_ON(ret < len); 151 return 0; 152 } 153 154 /* use the DDL */ 155 for (i = 0; i < DDL_LEN_MAX && len > 0; i++) { 156 ret = setup_direct_dde(&ddl[i], pa, len); 157 buf += ret; 158 len -= ret; 159 pa = nx842_get_pa(buf); 160 } 161 162 if (len > 0) { 163 pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n", 164 total_len, in ? "input" : "output", len); 165 if (in) 166 return -EMSGSIZE; 167 total_len -= len; 168 } 169 setup_indirect_dde(dde, ddl, i, total_len); 170 171 return 0; 172 } 173 174 #define CSB_ERR(csb, msg, ...) \ 175 pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n", \ 176 ##__VA_ARGS__, (csb)->flags, \ 177 (csb)->cs, (csb)->cc, (csb)->ce, \ 178 be32_to_cpu((csb)->count)) 179 180 #define CSB_ERR_ADDR(csb, msg, ...) 
\ 181 CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__, \ 182 (unsigned long)be64_to_cpu((csb)->address)) 183 184 static int wait_for_csb(struct nx842_workmem *wmem, 185 struct coprocessor_status_block *csb) 186 { 187 ktime_t start = wmem->start, now = ktime_get(); 188 ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX); 189 190 while (!(READ_ONCE(csb->flags) & CSB_V)) { 191 cpu_relax(); 192 now = ktime_get(); 193 if (ktime_after(now, timeout)) 194 break; 195 } 196 197 /* hw has updated csb and output buffer */ 198 barrier(); 199 200 /* check CSB flags */ 201 if (!(csb->flags & CSB_V)) { 202 CSB_ERR(csb, "CSB still not valid after %ld us, giving up", 203 (long)ktime_us_delta(now, start)); 204 return -ETIMEDOUT; 205 } 206 if (csb->flags & CSB_F) { 207 CSB_ERR(csb, "Invalid CSB format"); 208 return -EPROTO; 209 } 210 if (csb->flags & CSB_CH) { 211 CSB_ERR(csb, "Invalid CSB chaining state"); 212 return -EPROTO; 213 } 214 215 /* verify CSB completion sequence is 0 */ 216 if (csb->cs) { 217 CSB_ERR(csb, "Invalid CSB completion sequence"); 218 return -EPROTO; 219 } 220 221 /* check CSB Completion Code */ 222 switch (csb->cc) { 223 /* no error */ 224 case CSB_CC_SUCCESS: 225 break; 226 case CSB_CC_TPBC_GT_SPBC: 227 /* not an error, but the compressed data is 228 * larger than the uncompressed data :( 229 */ 230 break; 231 232 /* input data errors */ 233 case CSB_CC_OPERAND_OVERLAP: 234 /* input and output buffers overlap */ 235 CSB_ERR(csb, "Operand Overlap error"); 236 return -EINVAL; 237 case CSB_CC_INVALID_OPERAND: 238 CSB_ERR(csb, "Invalid operand"); 239 return -EINVAL; 240 case CSB_CC_NOSPC: 241 /* output buffer too small */ 242 return -ENOSPC; 243 case CSB_CC_ABORT: 244 CSB_ERR(csb, "Function aborted"); 245 return -EINTR; 246 case CSB_CC_CRC_MISMATCH: 247 CSB_ERR(csb, "CRC mismatch"); 248 return -EINVAL; 249 case CSB_CC_TEMPL_INVALID: 250 CSB_ERR(csb, "Compressed data template invalid"); 251 return -EINVAL; 252 case CSB_CC_TEMPL_OVERFLOW: 253 CSB_ERR(csb, "Compressed data 
template shows data past end"); 254 return -EINVAL; 255 case CSB_CC_EXCEED_BYTE_COUNT: /* P9 or later */ 256 /* 257 * DDE byte count exceeds the limit specified in Maximum 258 * byte count register. 259 */ 260 CSB_ERR(csb, "DDE byte count exceeds the limit"); 261 return -EINVAL; 262 263 /* these should not happen */ 264 case CSB_CC_INVALID_ALIGN: 265 /* setup_ddl should have detected this */ 266 CSB_ERR_ADDR(csb, "Invalid alignment"); 267 return -EINVAL; 268 case CSB_CC_DATA_LENGTH: 269 /* setup_ddl should have detected this */ 270 CSB_ERR(csb, "Invalid data length"); 271 return -EINVAL; 272 case CSB_CC_WR_TRANSLATION: 273 case CSB_CC_TRANSLATION: 274 case CSB_CC_TRANSLATION_DUP1: 275 case CSB_CC_TRANSLATION_DUP2: 276 case CSB_CC_TRANSLATION_DUP3: 277 case CSB_CC_TRANSLATION_DUP4: 278 case CSB_CC_TRANSLATION_DUP5: 279 case CSB_CC_TRANSLATION_DUP6: 280 /* should not happen, we use physical addrs */ 281 CSB_ERR_ADDR(csb, "Translation error"); 282 return -EPROTO; 283 case CSB_CC_WR_PROTECTION: 284 case CSB_CC_PROTECTION: 285 case CSB_CC_PROTECTION_DUP1: 286 case CSB_CC_PROTECTION_DUP2: 287 case CSB_CC_PROTECTION_DUP3: 288 case CSB_CC_PROTECTION_DUP4: 289 case CSB_CC_PROTECTION_DUP5: 290 case CSB_CC_PROTECTION_DUP6: 291 /* should not happen, we use physical addrs */ 292 CSB_ERR_ADDR(csb, "Protection error"); 293 return -EPROTO; 294 case CSB_CC_PRIVILEGE: 295 /* shouldn't happen, we're in HYP mode */ 296 CSB_ERR(csb, "Insufficient Privilege error"); 297 return -EPROTO; 298 case CSB_CC_EXCESSIVE_DDE: 299 /* shouldn't happen, setup_ddl doesn't use many dde's */ 300 CSB_ERR(csb, "Too many DDEs in DDL"); 301 return -EINVAL; 302 case CSB_CC_TRANSPORT: 303 case CSB_CC_INVALID_CRB: /* P9 or later */ 304 /* shouldn't happen, we setup CRB correctly */ 305 CSB_ERR(csb, "Invalid CRB"); 306 return -EINVAL; 307 case CSB_CC_INVALID_DDE: /* P9 or later */ 308 /* 309 * shouldn't happen, setup_direct/indirect_dde creates 310 * DDE right 311 */ 312 CSB_ERR(csb, "Invalid DDE"); 313 return 
-EINVAL; 314 case CSB_CC_SEGMENTED_DDL: 315 /* shouldn't happen, setup_ddl creates DDL right */ 316 CSB_ERR(csb, "Segmented DDL error"); 317 return -EINVAL; 318 case CSB_CC_DDE_OVERFLOW: 319 /* shouldn't happen, setup_ddl creates DDL right */ 320 CSB_ERR(csb, "DDE overflow error"); 321 return -EINVAL; 322 case CSB_CC_SESSION: 323 /* should not happen with ICSWX */ 324 CSB_ERR(csb, "Session violation error"); 325 return -EPROTO; 326 case CSB_CC_CHAIN: 327 /* should not happen, we don't use chained CRBs */ 328 CSB_ERR(csb, "Chained CRB error"); 329 return -EPROTO; 330 case CSB_CC_SEQUENCE: 331 /* should not happen, we don't use chained CRBs */ 332 CSB_ERR(csb, "CRB sequence number error"); 333 return -EPROTO; 334 case CSB_CC_UNKNOWN_CODE: 335 CSB_ERR(csb, "Unknown subfunction code"); 336 return -EPROTO; 337 338 /* hardware errors */ 339 case CSB_CC_RD_EXTERNAL: 340 case CSB_CC_RD_EXTERNAL_DUP1: 341 case CSB_CC_RD_EXTERNAL_DUP2: 342 case CSB_CC_RD_EXTERNAL_DUP3: 343 CSB_ERR_ADDR(csb, "Read error outside coprocessor"); 344 return -EPROTO; 345 case CSB_CC_WR_EXTERNAL: 346 CSB_ERR_ADDR(csb, "Write error outside coprocessor"); 347 return -EPROTO; 348 case CSB_CC_INTERNAL: 349 CSB_ERR(csb, "Internal error in coprocessor"); 350 return -EPROTO; 351 case CSB_CC_PROVISION: 352 CSB_ERR(csb, "Storage provision error"); 353 return -EPROTO; 354 case CSB_CC_HW: 355 CSB_ERR(csb, "Correctable hardware error"); 356 return -EPROTO; 357 case CSB_CC_HW_EXPIRED_TIMER: /* P9 or later */ 358 CSB_ERR(csb, "Job did not finish within allowed time"); 359 return -EPROTO; 360 361 default: 362 CSB_ERR(csb, "Invalid CC %d", csb->cc); 363 return -EPROTO; 364 } 365 366 /* check Completion Extension state */ 367 if (csb->ce & CSB_CE_TERMINATION) { 368 CSB_ERR(csb, "CSB request was terminated"); 369 return -EPROTO; 370 } 371 if (csb->ce & CSB_CE_INCOMPLETE) { 372 CSB_ERR(csb, "CSB request not complete"); 373 return -EPROTO; 374 } 375 if (!(csb->ce & CSB_CE_TPBC)) { 376 CSB_ERR(csb, "TPBC not provided, 
unknown target length"); 377 return -EPROTO; 378 } 379 380 /* successful completion */ 381 pr_debug_ratelimited("Processed %u bytes in %lu us\n", 382 be32_to_cpu(csb->count), 383 (unsigned long)ktime_us_delta(now, start)); 384 385 return 0; 386 } 387 388 static int nx842_config_crb(const unsigned char *in, unsigned int inlen, 389 unsigned char *out, unsigned int outlen, 390 struct nx842_workmem *wmem) 391 { 392 struct coprocessor_request_block *crb; 393 struct coprocessor_status_block *csb; 394 u64 csb_addr; 395 int ret; 396 397 crb = &wmem->crb; 398 csb = &crb->csb; 399 400 /* Clear any previous values */ 401 memset(crb, 0, sizeof(*crb)); 402 403 /* set up DDLs */ 404 ret = setup_ddl(&crb->source, wmem->ddl_in, 405 (unsigned char *)in, inlen, true); 406 if (ret) 407 return ret; 408 409 ret = setup_ddl(&crb->target, wmem->ddl_out, 410 out, outlen, false); 411 if (ret) 412 return ret; 413 414 /* set up CRB's CSB addr */ 415 csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS; 416 csb_addr |= CRB_CSB_AT; /* Addrs are phys */ 417 crb->csb_addr = cpu_to_be64(csb_addr); 418 419 return 0; 420 } 421 422 /** 423 * nx842_exec_icswx - compress/decompress data using the 842 algorithm 424 * 425 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. 426 * This compresses or decompresses the provided input buffer into the provided 427 * output buffer. 428 * 429 * Upon return from this function @outlen contains the length of the 430 * output data. If there is an error then @outlen will be 0 and an 431 * error will be specified by the return code from this function. 432 * 433 * The @workmem buffer should only be used by one function call at a time. 
434 * 435 * @in: input buffer pointer 436 * @inlen: input buffer size 437 * @out: output buffer pointer 438 * @outlenp: output buffer size pointer 439 * @workmem: working memory buffer pointer, size determined by 440 * nx842_powernv_driver.workmem_size 441 * @fc: function code, see CCW Function Codes in nx-842.h 442 * 443 * Returns: 444 * 0 Success, output of length @outlenp stored in the buffer at @out 445 * -ENODEV Hardware unavailable 446 * -ENOSPC Output buffer is to small 447 * -EMSGSIZE Input buffer too large 448 * -EINVAL buffer constraints do not fix nx842_constraints 449 * -EPROTO hardware error during operation 450 * -ETIMEDOUT hardware did not complete operation in reasonable time 451 * -EINTR operation was aborted 452 */ 453 static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen, 454 unsigned char *out, unsigned int *outlenp, 455 void *workmem, int fc) 456 { 457 struct coprocessor_request_block *crb; 458 struct coprocessor_status_block *csb; 459 struct nx842_workmem *wmem; 460 int ret; 461 u32 ccw; 462 unsigned int outlen = *outlenp; 463 464 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); 465 466 *outlenp = 0; 467 468 /* shoudn't happen, we don't load without a coproc */ 469 if (!nx842_ct) { 470 pr_err_ratelimited("coprocessor CT is 0"); 471 return -ENODEV; 472 } 473 474 ret = nx842_config_crb(in, inlen, out, outlen, wmem); 475 if (ret) 476 return ret; 477 478 crb = &wmem->crb; 479 csb = &crb->csb; 480 481 /* set up CCW */ 482 ccw = 0; 483 ccw = SET_FIELD(CCW_CT, ccw, nx842_ct); 484 ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */ 485 ccw = SET_FIELD(CCW_FC_842, ccw, fc); 486 487 wmem->start = ktime_get(); 488 489 /* do ICSWX */ 490 ret = icswx(cpu_to_be32(ccw), crb); 491 492 pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret, 493 (unsigned int)ccw, 494 (unsigned int)be32_to_cpu(crb->ccw)); 495 496 /* 497 * NX842 coprocessor sets 3rd bit in CR register with XER[S0]. 
498 * XER[S0] is the integer summary overflow bit which is nothing 499 * to do NX. Since this bit can be set with other return values, 500 * mask this bit. 501 */ 502 ret &= ~ICSWX_XERS0; 503 504 switch (ret) { 505 case ICSWX_INITIATED: 506 ret = wait_for_csb(wmem, csb); 507 break; 508 case ICSWX_BUSY: 509 pr_debug_ratelimited("842 Coprocessor busy\n"); 510 ret = -EBUSY; 511 break; 512 case ICSWX_REJECTED: 513 pr_err_ratelimited("ICSWX rejected\n"); 514 ret = -EPROTO; 515 break; 516 } 517 518 if (!ret) 519 *outlenp = be32_to_cpu(csb->count); 520 521 return ret; 522 } 523 524 /** 525 * nx842_exec_vas - compress/decompress data using the 842 algorithm 526 * 527 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. 528 * This compresses or decompresses the provided input buffer into the provided 529 * output buffer. 530 * 531 * Upon return from this function @outlen contains the length of the 532 * output data. If there is an error then @outlen will be 0 and an 533 * error will be specified by the return code from this function. 534 * 535 * The @workmem buffer should only be used by one function call at a time. 
536 * 537 * @in: input buffer pointer 538 * @inlen: input buffer size 539 * @out: output buffer pointer 540 * @outlenp: output buffer size pointer 541 * @workmem: working memory buffer pointer, size determined by 542 * nx842_powernv_driver.workmem_size 543 * @fc: function code, see CCW Function Codes in nx-842.h 544 * 545 * Returns: 546 * 0 Success, output of length @outlenp stored in the buffer 547 * at @out 548 * -ENODEV Hardware unavailable 549 * -ENOSPC Output buffer is to small 550 * -EMSGSIZE Input buffer too large 551 * -EINVAL buffer constraints do not fix nx842_constraints 552 * -EPROTO hardware error during operation 553 * -ETIMEDOUT hardware did not complete operation in reasonable time 554 * -EINTR operation was aborted 555 */ 556 static int nx842_exec_vas(const unsigned char *in, unsigned int inlen, 557 unsigned char *out, unsigned int *outlenp, 558 void *workmem, int fc) 559 { 560 struct coprocessor_request_block *crb; 561 struct coprocessor_status_block *csb; 562 struct nx842_workmem *wmem; 563 struct vas_window *txwin; 564 int ret, i = 0; 565 u32 ccw; 566 unsigned int outlen = *outlenp; 567 568 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); 569 570 *outlenp = 0; 571 572 crb = &wmem->crb; 573 csb = &crb->csb; 574 575 ret = nx842_config_crb(in, inlen, out, outlen, wmem); 576 if (ret) 577 return ret; 578 579 ccw = 0; 580 ccw = SET_FIELD(CCW_FC_842, ccw, fc); 581 crb->ccw = cpu_to_be32(ccw); 582 583 do { 584 wmem->start = ktime_get(); 585 preempt_disable(); 586 txwin = this_cpu_read(cpu_txwin); 587 588 /* 589 * VAS copy CRB into L2 cache. Refer <asm/vas.h>. 590 * @crb and @offset. 591 */ 592 vas_copy_crb(crb, 0); 593 594 /* 595 * VAS paste previously copied CRB to NX. 596 * @txwin, @offset and @last (must be true). 597 */ 598 ret = vas_paste_crb(txwin, 0, 1); 599 preempt_enable(); 600 /* 601 * Retry copy/paste function for VAS failures. 
602 */ 603 } while (ret && (i++ < VAS_RETRIES)); 604 605 if (ret) { 606 pr_err_ratelimited("VAS copy/paste failed\n"); 607 return ret; 608 } 609 610 ret = wait_for_csb(wmem, csb); 611 if (!ret) 612 *outlenp = be32_to_cpu(csb->count); 613 614 return ret; 615 } 616 617 /** 618 * nx842_powernv_compress - Compress data using the 842 algorithm 619 * 620 * Compression provided by the NX842 coprocessor on IBM PowerNV systems. 621 * The input buffer is compressed and the result is stored in the 622 * provided output buffer. 623 * 624 * Upon return from this function @outlen contains the length of the 625 * compressed data. If there is an error then @outlen will be 0 and an 626 * error will be specified by the return code from this function. 627 * 628 * @in: input buffer pointer 629 * @inlen: input buffer size 630 * @out: output buffer pointer 631 * @outlenp: output buffer size pointer 632 * @wmem: working memory buffer pointer, size determined by 633 * nx842_powernv_driver.workmem_size 634 * 635 * Returns: see @nx842_powernv_exec() 636 */ 637 static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen, 638 unsigned char *out, unsigned int *outlenp, 639 void *wmem) 640 { 641 return nx842_powernv_exec(in, inlen, out, outlenp, 642 wmem, CCW_FC_842_COMP_CRC); 643 } 644 645 /** 646 * nx842_powernv_decompress - Decompress data using the 842 algorithm 647 * 648 * Decompression provided by the NX842 coprocessor on IBM PowerNV systems. 649 * The input buffer is decompressed and the result is stored in the 650 * provided output buffer. 651 * 652 * Upon return from this function @outlen contains the length of the 653 * decompressed data. If there is an error then @outlen will be 0 and an 654 * error will be specified by the return code from this function. 
655 * 656 * @in: input buffer pointer 657 * @inlen: input buffer size 658 * @out: output buffer pointer 659 * @outlenp: output buffer size pointer 660 * @wmem: working memory buffer pointer, size determined by 661 * nx842_powernv_driver.workmem_size 662 * 663 * Returns: see @nx842_powernv_exec() 664 */ 665 static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen, 666 unsigned char *out, unsigned int *outlenp, 667 void *wmem) 668 { 669 return nx842_powernv_exec(in, inlen, out, outlenp, 670 wmem, CCW_FC_842_DECOMP_CRC); 671 } 672 673 static inline void nx_add_coprocs_list(struct nx_coproc *coproc, 674 int chipid) 675 { 676 coproc->chip_id = chipid; 677 INIT_LIST_HEAD(&coproc->list); 678 list_add(&coproc->list, &nx_coprocs); 679 } 680 681 static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc) 682 { 683 struct vas_window *txwin = NULL; 684 struct vas_tx_win_attr txattr; 685 686 /* 687 * Kernel requests will be high priority. So open send 688 * windows only for high priority RxFIFO entries. 689 */ 690 vas_init_tx_win_attr(&txattr, coproc->ct); 691 txattr.lpid = 0; /* lpid is 0 for kernel requests */ 692 693 /* 694 * Open a VAS send window which is used to send request to NX. 695 */ 696 txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr); 697 if (IS_ERR(txwin)) 698 pr_err("ibm,nx-842: Can not open TX window: %ld\n", 699 PTR_ERR(txwin)); 700 701 return txwin; 702 } 703 704 /* 705 * Identify chip ID for each CPU, open send wndow for the corresponding NX 706 * engine and save txwin in percpu cpu_txwin. 707 * cpu_txwin is used in copy/paste operation for each compression / 708 * decompression request. 
709 */ 710 static int nx_open_percpu_txwins(void) 711 { 712 struct nx_coproc *coproc, *n; 713 unsigned int i, chip_id; 714 715 for_each_possible_cpu(i) { 716 struct vas_window *txwin = NULL; 717 718 chip_id = cpu_to_chip_id(i); 719 720 list_for_each_entry_safe(coproc, n, &nx_coprocs, list) { 721 /* 722 * Kernel requests use only high priority FIFOs. So 723 * open send windows for these FIFOs. 724 * GZIP is not supported in kernel right now. 725 */ 726 727 if (coproc->ct != VAS_COP_TYPE_842_HIPRI) 728 continue; 729 730 if (coproc->chip_id == chip_id) { 731 txwin = nx_alloc_txwin(coproc); 732 if (IS_ERR(txwin)) 733 return PTR_ERR(txwin); 734 735 per_cpu(cpu_txwin, i) = txwin; 736 break; 737 } 738 } 739 740 if (!per_cpu(cpu_txwin, i)) { 741 /* shouldn't happen, Each chip will have NX engine */ 742 pr_err("NX engine is not available for CPU %d\n", i); 743 return -EINVAL; 744 } 745 } 746 747 return 0; 748 } 749 750 static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority, 751 int high, int normal) 752 { 753 if (!strcmp(priority, "High")) 754 coproc->ct = high; 755 else if (!strcmp(priority, "Normal")) 756 coproc->ct = normal; 757 else { 758 pr_err("Invalid RxFIFO priority value\n"); 759 return -EINVAL; 760 } 761 762 return 0; 763 } 764 765 static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, 766 int vasid, int type, int *ct) 767 { 768 struct vas_window *rxwin = NULL; 769 struct vas_rx_win_attr rxattr; 770 u32 lpid, pid, tid, fifo_size; 771 struct nx_coproc *coproc; 772 u64 rx_fifo; 773 const char *priority; 774 int ret; 775 776 ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo); 777 if (ret) { 778 pr_err("Missing rx-fifo-address property\n"); 779 return ret; 780 } 781 782 ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size); 783 if (ret) { 784 pr_err("Missing rx-fifo-size property\n"); 785 return ret; 786 } 787 788 ret = of_property_read_u32(dn, "lpid", &lpid); 789 if (ret) { 790 pr_err("Missing lpid property\n"); 
791 return ret; 792 } 793 794 ret = of_property_read_u32(dn, "pid", &pid); 795 if (ret) { 796 pr_err("Missing pid property\n"); 797 return ret; 798 } 799 800 ret = of_property_read_u32(dn, "tid", &tid); 801 if (ret) { 802 pr_err("Missing tid property\n"); 803 return ret; 804 } 805 806 ret = of_property_read_string(dn, "priority", &priority); 807 if (ret) { 808 pr_err("Missing priority property\n"); 809 return ret; 810 } 811 812 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL); 813 if (!coproc) 814 return -ENOMEM; 815 816 if (type == NX_CT_842) 817 ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_842_HIPRI, 818 VAS_COP_TYPE_842); 819 else if (type == NX_CT_GZIP) 820 ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_GZIP_HIPRI, 821 VAS_COP_TYPE_GZIP); 822 823 if (ret) 824 goto err_out; 825 826 vas_init_rx_win_attr(&rxattr, coproc->ct); 827 rxattr.rx_fifo = rx_fifo; 828 rxattr.rx_fifo_size = fifo_size; 829 rxattr.lnotify_lpid = lpid; 830 rxattr.lnotify_pid = pid; 831 rxattr.lnotify_tid = tid; 832 /* 833 * Maximum RX window credits can not be more than #CRBs in 834 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns. 835 */ 836 rxattr.wcreds_max = fifo_size / CRB_SIZE; 837 838 /* 839 * Open a VAS receice window which is used to configure RxFIFO 840 * for NX. 841 */ 842 rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr); 843 if (IS_ERR(rxwin)) { 844 ret = PTR_ERR(rxwin); 845 pr_err("setting RxFIFO with VAS failed: %d\n", 846 ret); 847 goto err_out; 848 } 849 850 coproc->vas.rxwin = rxwin; 851 coproc->vas.id = vasid; 852 nx_add_coprocs_list(coproc, chip_id); 853 854 /* 855 * (lpid, pid, tid) combination has to be unique for each 856 * coprocessor instance in the system. So to make it 857 * unique, skiboot uses coprocessor type such as 842 or 858 * GZIP for pid and provides this value to kernel in pid 859 * device-tree property. 
860 */ 861 *ct = pid; 862 863 return 0; 864 865 err_out: 866 kfree(coproc); 867 return ret; 868 } 869 870 static int __init nx_coproc_init(int chip_id, int ct_842, int ct_gzip) 871 { 872 int ret = 0; 873 874 if (opal_check_token(OPAL_NX_COPROC_INIT)) { 875 ret = opal_nx_coproc_init(chip_id, ct_842); 876 877 if (!ret) 878 ret = opal_nx_coproc_init(chip_id, ct_gzip); 879 880 if (ret) { 881 ret = opal_error_code(ret); 882 pr_err("Failed to initialize NX for chip(%d): %d\n", 883 chip_id, ret); 884 } 885 } else 886 pr_warn("Firmware doesn't support NX initialization\n"); 887 888 return ret; 889 } 890 891 static int __init find_nx_device_tree(struct device_node *dn, int chip_id, 892 int vasid, int type, char *devname, 893 int *ct) 894 { 895 int ret = 0; 896 897 if (of_device_is_compatible(dn, devname)) { 898 ret = vas_cfg_coproc_info(dn, chip_id, vasid, type, ct); 899 if (ret) 900 of_node_put(dn); 901 } 902 903 return ret; 904 } 905 906 static int __init nx_powernv_probe_vas(struct device_node *pn) 907 { 908 int chip_id, vasid, ret = 0; 909 int ct_842 = 0, ct_gzip = 0; 910 struct device_node *dn; 911 912 chip_id = of_get_ibm_chip_id(pn); 913 if (chip_id < 0) { 914 pr_err("ibm,chip-id missing\n"); 915 return -EINVAL; 916 } 917 918 vasid = chip_to_vas_id(chip_id); 919 if (vasid < 0) { 920 pr_err("Unable to map chip_id %d to vasid\n", chip_id); 921 return -EINVAL; 922 } 923 924 for_each_child_of_node(pn, dn) { 925 ret = find_nx_device_tree(dn, chip_id, vasid, NX_CT_842, 926 "ibm,p9-nx-842", &ct_842); 927 928 if (!ret) 929 ret = find_nx_device_tree(dn, chip_id, vasid, 930 NX_CT_GZIP, "ibm,p9-nx-gzip", &ct_gzip); 931 932 if (ret) { 933 of_node_put(dn); 934 return ret; 935 } 936 } 937 938 if (!ct_842 || !ct_gzip) { 939 pr_err("NX FIFO nodes are missing\n"); 940 return -EINVAL; 941 } 942 943 /* 944 * Initialize NX instance for both high and normal priority FIFOs. 
945 */ 946 ret = nx_coproc_init(chip_id, ct_842, ct_gzip); 947 948 return ret; 949 } 950 951 static int __init nx842_powernv_probe(struct device_node *dn) 952 { 953 struct nx_coproc *coproc; 954 unsigned int ct, ci; 955 int chip_id; 956 957 chip_id = of_get_ibm_chip_id(dn); 958 if (chip_id < 0) { 959 pr_err("ibm,chip-id missing\n"); 960 return -EINVAL; 961 } 962 963 if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) { 964 pr_err("ibm,842-coprocessor-type missing\n"); 965 return -EINVAL; 966 } 967 968 if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) { 969 pr_err("ibm,842-coprocessor-instance missing\n"); 970 return -EINVAL; 971 } 972 973 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL); 974 if (!coproc) 975 return -ENOMEM; 976 977 coproc->ct = ct; 978 coproc->ci = ci; 979 nx_add_coprocs_list(coproc, chip_id); 980 981 pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci); 982 983 if (!nx842_ct) 984 nx842_ct = ct; 985 else if (nx842_ct != ct) 986 pr_err("NX842 chip %d, CT %d != first found CT %d\n", 987 chip_id, ct, nx842_ct); 988 989 return 0; 990 } 991 992 static void nx_delete_coprocs(void) 993 { 994 struct nx_coproc *coproc, *n; 995 struct vas_window *txwin; 996 int i; 997 998 /* 999 * close percpu txwins that are opened for the corresponding coproc. 
1000 */ 1001 for_each_possible_cpu(i) { 1002 txwin = per_cpu(cpu_txwin, i); 1003 if (txwin) 1004 vas_win_close(txwin); 1005 1006 per_cpu(cpu_txwin, i) = NULL; 1007 } 1008 1009 list_for_each_entry_safe(coproc, n, &nx_coprocs, list) { 1010 if (coproc->vas.rxwin) 1011 vas_win_close(coproc->vas.rxwin); 1012 1013 list_del(&coproc->list); 1014 kfree(coproc); 1015 } 1016 } 1017 1018 static struct nx842_constraints nx842_powernv_constraints = { 1019 .alignment = DDE_BUFFER_ALIGN, 1020 .multiple = DDE_BUFFER_LAST_MULT, 1021 .minimum = DDE_BUFFER_LAST_MULT, 1022 .maximum = (DDL_LEN_MAX - 1) * PAGE_SIZE, 1023 }; 1024 1025 static struct nx842_driver nx842_powernv_driver = { 1026 .name = KBUILD_MODNAME, 1027 .owner = THIS_MODULE, 1028 .workmem_size = sizeof(struct nx842_workmem), 1029 .constraints = &nx842_powernv_constraints, 1030 .compress = nx842_powernv_compress, 1031 .decompress = nx842_powernv_decompress, 1032 }; 1033 1034 static int nx842_powernv_crypto_init(struct crypto_tfm *tfm) 1035 { 1036 return nx842_crypto_init(tfm, &nx842_powernv_driver); 1037 } 1038 1039 static struct crypto_alg nx842_powernv_alg = { 1040 .cra_name = "842", 1041 .cra_driver_name = "842-nx", 1042 .cra_priority = 300, 1043 .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, 1044 .cra_ctxsize = sizeof(struct nx842_crypto_ctx), 1045 .cra_module = THIS_MODULE, 1046 .cra_init = nx842_powernv_crypto_init, 1047 .cra_exit = nx842_crypto_exit, 1048 .cra_u = { .compress = { 1049 .coa_compress = nx842_crypto_compress, 1050 .coa_decompress = nx842_crypto_decompress } } 1051 }; 1052 1053 static __init int nx_compress_powernv_init(void) 1054 { 1055 struct device_node *dn; 1056 int ret; 1057 1058 /* verify workmem size/align restrictions */ 1059 BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN); 1060 BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN); 1061 BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN); 1062 /* verify buffer size/align restrictions */ 1063 BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN); 1064 BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT); 
1065 BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT); 1066 1067 for_each_compatible_node(dn, NULL, "ibm,power9-nx") { 1068 ret = nx_powernv_probe_vas(dn); 1069 if (ret) { 1070 nx_delete_coprocs(); 1071 of_node_put(dn); 1072 return ret; 1073 } 1074 } 1075 1076 if (list_empty(&nx_coprocs)) { 1077 for_each_compatible_node(dn, NULL, "ibm,power-nx") 1078 nx842_powernv_probe(dn); 1079 1080 if (!nx842_ct) 1081 return -ENODEV; 1082 1083 nx842_powernv_exec = nx842_exec_icswx; 1084 } else { 1085 /* 1086 * Register VAS user space API for NX GZIP so 1087 * that user space can use GZIP engine. 1088 * Using high FIFO priority for kernel requests and 1089 * normal FIFO priority is assigned for userspace. 1090 * 842 compression is supported only in kernel. 1091 */ 1092 ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP, 1093 "nx-gzip"); 1094 1095 /* 1096 * GZIP is not supported in kernel right now. 1097 * So open tx windows only for 842. 1098 */ 1099 if (!ret) 1100 ret = nx_open_percpu_txwins(); 1101 1102 if (ret) { 1103 nx_delete_coprocs(); 1104 return ret; 1105 } 1106 1107 nx842_powernv_exec = nx842_exec_vas; 1108 } 1109 1110 ret = crypto_register_alg(&nx842_powernv_alg); 1111 if (ret) { 1112 nx_delete_coprocs(); 1113 return ret; 1114 } 1115 1116 return 0; 1117 } 1118 module_init(nx_compress_powernv_init); 1119 1120 static void __exit nx_compress_powernv_exit(void) 1121 { 1122 /* 1123 * GZIP engine is supported only in power9 or later and nx842_ct 1124 * is used on power8 (icswx). 1125 * VAS API for NX GZIP is registered during init for user space 1126 * use. So delete this API use for GZIP engine. 1127 */ 1128 if (!nx842_ct) 1129 vas_unregister_api_powernv(); 1130 1131 crypto_unregister_alg(&nx842_powernv_alg); 1132 1133 nx_delete_coprocs(); 1134 } 1135 module_exit(nx_compress_powernv_exit); 1136