1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Driver for IBM PowerNV compression accelerator 4 * 5 * Copyright (C) 2015 Dan Streetman, IBM Corp 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include "nx-842.h" 11 12 #include <linux/timer.h> 13 14 #include <asm/prom.h> 15 #include <asm/icswx.h> 16 #include <asm/vas.h> 17 #include <asm/reg.h> 18 #include <asm/opal-api.h> 19 #include <asm/opal.h> 20 21 MODULE_LICENSE("GPL"); 22 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); 23 MODULE_DESCRIPTION("H/W Compression driver for IBM PowerNV processors"); 24 MODULE_ALIAS_CRYPTO("842"); 25 MODULE_ALIAS_CRYPTO("842-nx"); 26 27 #define WORKMEM_ALIGN (CRB_ALIGN) 28 #define CSB_WAIT_MAX (5000) /* ms */ 29 #define VAS_RETRIES (10) 30 31 struct nx842_workmem { 32 /* Below fields must be properly aligned */ 33 struct coprocessor_request_block crb; /* CRB_ALIGN align */ 34 struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */ 35 struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */ 36 /* Above fields must be properly aligned */ 37 38 ktime_t start; 39 40 char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */ 41 } __packed __aligned(WORKMEM_ALIGN); 42 43 struct nx842_coproc { 44 unsigned int chip_id; 45 unsigned int ct; 46 unsigned int ci; /* Coprocessor instance, used with icswx */ 47 struct { 48 struct vas_window *rxwin; 49 int id; 50 } vas; 51 struct list_head list; 52 }; 53 54 /* 55 * Send the request to NX engine on the chip for the corresponding CPU 56 * where the process is executing. Use with VAS function. 57 */ 58 static DEFINE_PER_CPU(struct vas_window *, cpu_txwin); 59 60 /* no cpu hotplug on powernv, so this list never changes after init */ 61 static LIST_HEAD(nx842_coprocs); 62 static unsigned int nx842_ct; /* used in icswx function */ 63 64 static int (*nx842_powernv_exec)(const unsigned char *in, 65 unsigned int inlen, unsigned char *out, 66 unsigned int *outlenp, void *workmem, int fc); 67 68 /** 69 * setup_indirect_dde - Setup an indirect DDE 70 * 71 * The DDE is setup with the the DDE count, byte count, and address of 72 * first direct DDE in the list. 73 */ 74 static void setup_indirect_dde(struct data_descriptor_entry *dde, 75 struct data_descriptor_entry *ddl, 76 unsigned int dde_count, unsigned int byte_count) 77 { 78 dde->flags = 0; 79 dde->count = dde_count; 80 dde->index = 0; 81 dde->length = cpu_to_be32(byte_count); 82 dde->address = cpu_to_be64(nx842_get_pa(ddl)); 83 } 84 85 /** 86 * setup_direct_dde - Setup single DDE from buffer 87 * 88 * The DDE is setup with the buffer and length. The buffer must be properly 89 * aligned. The used length is returned. 90 * Returns: 91 * N Successfully set up DDE with N bytes 92 */ 93 static unsigned int setup_direct_dde(struct data_descriptor_entry *dde, 94 unsigned long pa, unsigned int len) 95 { 96 unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa)); 97 98 dde->flags = 0; 99 dde->count = 0; 100 dde->index = 0; 101 dde->length = cpu_to_be32(l); 102 dde->address = cpu_to_be64(pa); 103 104 return l; 105 } 106 107 /** 108 * setup_ddl - Setup DDL from buffer 109 * 110 * Returns: 111 * 0 Successfully set up DDL 112 */ 113 static int setup_ddl(struct data_descriptor_entry *dde, 114 struct data_descriptor_entry *ddl, 115 unsigned char *buf, unsigned int len, 116 bool in) 117 { 118 unsigned long pa = nx842_get_pa(buf); 119 int i, ret, total_len = len; 120 121 if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) { 122 pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n", 123 in ? "input" : "output", pa, DDE_BUFFER_ALIGN); 124 return -EINVAL; 125 } 126 127 /* only need to check last mult; since buffer must be 128 * DDE_BUFFER_ALIGN aligned, and that is a multiple of 129 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers 130 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT. 131 */ 132 if (len % DDE_BUFFER_LAST_MULT) { 133 pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n", 134 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT); 135 if (in) 136 return -EINVAL; 137 len = round_down(len, DDE_BUFFER_LAST_MULT); 138 } 139 140 /* use a single direct DDE */ 141 if (len <= LEN_ON_PAGE(pa)) { 142 ret = setup_direct_dde(dde, pa, len); 143 WARN_ON(ret < len); 144 return 0; 145 } 146 147 /* use the DDL */ 148 for (i = 0; i < DDL_LEN_MAX && len > 0; i++) { 149 ret = setup_direct_dde(&ddl[i], pa, len); 150 buf += ret; 151 len -= ret; 152 pa = nx842_get_pa(buf); 153 } 154 155 if (len > 0) { 156 pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n", 157 total_len, in ? "input" : "output", len); 158 if (in) 159 return -EMSGSIZE; 160 total_len -= len; 161 } 162 setup_indirect_dde(dde, ddl, i, total_len); 163 164 return 0; 165 } 166 167 #define CSB_ERR(csb, msg, ...) \ 168 pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n", \ 169 ##__VA_ARGS__, (csb)->flags, \ 170 (csb)->cs, (csb)->cc, (csb)->ce, \ 171 be32_to_cpu((csb)->count)) 172 173 #define CSB_ERR_ADDR(csb, msg, ...) \ 174 CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__, \ 175 (unsigned long)be64_to_cpu((csb)->address)) 176 177 /** 178 * wait_for_csb 179 */ 180 static int wait_for_csb(struct nx842_workmem *wmem, 181 struct coprocessor_status_block *csb) 182 { 183 ktime_t start = wmem->start, now = ktime_get(); 184 ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX); 185 186 while (!(READ_ONCE(csb->flags) & CSB_V)) { 187 cpu_relax(); 188 now = ktime_get(); 189 if (ktime_after(now, timeout)) 190 break; 191 } 192 193 /* hw has updated csb and output buffer */ 194 barrier(); 195 196 /* check CSB flags */ 197 if (!(csb->flags & CSB_V)) { 198 CSB_ERR(csb, "CSB still not valid after %ld us, giving up", 199 (long)ktime_us_delta(now, start)); 200 return -ETIMEDOUT; 201 } 202 if (csb->flags & CSB_F) { 203 CSB_ERR(csb, "Invalid CSB format"); 204 return -EPROTO; 205 } 206 if (csb->flags & CSB_CH) { 207 CSB_ERR(csb, "Invalid CSB chaining state"); 208 return -EPROTO; 209 } 210 211 /* verify CSB completion sequence is 0 */ 212 if (csb->cs) { 213 CSB_ERR(csb, "Invalid CSB completion sequence"); 214 return -EPROTO; 215 } 216 217 /* check CSB Completion Code */ 218 switch (csb->cc) { 219 /* no error */ 220 case CSB_CC_SUCCESS: 221 break; 222 case CSB_CC_TPBC_GT_SPBC: 223 /* not an error, but the compressed data is 224 * larger than the uncompressed data :( 225 */ 226 break; 227 228 /* input data errors */ 229 case CSB_CC_OPERAND_OVERLAP: 230 /* input and output buffers overlap */ 231 CSB_ERR(csb, "Operand Overlap error"); 232 return -EINVAL; 233 case CSB_CC_INVALID_OPERAND: 234 CSB_ERR(csb, "Invalid operand"); 235 return -EINVAL; 236 case CSB_CC_NOSPC: 237 /* output buffer too small */ 238 return -ENOSPC; 239 case CSB_CC_ABORT: 240 CSB_ERR(csb, "Function aborted"); 241 return -EINTR; 242 case CSB_CC_CRC_MISMATCH: 243 CSB_ERR(csb, "CRC mismatch"); 244 return -EINVAL; 245 case CSB_CC_TEMPL_INVALID: 246 CSB_ERR(csb, "Compressed data template invalid"); 247 return -EINVAL; 248 case CSB_CC_TEMPL_OVERFLOW: 249 CSB_ERR(csb, "Compressed data template shows data past end"); 250 return -EINVAL; 251 case CSB_CC_EXCEED_BYTE_COUNT: /* P9 or later */ 252 /* 253 * DDE byte count exceeds the limit specified in Maximum 254 * byte count register. 255 */ 256 CSB_ERR(csb, "DDE byte count exceeds the limit"); 257 return -EINVAL; 258 259 /* these should not happen */ 260 case CSB_CC_INVALID_ALIGN: 261 /* setup_ddl should have detected this */ 262 CSB_ERR_ADDR(csb, "Invalid alignment"); 263 return -EINVAL; 264 case CSB_CC_DATA_LENGTH: 265 /* setup_ddl should have detected this */ 266 CSB_ERR(csb, "Invalid data length"); 267 return -EINVAL; 268 case CSB_CC_WR_TRANSLATION: 269 case CSB_CC_TRANSLATION: 270 case CSB_CC_TRANSLATION_DUP1: 271 case CSB_CC_TRANSLATION_DUP2: 272 case CSB_CC_TRANSLATION_DUP3: 273 case CSB_CC_TRANSLATION_DUP4: 274 case CSB_CC_TRANSLATION_DUP5: 275 case CSB_CC_TRANSLATION_DUP6: 276 /* should not happen, we use physical addrs */ 277 CSB_ERR_ADDR(csb, "Translation error"); 278 return -EPROTO; 279 case CSB_CC_WR_PROTECTION: 280 case CSB_CC_PROTECTION: 281 case CSB_CC_PROTECTION_DUP1: 282 case CSB_CC_PROTECTION_DUP2: 283 case CSB_CC_PROTECTION_DUP3: 284 case CSB_CC_PROTECTION_DUP4: 285 case CSB_CC_PROTECTION_DUP5: 286 case CSB_CC_PROTECTION_DUP6: 287 /* should not happen, we use physical addrs */ 288 CSB_ERR_ADDR(csb, "Protection error"); 289 return -EPROTO; 290 case CSB_CC_PRIVILEGE: 291 /* shouldn't happen, we're in HYP mode */ 292 CSB_ERR(csb, "Insufficient Privilege error"); 293 return -EPROTO; 294 case CSB_CC_EXCESSIVE_DDE: 295 /* shouldn't happen, setup_ddl doesn't use many dde's */ 296 CSB_ERR(csb, "Too many DDEs in DDL"); 297 return -EINVAL; 298 case CSB_CC_TRANSPORT: 299 case CSB_CC_INVALID_CRB: /* P9 or later */ 300 /* shouldn't happen, we setup CRB correctly */ 301 CSB_ERR(csb, "Invalid CRB"); 302 return -EINVAL; 303 case CSB_CC_INVALID_DDE: /* P9 or later */ 304 /* 305 * shouldn't happen, setup_direct/indirect_dde creates 306 * DDE right 307 */ 308 CSB_ERR(csb, "Invalid DDE"); 309 return -EINVAL; 310 case CSB_CC_SEGMENTED_DDL: 311 /* shouldn't happen, setup_ddl creates DDL right */ 312 CSB_ERR(csb, "Segmented DDL error"); 313 return -EINVAL; 314 case CSB_CC_DDE_OVERFLOW: 315 /* shouldn't happen, setup_ddl creates DDL right */ 316 CSB_ERR(csb, "DDE overflow error"); 317 return -EINVAL; 318 case CSB_CC_SESSION: 319 /* should not happen with ICSWX */ 320 CSB_ERR(csb, "Session violation error"); 321 return -EPROTO; 322 case CSB_CC_CHAIN: 323 /* should not happen, we don't use chained CRBs */ 324 CSB_ERR(csb, "Chained CRB error"); 325 return -EPROTO; 326 case CSB_CC_SEQUENCE: 327 /* should not happen, we don't use chained CRBs */ 328 CSB_ERR(csb, "CRB sequence number error"); 329 return -EPROTO; 330 case CSB_CC_UNKNOWN_CODE: 331 CSB_ERR(csb, "Unknown subfunction code"); 332 return -EPROTO; 333 334 /* hardware errors */ 335 case CSB_CC_RD_EXTERNAL: 336 case CSB_CC_RD_EXTERNAL_DUP1: 337 case CSB_CC_RD_EXTERNAL_DUP2: 338 case CSB_CC_RD_EXTERNAL_DUP3: 339 CSB_ERR_ADDR(csb, "Read error outside coprocessor"); 340 return -EPROTO; 341 case CSB_CC_WR_EXTERNAL: 342 CSB_ERR_ADDR(csb, "Write error outside coprocessor"); 343 return -EPROTO; 344 case CSB_CC_INTERNAL: 345 CSB_ERR(csb, "Internal error in coprocessor"); 346 return -EPROTO; 347 case CSB_CC_PROVISION: 348 CSB_ERR(csb, "Storage provision error"); 349 return -EPROTO; 350 case CSB_CC_HW: 351 CSB_ERR(csb, "Correctable hardware error"); 352 return -EPROTO; 353 case CSB_CC_HW_EXPIRED_TIMER: /* P9 or later */ 354 CSB_ERR(csb, "Job did not finish within allowed time"); 355 return -EPROTO; 356 357 default: 358 CSB_ERR(csb, "Invalid CC %d", csb->cc); 359 return -EPROTO; 360 } 361 362 /* check Completion Extension state */ 363 if (csb->ce & CSB_CE_TERMINATION) { 364 CSB_ERR(csb, "CSB request was terminated"); 365 return -EPROTO; 366 } 367 if (csb->ce & CSB_CE_INCOMPLETE) { 368 CSB_ERR(csb, "CSB request not complete"); 369 return -EPROTO; 370 } 371 if (!(csb->ce & CSB_CE_TPBC)) { 372 CSB_ERR(csb, "TPBC not provided, unknown target length"); 373 return -EPROTO; 374 } 375 376 /* successful completion */ 377 pr_debug_ratelimited("Processed %u bytes in %lu us\n", 378 be32_to_cpu(csb->count), 379 (unsigned long)ktime_us_delta(now, start)); 380 381 return 0; 382 } 383 384 static int nx842_config_crb(const unsigned char *in, unsigned int inlen, 385 unsigned char *out, unsigned int outlen, 386 struct nx842_workmem *wmem) 387 { 388 struct coprocessor_request_block *crb; 389 struct coprocessor_status_block *csb; 390 u64 csb_addr; 391 int ret; 392 393 crb = &wmem->crb; 394 csb = &crb->csb; 395 396 /* Clear any previous values */ 397 memset(crb, 0, sizeof(*crb)); 398 399 /* set up DDLs */ 400 ret = setup_ddl(&crb->source, wmem->ddl_in, 401 (unsigned char *)in, inlen, true); 402 if (ret) 403 return ret; 404 405 ret = setup_ddl(&crb->target, wmem->ddl_out, 406 out, outlen, false); 407 if (ret) 408 return ret; 409 410 /* set up CRB's CSB addr */ 411 csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS; 412 csb_addr |= CRB_CSB_AT; /* Addrs are phys */ 413 crb->csb_addr = cpu_to_be64(csb_addr); 414 415 return 0; 416 } 417 418 /** 419 * nx842_exec_icswx - compress/decompress data using the 842 algorithm 420 * 421 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. 422 * This compresses or decompresses the provided input buffer into the provided 423 * output buffer. 424 * 425 * Upon return from this function @outlen contains the length of the 426 * output data. If there is an error then @outlen will be 0 and an 427 * error will be specified by the return code from this function. 428 * 429 * The @workmem buffer should only be used by one function call at a time. 430 * 431 * @in: input buffer pointer 432 * @inlen: input buffer size 433 * @out: output buffer pointer 434 * @outlenp: output buffer size pointer 435 * @workmem: working memory buffer pointer, size determined by 436 * nx842_powernv_driver.workmem_size 437 * @fc: function code, see CCW Function Codes in nx-842.h 438 * 439 * Returns: 440 * 0 Success, output of length @outlenp stored in the buffer at @out 441 * -ENODEV Hardware unavailable 442 * -ENOSPC Output buffer is to small 443 * -EMSGSIZE Input buffer too large 444 * -EINVAL buffer constraints do not fix nx842_constraints 445 * -EPROTO hardware error during operation 446 * -ETIMEDOUT hardware did not complete operation in reasonable time 447 * -EINTR operation was aborted 448 */ 449 static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen, 450 unsigned char *out, unsigned int *outlenp, 451 void *workmem, int fc) 452 { 453 struct coprocessor_request_block *crb; 454 struct coprocessor_status_block *csb; 455 struct nx842_workmem *wmem; 456 int ret; 457 u32 ccw; 458 unsigned int outlen = *outlenp; 459 460 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); 461 462 *outlenp = 0; 463 464 /* shoudn't happen, we don't load without a coproc */ 465 if (!nx842_ct) { 466 pr_err_ratelimited("coprocessor CT is 0"); 467 return -ENODEV; 468 } 469 470 ret = nx842_config_crb(in, inlen, out, outlen, wmem); 471 if (ret) 472 return ret; 473 474 crb = &wmem->crb; 475 csb = &crb->csb; 476 477 /* set up CCW */ 478 ccw = 0; 479 ccw = SET_FIELD(CCW_CT, ccw, nx842_ct); 480 ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */ 481 ccw = SET_FIELD(CCW_FC_842, ccw, fc); 482 483 wmem->start = ktime_get(); 484 485 /* do ICSWX */ 486 ret = icswx(cpu_to_be32(ccw), crb); 487 488 pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret, 489 (unsigned int)ccw, 490 (unsigned int)be32_to_cpu(crb->ccw)); 491 492 /* 493 * NX842 coprocessor sets 3rd bit in CR register with XER[S0]. 494 * XER[S0] is the integer summary overflow bit which is nothing 495 * to do NX. Since this bit can be set with other return values, 496 * mask this bit. 497 */ 498 ret &= ~ICSWX_XERS0; 499 500 switch (ret) { 501 case ICSWX_INITIATED: 502 ret = wait_for_csb(wmem, csb); 503 break; 504 case ICSWX_BUSY: 505 pr_debug_ratelimited("842 Coprocessor busy\n"); 506 ret = -EBUSY; 507 break; 508 case ICSWX_REJECTED: 509 pr_err_ratelimited("ICSWX rejected\n"); 510 ret = -EPROTO; 511 break; 512 } 513 514 if (!ret) 515 *outlenp = be32_to_cpu(csb->count); 516 517 return ret; 518 } 519 520 /** 521 * nx842_exec_vas - compress/decompress data using the 842 algorithm 522 * 523 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. 524 * This compresses or decompresses the provided input buffer into the provided 525 * output buffer. 526 * 527 * Upon return from this function @outlen contains the length of the 528 * output data. If there is an error then @outlen will be 0 and an 529 * error will be specified by the return code from this function. 530 * 531 * The @workmem buffer should only be used by one function call at a time. 532 * 533 * @in: input buffer pointer 534 * @inlen: input buffer size 535 * @out: output buffer pointer 536 * @outlenp: output buffer size pointer 537 * @workmem: working memory buffer pointer, size determined by 538 * nx842_powernv_driver.workmem_size 539 * @fc: function code, see CCW Function Codes in nx-842.h 540 * 541 * Returns: 542 * 0 Success, output of length @outlenp stored in the buffer 543 * at @out 544 * -ENODEV Hardware unavailable 545 * -ENOSPC Output buffer is to small 546 * -EMSGSIZE Input buffer too large 547 * -EINVAL buffer constraints do not fix nx842_constraints 548 * -EPROTO hardware error during operation 549 * -ETIMEDOUT hardware did not complete operation in reasonable time 550 * -EINTR operation was aborted 551 */ 552 static int nx842_exec_vas(const unsigned char *in, unsigned int inlen, 553 unsigned char *out, unsigned int *outlenp, 554 void *workmem, int fc) 555 { 556 struct coprocessor_request_block *crb; 557 struct coprocessor_status_block *csb; 558 struct nx842_workmem *wmem; 559 struct vas_window *txwin; 560 int ret, i = 0; 561 u32 ccw; 562 unsigned int outlen = *outlenp; 563 564 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); 565 566 *outlenp = 0; 567 568 crb = &wmem->crb; 569 csb = &crb->csb; 570 571 ret = nx842_config_crb(in, inlen, out, outlen, wmem); 572 if (ret) 573 return ret; 574 575 ccw = 0; 576 ccw = SET_FIELD(CCW_FC_842, ccw, fc); 577 crb->ccw = cpu_to_be32(ccw); 578 579 do { 580 wmem->start = ktime_get(); 581 preempt_disable(); 582 txwin = this_cpu_read(cpu_txwin); 583 584 /* 585 * VAS copy CRB into L2 cache. Refer <asm/vas.h>. 586 * @crb and @offset. 587 */ 588 vas_copy_crb(crb, 0); 589 590 /* 591 * VAS paste previously copied CRB to NX. 592 * @txwin, @offset and @last (must be true). 593 */ 594 ret = vas_paste_crb(txwin, 0, 1); 595 preempt_enable(); 596 /* 597 * Retry copy/paste function for VAS failures. 598 */ 599 } while (ret && (i++ < VAS_RETRIES)); 600 601 if (ret) { 602 pr_err_ratelimited("VAS copy/paste failed\n"); 603 return ret; 604 } 605 606 ret = wait_for_csb(wmem, csb); 607 if (!ret) 608 *outlenp = be32_to_cpu(csb->count); 609 610 return ret; 611 } 612 613 /** 614 * nx842_powernv_compress - Compress data using the 842 algorithm 615 * 616 * Compression provided by the NX842 coprocessor on IBM PowerNV systems. 617 * The input buffer is compressed and the result is stored in the 618 * provided output buffer. 619 * 620 * Upon return from this function @outlen contains the length of the 621 * compressed data. If there is an error then @outlen will be 0 and an 622 * error will be specified by the return code from this function. 623 * 624 * @in: input buffer pointer 625 * @inlen: input buffer size 626 * @out: output buffer pointer 627 * @outlenp: output buffer size pointer 628 * @workmem: working memory buffer pointer, size determined by 629 * nx842_powernv_driver.workmem_size 630 * 631 * Returns: see @nx842_powernv_exec() 632 */ 633 static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen, 634 unsigned char *out, unsigned int *outlenp, 635 void *wmem) 636 { 637 return nx842_powernv_exec(in, inlen, out, outlenp, 638 wmem, CCW_FC_842_COMP_CRC); 639 } 640 641 /** 642 * nx842_powernv_decompress - Decompress data using the 842 algorithm 643 * 644 * Decompression provided by the NX842 coprocessor on IBM PowerNV systems. 645 * The input buffer is decompressed and the result is stored in the 646 * provided output buffer. 647 * 648 * Upon return from this function @outlen contains the length of the 649 * decompressed data. If there is an error then @outlen will be 0 and an 650 * error will be specified by the return code from this function. 651 * 652 * @in: input buffer pointer 653 * @inlen: input buffer size 654 * @out: output buffer pointer 655 * @outlenp: output buffer size pointer 656 * @workmem: working memory buffer pointer, size determined by 657 * nx842_powernv_driver.workmem_size 658 * 659 * Returns: see @nx842_powernv_exec() 660 */ 661 static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen, 662 unsigned char *out, unsigned int *outlenp, 663 void *wmem) 664 { 665 return nx842_powernv_exec(in, inlen, out, outlenp, 666 wmem, CCW_FC_842_DECOMP_CRC); 667 } 668 669 static inline void nx842_add_coprocs_list(struct nx842_coproc *coproc, 670 int chipid) 671 { 672 coproc->chip_id = chipid; 673 INIT_LIST_HEAD(&coproc->list); 674 list_add(&coproc->list, &nx842_coprocs); 675 } 676 677 static struct vas_window *nx842_alloc_txwin(struct nx842_coproc *coproc) 678 { 679 struct vas_window *txwin = NULL; 680 struct vas_tx_win_attr txattr; 681 682 /* 683 * Kernel requests will be high priority. So open send 684 * windows only for high priority RxFIFO entries. 685 */ 686 vas_init_tx_win_attr(&txattr, coproc->ct); 687 txattr.lpid = 0; /* lpid is 0 for kernel requests */ 688 txattr.pid = 0; /* pid is 0 for kernel requests */ 689 690 /* 691 * Open a VAS send window which is used to send request to NX. 692 */ 693 txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr); 694 if (IS_ERR(txwin)) 695 pr_err("ibm,nx-842: Can not open TX window: %ld\n", 696 PTR_ERR(txwin)); 697 698 return txwin; 699 } 700 701 /* 702 * Identify chip ID for each CPU, open send wndow for the corresponding NX 703 * engine and save txwin in percpu cpu_txwin. 704 * cpu_txwin is used in copy/paste operation for each compression / 705 * decompression request. 706 */ 707 static int nx842_open_percpu_txwins(void) 708 { 709 struct nx842_coproc *coproc, *n; 710 unsigned int i, chip_id; 711 712 for_each_possible_cpu(i) { 713 struct vas_window *txwin = NULL; 714 715 chip_id = cpu_to_chip_id(i); 716 717 list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) { 718 /* 719 * Kernel requests use only high priority FIFOs. So 720 * open send windows for these FIFOs. 721 */ 722 723 if (coproc->ct != VAS_COP_TYPE_842_HIPRI) 724 continue; 725 726 if (coproc->chip_id == chip_id) { 727 txwin = nx842_alloc_txwin(coproc); 728 if (IS_ERR(txwin)) 729 return PTR_ERR(txwin); 730 731 per_cpu(cpu_txwin, i) = txwin; 732 break; 733 } 734 } 735 736 if (!per_cpu(cpu_txwin, i)) { 737 /* shouldn't happen, Each chip will have NX engine */ 738 pr_err("NX engine is not available for CPU %d\n", i); 739 return -EINVAL; 740 } 741 } 742 743 return 0; 744 } 745 746 static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, 747 int vasid, int *ct) 748 { 749 struct vas_window *rxwin = NULL; 750 struct vas_rx_win_attr rxattr; 751 struct nx842_coproc *coproc; 752 u32 lpid, pid, tid, fifo_size; 753 u64 rx_fifo; 754 const char *priority; 755 int ret; 756 757 ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo); 758 if (ret) { 759 pr_err("Missing rx-fifo-address property\n"); 760 return ret; 761 } 762 763 ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size); 764 if (ret) { 765 pr_err("Missing rx-fifo-size property\n"); 766 return ret; 767 } 768 769 ret = of_property_read_u32(dn, "lpid", &lpid); 770 if (ret) { 771 pr_err("Missing lpid property\n"); 772 return ret; 773 } 774 775 ret = of_property_read_u32(dn, "pid", &pid); 776 if (ret) { 777 pr_err("Missing pid property\n"); 778 return ret; 779 } 780 781 ret = of_property_read_u32(dn, "tid", &tid); 782 if (ret) { 783 pr_err("Missing tid property\n"); 784 return ret; 785 } 786 787 ret = of_property_read_string(dn, "priority", &priority); 788 if (ret) { 789 pr_err("Missing priority property\n"); 790 return ret; 791 } 792 793 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL); 794 if (!coproc) 795 return -ENOMEM; 796 797 if (!strcmp(priority, "High")) 798 coproc->ct = VAS_COP_TYPE_842_HIPRI; 799 else if (!strcmp(priority, "Normal")) 800 coproc->ct = VAS_COP_TYPE_842; 801 else { 802 pr_err("Invalid RxFIFO priority value\n"); 803 ret = -EINVAL; 804 goto err_out; 805 } 806 807 vas_init_rx_win_attr(&rxattr, coproc->ct); 808 rxattr.rx_fifo = (void *)rx_fifo; 809 rxattr.rx_fifo_size = fifo_size; 810 rxattr.lnotify_lpid = lpid; 811 rxattr.lnotify_pid = pid; 812 rxattr.lnotify_tid = tid; 813 /* 814 * Maximum RX window credits can not be more than #CRBs in 815 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns. 816 */ 817 rxattr.wcreds_max = fifo_size / CRB_SIZE; 818 819 /* 820 * Open a VAS receice window which is used to configure RxFIFO 821 * for NX. 822 */ 823 rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr); 824 if (IS_ERR(rxwin)) { 825 ret = PTR_ERR(rxwin); 826 pr_err("setting RxFIFO with VAS failed: %d\n", 827 ret); 828 goto err_out; 829 } 830 831 coproc->vas.rxwin = rxwin; 832 coproc->vas.id = vasid; 833 nx842_add_coprocs_list(coproc, chip_id); 834 835 /* 836 * (lpid, pid, tid) combination has to be unique for each 837 * coprocessor instance in the system. So to make it 838 * unique, skiboot uses coprocessor type such as 842 or 839 * GZIP for pid and provides this value to kernel in pid 840 * device-tree property. 841 */ 842 *ct = pid; 843 844 return 0; 845 846 err_out: 847 kfree(coproc); 848 return ret; 849 } 850 851 852 static int __init nx842_powernv_probe_vas(struct device_node *pn) 853 { 854 struct device_node *dn; 855 int chip_id, vasid, ret = 0; 856 int nx_fifo_found = 0; 857 int uninitialized_var(ct); 858 859 chip_id = of_get_ibm_chip_id(pn); 860 if (chip_id < 0) { 861 pr_err("ibm,chip-id missing\n"); 862 return -EINVAL; 863 } 864 865 vasid = chip_to_vas_id(chip_id); 866 if (vasid < 0) { 867 pr_err("Unable to map chip_id %d to vasid\n", chip_id); 868 return -EINVAL; 869 } 870 871 for_each_child_of_node(pn, dn) { 872 if (of_device_is_compatible(dn, "ibm,p9-nx-842")) { 873 ret = vas_cfg_coproc_info(dn, chip_id, vasid, &ct); 874 if (ret) { 875 of_node_put(dn); 876 return ret; 877 } 878 nx_fifo_found++; 879 } 880 } 881 882 if (!nx_fifo_found) { 883 pr_err("NX842 FIFO nodes are missing\n"); 884 return -EINVAL; 885 } 886 887 /* 888 * Initialize NX instance for both high and normal priority FIFOs. 889 */ 890 if (opal_check_token(OPAL_NX_COPROC_INIT)) { 891 ret = opal_nx_coproc_init(chip_id, ct); 892 if (ret) { 893 pr_err("Failed to initialize NX for chip(%d): %d\n", 894 chip_id, ret); 895 ret = opal_error_code(ret); 896 } 897 } else 898 pr_warn("Firmware doesn't support NX initialization\n"); 899 900 return ret; 901 } 902 903 static int __init nx842_powernv_probe(struct device_node *dn) 904 { 905 struct nx842_coproc *coproc; 906 unsigned int ct, ci; 907 int chip_id; 908 909 chip_id = of_get_ibm_chip_id(dn); 910 if (chip_id < 0) { 911 pr_err("ibm,chip-id missing\n"); 912 return -EINVAL; 913 } 914 915 if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) { 916 pr_err("ibm,842-coprocessor-type missing\n"); 917 return -EINVAL; 918 } 919 920 if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) { 921 pr_err("ibm,842-coprocessor-instance missing\n"); 922 return -EINVAL; 923 } 924 925 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL); 926 if (!coproc) 927 return -ENOMEM; 928 929 coproc->ct = ct; 930 coproc->ci = ci; 931 nx842_add_coprocs_list(coproc, chip_id); 932 933 pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci); 934 935 if (!nx842_ct) 936 nx842_ct = ct; 937 else if (nx842_ct != ct) 938 pr_err("NX842 chip %d, CT %d != first found CT %d\n", 939 chip_id, ct, nx842_ct); 940 941 return 0; 942 } 943 944 static void nx842_delete_coprocs(void) 945 { 946 struct nx842_coproc *coproc, *n; 947 struct vas_window *txwin; 948 int i; 949 950 /* 951 * close percpu txwins that are opened for the corresponding coproc. 952 */ 953 for_each_possible_cpu(i) { 954 txwin = per_cpu(cpu_txwin, i); 955 if (txwin) 956 vas_win_close(txwin); 957 958 per_cpu(cpu_txwin, i) = 0; 959 } 960 961 list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) { 962 if (coproc->vas.rxwin) 963 vas_win_close(coproc->vas.rxwin); 964 965 list_del(&coproc->list); 966 kfree(coproc); 967 } 968 } 969 970 static struct nx842_constraints nx842_powernv_constraints = { 971 .alignment = DDE_BUFFER_ALIGN, 972 .multiple = DDE_BUFFER_LAST_MULT, 973 .minimum = DDE_BUFFER_LAST_MULT, 974 .maximum = (DDL_LEN_MAX - 1) * PAGE_SIZE, 975 }; 976 977 static struct nx842_driver nx842_powernv_driver = { 978 .name = KBUILD_MODNAME, 979 .owner = THIS_MODULE, 980 .workmem_size = sizeof(struct nx842_workmem), 981 .constraints = &nx842_powernv_constraints, 982 .compress = nx842_powernv_compress, 983 .decompress = nx842_powernv_decompress, 984 }; 985 986 static int nx842_powernv_crypto_init(struct crypto_tfm *tfm) 987 { 988 return nx842_crypto_init(tfm, &nx842_powernv_driver); 989 } 990 991 static struct crypto_alg nx842_powernv_alg = { 992 .cra_name = "842", 993 .cra_driver_name = "842-nx", 994 .cra_priority = 300, 995 .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, 996 .cra_ctxsize = sizeof(struct nx842_crypto_ctx), 997 .cra_module = THIS_MODULE, 998 .cra_init = nx842_powernv_crypto_init, 999 .cra_exit = nx842_crypto_exit, 1000 .cra_u = { .compress = { 1001 .coa_compress = nx842_crypto_compress, 1002 .coa_decompress = nx842_crypto_decompress } } 1003 }; 1004 1005 static __init int nx842_powernv_init(void) 1006 { 1007 struct device_node *dn; 1008 int ret; 1009 1010 /* verify workmem size/align restrictions */ 1011 BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN); 1012 BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN); 1013 BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN); 1014 /* verify buffer size/align restrictions */ 1015 BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN); 1016 BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT); 1017 BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT); 1018 1019 for_each_compatible_node(dn, NULL, "ibm,power9-nx") { 1020 ret = nx842_powernv_probe_vas(dn); 1021 if (ret) { 1022 nx842_delete_coprocs(); 1023 of_node_put(dn); 1024 return ret; 1025 } 1026 } 1027 1028 if (list_empty(&nx842_coprocs)) { 1029 for_each_compatible_node(dn, NULL, "ibm,power-nx") 1030 nx842_powernv_probe(dn); 1031 1032 if (!nx842_ct) 1033 return -ENODEV; 1034 1035 nx842_powernv_exec = nx842_exec_icswx; 1036 } else { 1037 ret = nx842_open_percpu_txwins(); 1038 if (ret) { 1039 nx842_delete_coprocs(); 1040 return ret; 1041 } 1042 1043 nx842_powernv_exec = nx842_exec_vas; 1044 } 1045 1046 ret = crypto_register_alg(&nx842_powernv_alg); 1047 if (ret) { 1048 nx842_delete_coprocs(); 1049 return ret; 1050 } 1051 1052 return 0; 1053 } 1054 module_init(nx842_powernv_init); 1055 1056 static void __exit nx842_powernv_exit(void) 1057 { 1058 crypto_unregister_alg(&nx842_powernv_alg); 1059 1060 nx842_delete_coprocs(); 1061 } 1062 module_exit(nx842_powernv_exit); 1063