1 /** 2 * IBM Accelerator Family 'GenWQE' 3 * 4 * (C) Copyright IBM Corp. 2013 5 * 6 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> 7 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> 8 * Author: Michael Jung <mijung@gmx.net> 9 * Author: Michael Ruettger <michael@ibmra.de> 10 * 11 * This program is free software; you can redistribute it and/or modify 12 * it under the terms of the GNU General Public License (version 2 only) 13 * as published by the Free Software Foundation. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU General Public License for more details. 19 */ 20 21 /* 22 * Module initialization and PCIe setup. Card health monitoring and 23 * recovery functionality. Character device creation and deletion are 24 * controlled from here. 25 */ 26 27 #include <linux/module.h> 28 #include <linux/types.h> 29 #include <linux/pci.h> 30 #include <linux/err.h> 31 #include <linux/aer.h> 32 #include <linux/string.h> 33 #include <linux/sched.h> 34 #include <linux/wait.h> 35 #include <linux/delay.h> 36 #include <linux/dma-mapping.h> 37 #include <linux/module.h> 38 #include <linux/notifier.h> 39 #include <linux/device.h> 40 #include <linux/log2.h> 41 42 #include "card_base.h" 43 #include "card_ddcb.h" 44 45 MODULE_AUTHOR("Frank Haverkamp <haver@linux.vnet.ibm.com>"); 46 MODULE_AUTHOR("Michael Ruettger <michael@ibmra.de>"); 47 MODULE_AUTHOR("Joerg-Stephan Vogt <jsvogt@de.ibm.com>"); 48 MODULE_AUTHOR("Michael Jung <mijung@gmx.net>"); 49 50 MODULE_DESCRIPTION("GenWQE Card"); 51 MODULE_VERSION(DRV_VERSION); 52 MODULE_LICENSE("GPL"); 53 54 static char genwqe_driver_name[] = GENWQE_DEVNAME; 55 static struct class *class_genwqe; 56 static struct dentry *debugfs_genwqe; 57 static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX]; 58 59 /* PCI structure for identifying device by PCI vendor and device ID */ 60 static const struct pci_device_id genwqe_device_table[] = { 61 { .vendor = PCI_VENDOR_ID_IBM, 62 .device = PCI_DEVICE_GENWQE, 63 .subvendor = PCI_SUBVENDOR_ID_IBM, 64 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, 65 .class = (PCI_CLASSCODE_GENWQE5 << 8), 66 .class_mask = ~0, 67 .driver_data = 0 }, 68 69 /* Initial SR-IOV bring-up image */ 70 { .vendor = PCI_VENDOR_ID_IBM, 71 .device = PCI_DEVICE_GENWQE, 72 .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, 73 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV, 74 .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), 75 .class_mask = ~0, 76 .driver_data = 0 }, 77 78 { .vendor = PCI_VENDOR_ID_IBM, /* VF Vendor ID */ 79 .device = 0x0000, /* VF Device ID */ 80 .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, 81 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV, 82 .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), 83 .class_mask = ~0, 84 .driver_data = 0 }, 85 86 /* Fixed up image */ 87 { .vendor = PCI_VENDOR_ID_IBM, 88 .device = PCI_DEVICE_GENWQE, 89 .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, 90 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, 91 .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), 92 .class_mask = ~0, 93 .driver_data = 0 }, 94 95 { .vendor = PCI_VENDOR_ID_IBM, /* VF Vendor ID */ 96 .device = 0x0000, /* VF Device ID */ 97 .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, 98 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, 99 .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), 100 .class_mask = ~0, 101 .driver_data = 0 }, 102 103 /* Even one more ... */ 104 { .vendor = PCI_VENDOR_ID_IBM, 105 .device = PCI_DEVICE_GENWQE, 106 .subvendor = PCI_SUBVENDOR_ID_IBM, 107 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_NEW, 108 .class = (PCI_CLASSCODE_GENWQE5 << 8), 109 .class_mask = ~0, 110 .driver_data = 0 }, 111 112 { 0, } /* 0 terminated list. */ 113 }; 114 115 MODULE_DEVICE_TABLE(pci, genwqe_device_table); 116 117 /** 118 * genwqe_dev_alloc() - Create and prepare a new card descriptor 119 * 120 * Return: Pointer to card descriptor, or ERR_PTR(err) on error 121 */ 122 static struct genwqe_dev *genwqe_dev_alloc(void) 123 { 124 unsigned int i = 0, j; 125 struct genwqe_dev *cd; 126 127 for (i = 0; i < GENWQE_CARD_NO_MAX; i++) { 128 if (genwqe_devices[i] == NULL) 129 break; 130 } 131 if (i >= GENWQE_CARD_NO_MAX) 132 return ERR_PTR(-ENODEV); 133 134 cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL); 135 if (!cd) 136 return ERR_PTR(-ENOMEM); 137 138 cd->card_idx = i; 139 cd->class_genwqe = class_genwqe; 140 cd->debugfs_genwqe = debugfs_genwqe; 141 142 /* 143 * This comes from kernel config option and can be overritten via 144 * debugfs. 145 */ 146 cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY; 147 148 init_waitqueue_head(&cd->queue_waitq); 149 150 spin_lock_init(&cd->file_lock); 151 INIT_LIST_HEAD(&cd->file_list); 152 153 cd->card_state = GENWQE_CARD_UNUSED; 154 spin_lock_init(&cd->print_lock); 155 156 cd->ddcb_software_timeout = GENWQE_DDCB_SOFTWARE_TIMEOUT; 157 cd->kill_timeout = GENWQE_KILL_TIMEOUT; 158 159 for (j = 0; j < GENWQE_MAX_VFS; j++) 160 cd->vf_jobtimeout_msec[j] = GENWQE_VF_JOBTIMEOUT_MSEC; 161 162 genwqe_devices[i] = cd; 163 return cd; 164 } 165 166 static void genwqe_dev_free(struct genwqe_dev *cd) 167 { 168 if (!cd) 169 return; 170 171 genwqe_devices[cd->card_idx] = NULL; 172 kfree(cd); 173 } 174 175 /** 176 * genwqe_bus_reset() - Card recovery 177 * 178 * pci_reset_function() will recover the device and ensure that the 179 * registers are accessible again when it completes with success. If 180 * not, the card will stay dead and registers will be unaccessible 181 * still. 182 */ 183 static int genwqe_bus_reset(struct genwqe_dev *cd) 184 { 185 int rc = 0; 186 struct pci_dev *pci_dev = cd->pci_dev; 187 void __iomem *mmio; 188 189 if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE) 190 return -EIO; 191 192 mmio = cd->mmio; 193 cd->mmio = NULL; 194 pci_iounmap(pci_dev, mmio); 195 196 pci_release_mem_regions(pci_dev); 197 198 /* 199 * Firmware/BIOS might change memory mapping during bus reset. 200 * Settings like enable bus-mastering, ... are backuped and 201 * restored by the pci_reset_function(). 202 */ 203 dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__); 204 rc = pci_reset_function(pci_dev); 205 if (rc) { 206 dev_err(&pci_dev->dev, 207 "[%s] err: failed reset func (rc %d)\n", __func__, rc); 208 return rc; 209 } 210 dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc); 211 212 /* 213 * Here is the right spot to clear the register read 214 * failure. pci_bus_reset() does this job in real systems. 215 */ 216 cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE | 217 GENWQE_INJECT_GFIR_FATAL | 218 GENWQE_INJECT_GFIR_INFO); 219 220 rc = pci_request_mem_regions(pci_dev, genwqe_driver_name); 221 if (rc) { 222 dev_err(&pci_dev->dev, 223 "[%s] err: request bars failed (%d)\n", __func__, rc); 224 return -EIO; 225 } 226 227 cd->mmio = pci_iomap(pci_dev, 0, 0); 228 if (cd->mmio == NULL) { 229 dev_err(&pci_dev->dev, 230 "[%s] err: mapping BAR0 failed\n", __func__); 231 return -ENOMEM; 232 } 233 return 0; 234 } 235 236 /* 237 * Hardware circumvention section. Certain bitstreams in our test-lab 238 * had different kinds of problems. Here is where we adjust those 239 * bitstreams to function will with this version of our device driver. 240 * 241 * Thise circumventions are applied to the physical function only. 242 * The magical numbers below are identifying development/manufacturing 243 * versions of the bitstream used on the card. 244 * 245 * Turn off error reporting for old/manufacturing images. 246 */ 247 248 bool genwqe_need_err_masking(struct genwqe_dev *cd) 249 { 250 return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull; 251 } 252 253 static void genwqe_tweak_hardware(struct genwqe_dev *cd) 254 { 255 struct pci_dev *pci_dev = cd->pci_dev; 256 257 /* Mask FIRs for development images */ 258 if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) && 259 ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) { 260 dev_warn(&pci_dev->dev, 261 "FIRs masked due to bitstream %016llx.%016llx\n", 262 cd->slu_unitcfg, cd->app_unitcfg); 263 264 __genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR, 265 0xFFFFFFFFFFFFFFFFull); 266 267 __genwqe_writeq(cd, IO_APP_ERR_ACT_MASK, 268 0x0000000000000000ull); 269 } 270 } 271 272 /** 273 * genwqe_recovery_on_fatal_gfir_required() - Version depended actions 274 * 275 * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must 276 * be ignored. This is e.g. true for the bitstream we gave to the card 277 * manufacturer, but also for some old bitstreams we released to our 278 * test-lab. 279 */ 280 int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd) 281 { 282 return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull; 283 } 284 285 int genwqe_flash_readback_fails(struct genwqe_dev *cd) 286 { 287 return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull; 288 } 289 290 /** 291 * genwqe_T_psec() - Calculate PF/VF timeout register content 292 * 293 * Note: From a design perspective it turned out to be a bad idea to 294 * use codes here to specifiy the frequency/speed values. An old 295 * driver cannot understand new codes and is therefore always a 296 * problem. Better is to measure out the value or put the 297 * speed/frequency directly into a register which is always a valid 298 * value for old as well as for new software. 299 */ 300 /* T = 1/f */ 301 static int genwqe_T_psec(struct genwqe_dev *cd) 302 { 303 u16 speed; /* 1/f -> 250, 200, 166, 175 */ 304 static const int T[] = { 4000, 5000, 6000, 5714 }; 305 306 speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); 307 if (speed >= ARRAY_SIZE(T)) 308 return -1; /* illegal value */ 309 310 return T[speed]; 311 } 312 313 /** 314 * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution 315 * 316 * Do this _after_ card_reset() is called. Otherwise the values will 317 * vanish. The settings need to be done when the queues are inactive. 318 * 319 * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16. 320 * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16. 321 */ 322 static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd) 323 { 324 u32 T = genwqe_T_psec(cd); 325 u64 x; 326 327 if (GENWQE_PF_JOBTIMEOUT_MSEC == 0) 328 return false; 329 330 /* PF: large value needed, flash update 2sec per block */ 331 x = ilog2(GENWQE_PF_JOBTIMEOUT_MSEC * 332 16000000000uL/(T * 15)) - 10; 333 334 genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, 335 0xff00 | (x & 0xff), 0); 336 return true; 337 } 338 339 /** 340 * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution 341 */ 342 static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd) 343 { 344 struct pci_dev *pci_dev = cd->pci_dev; 345 unsigned int vf; 346 u32 T = genwqe_T_psec(cd); 347 u64 x; 348 int totalvfs; 349 350 totalvfs = pci_sriov_get_totalvfs(pci_dev); 351 if (totalvfs <= 0) 352 return false; 353 354 for (vf = 0; vf < totalvfs; vf++) { 355 356 if (cd->vf_jobtimeout_msec[vf] == 0) 357 continue; 358 359 x = ilog2(cd->vf_jobtimeout_msec[vf] * 360 16000000000uL/(T * 15)) - 10; 361 362 genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, 363 0xff00 | (x & 0xff), vf + 1); 364 } 365 return true; 366 } 367 368 static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd) 369 { 370 unsigned int type, e = 0; 371 372 for (type = 0; type < GENWQE_DBG_UNITS; type++) { 373 switch (type) { 374 case GENWQE_DBG_UNIT0: 375 e = genwqe_ffdc_buff_size(cd, 0); 376 break; 377 case GENWQE_DBG_UNIT1: 378 e = genwqe_ffdc_buff_size(cd, 1); 379 break; 380 case GENWQE_DBG_UNIT2: 381 e = genwqe_ffdc_buff_size(cd, 2); 382 break; 383 case GENWQE_DBG_REGS: 384 e = GENWQE_FFDC_REGS; 385 break; 386 } 387 388 /* currently support only the debug units mentioned here */ 389 cd->ffdc[type].entries = e; 390 cd->ffdc[type].regs = 391 kmalloc_array(e, sizeof(struct genwqe_reg), 392 GFP_KERNEL); 393 /* 394 * regs == NULL is ok, the using code treats this as no regs, 395 * Printing warning is ok in this case. 396 */ 397 } 398 return 0; 399 } 400 401 static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd) 402 { 403 unsigned int type; 404 405 for (type = 0; type < GENWQE_DBG_UNITS; type++) { 406 kfree(cd->ffdc[type].regs); 407 cd->ffdc[type].regs = NULL; 408 } 409 } 410 411 static int genwqe_read_ids(struct genwqe_dev *cd) 412 { 413 int err = 0; 414 int slu_id; 415 struct pci_dev *pci_dev = cd->pci_dev; 416 417 cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG); 418 if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) { 419 dev_err(&pci_dev->dev, 420 "err: SLUID=%016llx\n", cd->slu_unitcfg); 421 err = -EIO; 422 goto out_err; 423 } 424 425 slu_id = genwqe_get_slu_id(cd); 426 if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) { 427 dev_err(&pci_dev->dev, 428 "err: incompatible SLU Architecture %u\n", slu_id); 429 err = -ENOENT; 430 goto out_err; 431 } 432 433 cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG); 434 if (cd->app_unitcfg == IO_ILLEGAL_VALUE) { 435 dev_err(&pci_dev->dev, 436 "err: APPID=%016llx\n", cd->app_unitcfg); 437 err = -EIO; 438 goto out_err; 439 } 440 genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name)); 441 442 /* 443 * Is access to all registers possible? If we are a VF the 444 * answer is obvious. If we run fully virtualized, we need to 445 * check if we can access all registers. If we do not have 446 * full access we will cause an UR and some informational FIRs 447 * in the PF, but that should not harm. 448 */ 449 if (pci_dev->is_virtfn) 450 cd->is_privileged = 0; 451 else 452 cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM) 453 != IO_ILLEGAL_VALUE); 454 455 out_err: 456 return err; 457 } 458 459 static int genwqe_start(struct genwqe_dev *cd) 460 { 461 int err; 462 struct pci_dev *pci_dev = cd->pci_dev; 463 464 err = genwqe_read_ids(cd); 465 if (err) 466 return err; 467 468 if (genwqe_is_privileged(cd)) { 469 /* do this after the tweaks. alloc fail is acceptable */ 470 genwqe_ffdc_buffs_alloc(cd); 471 genwqe_stop_traps(cd); 472 473 /* Collect registers e.g. FIRs, UNITIDs, traces ... */ 474 genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs, 475 cd->ffdc[GENWQE_DBG_REGS].entries, 0); 476 477 genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0, 478 cd->ffdc[GENWQE_DBG_UNIT0].regs, 479 cd->ffdc[GENWQE_DBG_UNIT0].entries); 480 481 genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1, 482 cd->ffdc[GENWQE_DBG_UNIT1].regs, 483 cd->ffdc[GENWQE_DBG_UNIT1].entries); 484 485 genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2, 486 cd->ffdc[GENWQE_DBG_UNIT2].regs, 487 cd->ffdc[GENWQE_DBG_UNIT2].entries); 488 489 genwqe_start_traps(cd); 490 491 if (cd->card_state == GENWQE_CARD_FATAL_ERROR) { 492 dev_warn(&pci_dev->dev, 493 "[%s] chip reload/recovery!\n", __func__); 494 495 /* 496 * Stealth Mode: Reload chip on either hot 497 * reset or PERST. 498 */ 499 cd->softreset = 0x7Cull; 500 __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 501 cd->softreset); 502 503 err = genwqe_bus_reset(cd); 504 if (err != 0) { 505 dev_err(&pci_dev->dev, 506 "[%s] err: bus reset failed!\n", 507 __func__); 508 goto out; 509 } 510 511 /* 512 * Re-read the IDs because 513 * it could happen that the bitstream load 514 * failed! 515 */ 516 err = genwqe_read_ids(cd); 517 if (err) 518 goto out; 519 } 520 } 521 522 err = genwqe_setup_service_layer(cd); /* does a reset to the card */ 523 if (err != 0) { 524 dev_err(&pci_dev->dev, 525 "[%s] err: could not setup servicelayer!\n", __func__); 526 err = -ENODEV; 527 goto out; 528 } 529 530 if (genwqe_is_privileged(cd)) { /* code is running _after_ reset */ 531 genwqe_tweak_hardware(cd); 532 533 genwqe_setup_pf_jtimer(cd); 534 genwqe_setup_vf_jtimer(cd); 535 } 536 537 err = genwqe_device_create(cd); 538 if (err < 0) { 539 dev_err(&pci_dev->dev, 540 "err: chdev init failed! (err=%d)\n", err); 541 goto out_release_service_layer; 542 } 543 return 0; 544 545 out_release_service_layer: 546 genwqe_release_service_layer(cd); 547 out: 548 if (genwqe_is_privileged(cd)) 549 genwqe_ffdc_buffs_free(cd); 550 return -EIO; 551 } 552 553 /** 554 * genwqe_stop() - Stop card operation 555 * 556 * Recovery notes: 557 * As long as genwqe_thread runs we might access registers during 558 * error data capture. Same is with the genwqe_health_thread. 559 * When genwqe_bus_reset() fails this function might called two times: 560 * first by the genwqe_health_thread() and later by genwqe_remove() to 561 * unbind the device. We must be able to survive that. 562 * 563 * This function must be robust enough to be called twice. 564 */ 565 static int genwqe_stop(struct genwqe_dev *cd) 566 { 567 genwqe_finish_queue(cd); /* no register access */ 568 genwqe_device_remove(cd); /* device removed, procs killed */ 569 genwqe_release_service_layer(cd); /* here genwqe_thread is stopped */ 570 571 if (genwqe_is_privileged(cd)) { 572 pci_disable_sriov(cd->pci_dev); /* access pci config space */ 573 genwqe_ffdc_buffs_free(cd); 574 } 575 576 return 0; 577 } 578 579 /** 580 * genwqe_recover_card() - Try to recover the card if it is possible 581 * 582 * If fatal_err is set no register access is possible anymore. It is 583 * likely that genwqe_start fails in that situation. Proper error 584 * handling is required in this case. 585 * 586 * genwqe_bus_reset() will cause the pci code to call genwqe_remove() 587 * and later genwqe_probe() for all virtual functions. 588 */ 589 static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err) 590 { 591 int rc; 592 struct pci_dev *pci_dev = cd->pci_dev; 593 594 genwqe_stop(cd); 595 596 /* 597 * Make sure chip is not reloaded to maintain FFDC. Write SLU 598 * Reset Register, CPLDReset field to 0. 599 */ 600 if (!fatal_err) { 601 cd->softreset = 0x70ull; 602 __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset); 603 } 604 605 rc = genwqe_bus_reset(cd); 606 if (rc != 0) { 607 dev_err(&pci_dev->dev, 608 "[%s] err: card recovery impossible!\n", __func__); 609 return rc; 610 } 611 612 rc = genwqe_start(cd); 613 if (rc < 0) { 614 dev_err(&pci_dev->dev, 615 "[%s] err: failed to launch device!\n", __func__); 616 return rc; 617 } 618 return 0; 619 } 620 621 static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir) 622 { 623 *gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 624 return (*gfir & GFIR_ERR_TRIGGER) && 625 genwqe_recovery_on_fatal_gfir_required(cd); 626 } 627 628 /** 629 * genwqe_fir_checking() - Check the fault isolation registers of the card 630 * 631 * If this code works ok, can be tried out with help of the genwqe_poke tool: 632 * sudo ./tools/genwqe_poke 0x8 0xfefefefefef 633 * 634 * Now the relevant FIRs/sFIRs should be printed out and the driver should 635 * invoke recovery (devices are removed and readded). 636 */ 637 static u64 genwqe_fir_checking(struct genwqe_dev *cd) 638 { 639 int j, iterations = 0; 640 u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec; 641 u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr; 642 struct pci_dev *pci_dev = cd->pci_dev; 643 644 healthMonitor: 645 iterations++; 646 if (iterations > 16) { 647 dev_err(&pci_dev->dev, "* exit looping after %d times\n", 648 iterations); 649 goto fatal_error; 650 } 651 652 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 653 if (gfir != 0x0) 654 dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", 655 IO_SLC_CFGREG_GFIR, gfir); 656 if (gfir == IO_ILLEGAL_VALUE) 657 goto fatal_error; 658 659 /* 660 * Avoid printing when to GFIR bit is on prevents contignous 661 * printout e.g. for the following bug: 662 * FIR set without a 2ndary FIR/FIR cannot be cleared 663 * Comment out the following if to get the prints: 664 */ 665 if (gfir == 0) 666 return 0; 667 668 gfir_masked = gfir & GFIR_ERR_TRIGGER; /* fatal errors */ 669 670 for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */ 671 672 /* read the primary FIR (pfir) */ 673 fir_addr = (uid << 24) + 0x08; 674 fir = __genwqe_readq(cd, fir_addr); 675 if (fir == 0x0) 676 continue; /* no error in this unit */ 677 678 dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir); 679 if (fir == IO_ILLEGAL_VALUE) 680 goto fatal_error; 681 682 /* read primary FEC */ 683 fec_addr = (uid << 24) + 0x18; 684 fec = __genwqe_readq(cd, fec_addr); 685 686 dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec); 687 if (fec == IO_ILLEGAL_VALUE) 688 goto fatal_error; 689 690 for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) { 691 692 /* secondary fir empty, skip it */ 693 if ((fir & mask) == 0x0) 694 continue; 695 696 sfir_addr = (uid << 24) + 0x100 + 0x08 * j; 697 sfir = __genwqe_readq(cd, sfir_addr); 698 699 if (sfir == IO_ILLEGAL_VALUE) 700 goto fatal_error; 701 dev_err(&pci_dev->dev, 702 "* 0x%08x 0x%016llx\n", sfir_addr, sfir); 703 704 sfec_addr = (uid << 24) + 0x300 + 0x08 * j; 705 sfec = __genwqe_readq(cd, sfec_addr); 706 707 if (sfec == IO_ILLEGAL_VALUE) 708 goto fatal_error; 709 dev_err(&pci_dev->dev, 710 "* 0x%08x 0x%016llx\n", sfec_addr, sfec); 711 712 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 713 if (gfir == IO_ILLEGAL_VALUE) 714 goto fatal_error; 715 716 /* gfir turned on during routine! get out and 717 start over. */ 718 if ((gfir_masked == 0x0) && 719 (gfir & GFIR_ERR_TRIGGER)) { 720 goto healthMonitor; 721 } 722 723 /* do not clear if we entered with a fatal gfir */ 724 if (gfir_masked == 0x0) { 725 726 /* NEW clear by mask the logged bits */ 727 sfir_addr = (uid << 24) + 0x100 + 0x08 * j; 728 __genwqe_writeq(cd, sfir_addr, sfir); 729 730 dev_dbg(&pci_dev->dev, 731 "[HM] Clearing 2ndary FIR 0x%08x with 0x%016llx\n", 732 sfir_addr, sfir); 733 734 /* 735 * note, these cannot be error-Firs 736 * since gfir_masked is 0 after sfir 737 * was read. Also, it is safe to do 738 * this write if sfir=0. Still need to 739 * clear the primary. This just means 740 * there is no secondary FIR. 741 */ 742 743 /* clear by mask the logged bit. */ 744 fir_clr_addr = (uid << 24) + 0x10; 745 __genwqe_writeq(cd, fir_clr_addr, mask); 746 747 dev_dbg(&pci_dev->dev, 748 "[HM] Clearing primary FIR 0x%08x with 0x%016llx\n", 749 fir_clr_addr, mask); 750 } 751 } 752 } 753 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 754 if (gfir == IO_ILLEGAL_VALUE) 755 goto fatal_error; 756 757 if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) { 758 /* 759 * Check once more that it didn't go on after all the 760 * FIRS were cleared. 761 */ 762 dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n", 763 iterations); 764 goto healthMonitor; 765 } 766 return gfir_masked; 767 768 fatal_error: 769 return IO_ILLEGAL_VALUE; 770 } 771 772 /** 773 * genwqe_pci_fundamental_reset() - trigger a PCIe fundamental reset on the slot 774 * 775 * Note: pci_set_pcie_reset_state() is not implemented on all archs, so this 776 * reset method will not work in all cases. 777 * 778 * Return: 0 on success or error code from pci_set_pcie_reset_state() 779 */ 780 static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev) 781 { 782 int rc; 783 784 /* 785 * lock pci config space access from userspace, 786 * save state and issue PCIe fundamental reset 787 */ 788 pci_cfg_access_lock(pci_dev); 789 pci_save_state(pci_dev); 790 rc = pci_set_pcie_reset_state(pci_dev, pcie_warm_reset); 791 if (!rc) { 792 /* keep PCIe reset asserted for 250ms */ 793 msleep(250); 794 pci_set_pcie_reset_state(pci_dev, pcie_deassert_reset); 795 /* Wait for 2s to reload flash and train the link */ 796 msleep(2000); 797 } 798 pci_restore_state(pci_dev); 799 pci_cfg_access_unlock(pci_dev); 800 return rc; 801 } 802 803 804 static int genwqe_platform_recovery(struct genwqe_dev *cd) 805 { 806 struct pci_dev *pci_dev = cd->pci_dev; 807 int rc; 808 809 dev_info(&pci_dev->dev, 810 "[%s] resetting card for error recovery\n", __func__); 811 812 /* Clear out error injection flags */ 813 cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE | 814 GENWQE_INJECT_GFIR_FATAL | 815 GENWQE_INJECT_GFIR_INFO); 816 817 genwqe_stop(cd); 818 819 /* Try recoverying the card with fundamental reset */ 820 rc = genwqe_pci_fundamental_reset(pci_dev); 821 if (!rc) { 822 rc = genwqe_start(cd); 823 if (!rc) 824 dev_info(&pci_dev->dev, 825 "[%s] card recovered\n", __func__); 826 else 827 dev_err(&pci_dev->dev, 828 "[%s] err: cannot start card services! (err=%d)\n", 829 __func__, rc); 830 } else { 831 dev_err(&pci_dev->dev, 832 "[%s] card reset failed\n", __func__); 833 } 834 835 return rc; 836 } 837 838 /* 839 * genwqe_reload_bistream() - reload card bitstream 840 * 841 * Set the appropriate register and call fundamental reset to reaload the card 842 * bitstream. 843 * 844 * Return: 0 on success, error code otherwise 845 */ 846 static int genwqe_reload_bistream(struct genwqe_dev *cd) 847 { 848 struct pci_dev *pci_dev = cd->pci_dev; 849 int rc; 850 851 dev_info(&pci_dev->dev, 852 "[%s] resetting card for bitstream reload\n", 853 __func__); 854 855 genwqe_stop(cd); 856 857 /* 858 * Cause a CPLD reprogram with the 'next_bitstream' 859 * partition on PCIe hot or fundamental reset 860 */ 861 __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 862 (cd->softreset & 0xcull) | 0x70ull); 863 864 rc = genwqe_pci_fundamental_reset(pci_dev); 865 if (rc) { 866 /* 867 * A fundamental reset failure can be caused 868 * by lack of support on the arch, so we just 869 * log the error and try to start the card 870 * again. 871 */ 872 dev_err(&pci_dev->dev, 873 "[%s] err: failed to reset card for bitstream reload\n", 874 __func__); 875 } 876 877 rc = genwqe_start(cd); 878 if (rc) { 879 dev_err(&pci_dev->dev, 880 "[%s] err: cannot start card services! (err=%d)\n", 881 __func__, rc); 882 return rc; 883 } 884 dev_info(&pci_dev->dev, 885 "[%s] card reloaded\n", __func__); 886 return 0; 887 } 888 889 890 /** 891 * genwqe_health_thread() - Health checking thread 892 * 893 * This thread is only started for the PF of the card. 894 * 895 * This thread monitors the health of the card. A critical situation 896 * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In 897 * this case we need to be recovered from outside. Writing to 898 * registers will very likely not work either. 899 * 900 * This thread must only exit if kthread_should_stop() becomes true. 901 * 902 * Condition for the health-thread to trigger: 903 * a) when a kthread_stop() request comes in or 904 * b) a critical GFIR occured 905 * 906 * Informational GFIRs are checked and potentially printed in 907 * GENWQE_HEALTH_CHECK_INTERVAL seconds. 908 */ 909 static int genwqe_health_thread(void *data) 910 { 911 int rc, should_stop = 0; 912 struct genwqe_dev *cd = data; 913 struct pci_dev *pci_dev = cd->pci_dev; 914 u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg; 915 916 health_thread_begin: 917 while (!kthread_should_stop()) { 918 rc = wait_event_interruptible_timeout(cd->health_waitq, 919 (genwqe_health_check_cond(cd, &gfir) || 920 (should_stop = kthread_should_stop())), 921 GENWQE_HEALTH_CHECK_INTERVAL * HZ); 922 923 if (should_stop) 924 break; 925 926 if (gfir == IO_ILLEGAL_VALUE) { 927 dev_err(&pci_dev->dev, 928 "[%s] GFIR=%016llx\n", __func__, gfir); 929 goto fatal_error; 930 } 931 932 slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG); 933 if (slu_unitcfg == IO_ILLEGAL_VALUE) { 934 dev_err(&pci_dev->dev, 935 "[%s] SLU_UNITCFG=%016llx\n", 936 __func__, slu_unitcfg); 937 goto fatal_error; 938 } 939 940 app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG); 941 if (app_unitcfg == IO_ILLEGAL_VALUE) { 942 dev_err(&pci_dev->dev, 943 "[%s] APP_UNITCFG=%016llx\n", 944 __func__, app_unitcfg); 945 goto fatal_error; 946 } 947 948 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 949 if (gfir == IO_ILLEGAL_VALUE) { 950 dev_err(&pci_dev->dev, 951 "[%s] %s: GFIR=%016llx\n", __func__, 952 (gfir & GFIR_ERR_TRIGGER) ? "err" : "info", 953 gfir); 954 goto fatal_error; 955 } 956 957 gfir_masked = genwqe_fir_checking(cd); 958 if (gfir_masked == IO_ILLEGAL_VALUE) 959 goto fatal_error; 960 961 /* 962 * GFIR ErrorTrigger bits set => reset the card! 963 * Never do this for old/manufacturing images! 964 */ 965 if ((gfir_masked) && !cd->skip_recovery && 966 genwqe_recovery_on_fatal_gfir_required(cd)) { 967 968 cd->card_state = GENWQE_CARD_FATAL_ERROR; 969 970 rc = genwqe_recover_card(cd, 0); 971 if (rc < 0) { 972 /* FIXME Card is unusable and needs unbind! */ 973 goto fatal_error; 974 } 975 } 976 977 if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) { 978 /* Userspace requested card bitstream reload */ 979 rc = genwqe_reload_bistream(cd); 980 if (rc) 981 goto fatal_error; 982 } 983 984 cd->last_gfir = gfir; 985 cond_resched(); 986 } 987 988 return 0; 989 990 fatal_error: 991 if (cd->use_platform_recovery) { 992 /* 993 * Since we use raw accessors, EEH errors won't be detected 994 * by the platform until we do a non-raw MMIO or config space 995 * read 996 */ 997 readq(cd->mmio + IO_SLC_CFGREG_GFIR); 998 999 /* We do nothing if the card is going over PCI recovery */ 1000 if (pci_channel_offline(pci_dev)) 1001 return -EIO; 1002 1003 /* 1004 * If it's supported by the platform, we try a fundamental reset 1005 * to recover from a fatal error. Otherwise, we continue to wait 1006 * for an external recovery procedure to take care of it. 1007 */ 1008 rc = genwqe_platform_recovery(cd); 1009 if (!rc) 1010 goto health_thread_begin; 1011 } 1012 1013 dev_err(&pci_dev->dev, 1014 "[%s] card unusable. Please trigger unbind!\n", __func__); 1015 1016 /* Bring down logical devices to inform user space via udev remove. */ 1017 cd->card_state = GENWQE_CARD_FATAL_ERROR; 1018 genwqe_stop(cd); 1019 1020 /* genwqe_bus_reset failed(). Now wait for genwqe_remove(). */ 1021 while (!kthread_should_stop()) 1022 cond_resched(); 1023 1024 return -EIO; 1025 } 1026 1027 static int genwqe_health_check_start(struct genwqe_dev *cd) 1028 { 1029 int rc; 1030 1031 if (GENWQE_HEALTH_CHECK_INTERVAL <= 0) 1032 return 0; /* valid for disabling the service */ 1033 1034 /* moved before request_irq() */ 1035 /* init_waitqueue_head(&cd->health_waitq); */ 1036 1037 cd->health_thread = kthread_run(genwqe_health_thread, cd, 1038 GENWQE_DEVNAME "%d_health", 1039 cd->card_idx); 1040 if (IS_ERR(cd->health_thread)) { 1041 rc = PTR_ERR(cd->health_thread); 1042 cd->health_thread = NULL; 1043 return rc; 1044 } 1045 return 0; 1046 } 1047 1048 static int genwqe_health_thread_running(struct genwqe_dev *cd) 1049 { 1050 return cd->health_thread != NULL; 1051 } 1052 1053 static int genwqe_health_check_stop(struct genwqe_dev *cd) 1054 { 1055 int rc; 1056 1057 if (!genwqe_health_thread_running(cd)) 1058 return -EIO; 1059 1060 rc = kthread_stop(cd->health_thread); 1061 cd->health_thread = NULL; 1062 return 0; 1063 } 1064 1065 /** 1066 * genwqe_pci_setup() - Allocate PCIe related resources for our card 1067 */ 1068 static int genwqe_pci_setup(struct genwqe_dev *cd) 1069 { 1070 int err; 1071 struct pci_dev *pci_dev = cd->pci_dev; 1072 1073 err = pci_enable_device_mem(pci_dev); 1074 if (err) { 1075 dev_err(&pci_dev->dev, 1076 "err: failed to enable pci memory (err=%d)\n", err); 1077 goto err_out; 1078 } 1079 1080 /* Reserve PCI I/O and memory resources */ 1081 err = pci_request_mem_regions(pci_dev, genwqe_driver_name); 1082 if (err) { 1083 dev_err(&pci_dev->dev, 1084 "[%s] err: request bars failed (%d)\n", __func__, err); 1085 err = -EIO; 1086 goto err_disable_device; 1087 } 1088 1089 /* check for 64-bit DMA address supported (DAC) */ 1090 if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64))) { 1091 err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(64)); 1092 if (err) { 1093 dev_err(&pci_dev->dev, 1094 "err: DMA64 consistent mask error\n"); 1095 err = -EIO; 1096 goto out_release_resources; 1097 } 1098 /* check for 32-bit DMA address supported (SAC) */ 1099 } else if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) { 1100 err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(32)); 1101 if (err) { 1102 dev_err(&pci_dev->dev, 1103 "err: DMA32 consistent mask error\n"); 1104 err = -EIO; 1105 goto out_release_resources; 1106 } 1107 } else { 1108 dev_err(&pci_dev->dev, 1109 "err: neither DMA32 nor DMA64 supported\n"); 1110 err = -EIO; 1111 goto out_release_resources; 1112 } 1113 1114 pci_set_master(pci_dev); 1115 pci_enable_pcie_error_reporting(pci_dev); 1116 1117 /* EEH recovery requires PCIe fundamental reset */ 1118 pci_dev->needs_freset = 1; 1119 1120 /* request complete BAR-0 space (length = 0) */ 1121 cd->mmio_len = pci_resource_len(pci_dev, 0); 1122 cd->mmio = pci_iomap(pci_dev, 0, 0); 1123 if (cd->mmio == NULL) { 1124 dev_err(&pci_dev->dev, 1125 "[%s] err: mapping BAR0 failed\n", __func__); 1126 err = -ENOMEM; 1127 goto out_release_resources; 1128 } 1129 1130 cd->num_vfs = pci_sriov_get_totalvfs(pci_dev); 1131 if (cd->num_vfs < 0) 1132 cd->num_vfs = 0; 1133 1134 err = genwqe_read_ids(cd); 1135 if (err) 1136 goto out_iounmap; 1137 1138 return 0; 1139 1140 out_iounmap: 1141 pci_iounmap(pci_dev, cd->mmio); 1142 out_release_resources: 1143 pci_release_mem_regions(pci_dev); 1144 err_disable_device: 1145 pci_disable_device(pci_dev); 1146 err_out: 1147 return err; 1148 } 1149 1150 /** 1151 * genwqe_pci_remove() - Free PCIe related resources for our card 1152 */ 1153 static void genwqe_pci_remove(struct genwqe_dev *cd) 1154 { 1155 struct pci_dev *pci_dev = cd->pci_dev; 1156 1157 if (cd->mmio) 1158 pci_iounmap(pci_dev, cd->mmio); 1159 1160 pci_release_mem_regions(pci_dev); 1161 pci_disable_device(pci_dev); 1162 } 1163 1164 /** 1165 * genwqe_probe() - Device initialization 1166 * @pdev: PCI device information struct 1167 * 1168 * Callable for multiple cards. This function is called on bind. 1169 * 1170 * Return: 0 if succeeded, < 0 when failed 1171 */ 1172 static int genwqe_probe(struct pci_dev *pci_dev, 1173 const struct pci_device_id *id) 1174 { 1175 int err; 1176 struct genwqe_dev *cd; 1177 1178 genwqe_init_crc32(); 1179 1180 cd = genwqe_dev_alloc(); 1181 if (IS_ERR(cd)) { 1182 dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n", 1183 (int)PTR_ERR(cd)); 1184 return PTR_ERR(cd); 1185 } 1186 1187 dev_set_drvdata(&pci_dev->dev, cd); 1188 cd->pci_dev = pci_dev; 1189 1190 err = genwqe_pci_setup(cd); 1191 if (err < 0) { 1192 dev_err(&pci_dev->dev, 1193 "err: problems with PCI setup (err=%d)\n", err); 1194 goto out_free_dev; 1195 } 1196 1197 err = genwqe_start(cd); 1198 if (err < 0) { 1199 dev_err(&pci_dev->dev, 1200 "err: cannot start card services! (err=%d)\n", err); 1201 goto out_pci_remove; 1202 } 1203 1204 if (genwqe_is_privileged(cd)) { 1205 err = genwqe_health_check_start(cd); 1206 if (err < 0) { 1207 dev_err(&pci_dev->dev, 1208 "err: cannot start health checking! (err=%d)\n", 1209 err); 1210 goto out_stop_services; 1211 } 1212 } 1213 return 0; 1214 1215 out_stop_services: 1216 genwqe_stop(cd); 1217 out_pci_remove: 1218 genwqe_pci_remove(cd); 1219 out_free_dev: 1220 genwqe_dev_free(cd); 1221 return err; 1222 } 1223 1224 /** 1225 * genwqe_remove() - Called when device is removed (hot-plugable) 1226 * 1227 * Or when driver is unloaded respecitively when unbind is done. 1228 */ 1229 static void genwqe_remove(struct pci_dev *pci_dev) 1230 { 1231 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); 1232 1233 genwqe_health_check_stop(cd); 1234 1235 /* 1236 * genwqe_stop() must survive if it is called twice 1237 * sequentially. This happens when the health thread calls it 1238 * and fails on genwqe_bus_reset(). 1239 */ 1240 genwqe_stop(cd); 1241 genwqe_pci_remove(cd); 1242 genwqe_dev_free(cd); 1243 } 1244 1245 /* 1246 * genwqe_err_error_detected() - Error detection callback 1247 * 1248 * This callback is called by the PCI subsystem whenever a PCI bus 1249 * error is detected. 1250 */ 1251 static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev, 1252 enum pci_channel_state state) 1253 { 1254 struct genwqe_dev *cd; 1255 1256 dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state); 1257 1258 cd = dev_get_drvdata(&pci_dev->dev); 1259 if (cd == NULL) 1260 return PCI_ERS_RESULT_DISCONNECT; 1261 1262 /* Stop the card */ 1263 genwqe_health_check_stop(cd); 1264 genwqe_stop(cd); 1265 1266 /* 1267 * On permanent failure, the PCI code will call device remove 1268 * after the return of this function. 1269 * genwqe_stop() can be called twice. 1270 */ 1271 if (state == pci_channel_io_perm_failure) { 1272 return PCI_ERS_RESULT_DISCONNECT; 1273 } else { 1274 genwqe_pci_remove(cd); 1275 return PCI_ERS_RESULT_NEED_RESET; 1276 } 1277 } 1278 1279 static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev) 1280 { 1281 int rc; 1282 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); 1283 1284 rc = genwqe_pci_setup(cd); 1285 if (!rc) { 1286 return PCI_ERS_RESULT_RECOVERED; 1287 } else { 1288 dev_err(&pci_dev->dev, 1289 "err: problems with PCI setup (err=%d)\n", rc); 1290 return PCI_ERS_RESULT_DISCONNECT; 1291 } 1292 } 1293 1294 static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev) 1295 { 1296 return PCI_ERS_RESULT_NONE; 1297 } 1298 1299 static void genwqe_err_resume(struct pci_dev *pci_dev) 1300 { 1301 int rc; 1302 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); 1303 1304 rc = genwqe_start(cd); 1305 if (!rc) { 1306 rc = genwqe_health_check_start(cd); 1307 if (rc) 1308 dev_err(&pci_dev->dev, 1309 "err: cannot start health checking! (err=%d)\n", 1310 rc); 1311 } else { 1312 dev_err(&pci_dev->dev, 1313 "err: cannot start card services! (err=%d)\n", rc); 1314 } 1315 } 1316 1317 static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs) 1318 { 1319 int rc; 1320 struct genwqe_dev *cd = dev_get_drvdata(&dev->dev); 1321 1322 if (numvfs > 0) { 1323 genwqe_setup_vf_jtimer(cd); 1324 rc = pci_enable_sriov(dev, numvfs); 1325 if (rc < 0) 1326 return rc; 1327 return numvfs; 1328 } 1329 if (numvfs == 0) { 1330 pci_disable_sriov(dev); 1331 return 0; 1332 } 1333 return 0; 1334 } 1335 1336 static struct pci_error_handlers genwqe_err_handler = { 1337 .error_detected = genwqe_err_error_detected, 1338 .mmio_enabled = genwqe_err_result_none, 1339 .slot_reset = genwqe_err_slot_reset, 1340 .resume = genwqe_err_resume, 1341 }; 1342 1343 static struct pci_driver genwqe_driver = { 1344 .name = genwqe_driver_name, 1345 .id_table = genwqe_device_table, 1346 .probe = genwqe_probe, 1347 .remove = genwqe_remove, 1348 .sriov_configure = genwqe_sriov_configure, 1349 .err_handler = &genwqe_err_handler, 1350 }; 1351 1352 /** 1353 * genwqe_devnode() - Set default access mode for genwqe devices. 1354 * 1355 * Default mode should be rw for everybody. Do not change default 1356 * device name. 1357 */ 1358 static char *genwqe_devnode(struct device *dev, umode_t *mode) 1359 { 1360 if (mode) 1361 *mode = 0666; 1362 return NULL; 1363 } 1364 1365 /** 1366 * genwqe_init_module() - Driver registration and initialization 1367 */ 1368 static int __init genwqe_init_module(void) 1369 { 1370 int rc; 1371 1372 class_genwqe = class_create(THIS_MODULE, GENWQE_DEVNAME); 1373 if (IS_ERR(class_genwqe)) { 1374 pr_err("[%s] create class failed\n", __func__); 1375 return -ENOMEM; 1376 } 1377 1378 class_genwqe->devnode = genwqe_devnode; 1379 1380 debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL); 1381 if (!debugfs_genwqe) { 1382 rc = -ENOMEM; 1383 goto err_out; 1384 } 1385 1386 rc = pci_register_driver(&genwqe_driver); 1387 if (rc != 0) { 1388 pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc); 1389 goto err_out0; 1390 } 1391 1392 return rc; 1393 1394 err_out0: 1395 debugfs_remove(debugfs_genwqe); 1396 err_out: 1397 class_destroy(class_genwqe); 1398 return rc; 1399 } 1400 1401 /** 1402 * genwqe_exit_module() - Driver exit 1403 */ 1404 static void __exit genwqe_exit_module(void) 1405 { 1406 pci_unregister_driver(&genwqe_driver); 1407 debugfs_remove(debugfs_genwqe); 1408 class_destroy(class_genwqe); 1409 } 1410 1411 module_init(genwqe_init_module); 1412 module_exit(genwqe_exit_module); 1413