1 /** 2 * IBM Accelerator Family 'GenWQE' 3 * 4 * (C) Copyright IBM Corp. 2013 5 * 6 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> 7 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> 8 * Author: Michael Jung <mijung@gmx.net> 9 * Author: Michael Ruettger <michael@ibmra.de> 10 * 11 * This program is free software; you can redistribute it and/or modify 12 * it under the terms of the GNU General Public License (version 2 only) 13 * as published by the Free Software Foundation. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU General Public License for more details. 19 */ 20 21 /* 22 * Module initialization and PCIe setup. Card health monitoring and 23 * recovery functionality. Character device creation and deletion are 24 * controlled from here. 25 */ 26 27 #include <linux/types.h> 28 #include <linux/pci.h> 29 #include <linux/err.h> 30 #include <linux/aer.h> 31 #include <linux/string.h> 32 #include <linux/sched.h> 33 #include <linux/wait.h> 34 #include <linux/delay.h> 35 #include <linux/dma-mapping.h> 36 #include <linux/module.h> 37 #include <linux/notifier.h> 38 #include <linux/device.h> 39 #include <linux/log2.h> 40 41 #include "card_base.h" 42 #include "card_ddcb.h" 43 44 MODULE_AUTHOR("Frank Haverkamp <haver@linux.vnet.ibm.com>"); 45 MODULE_AUTHOR("Michael Ruettger <michael@ibmra.de>"); 46 MODULE_AUTHOR("Joerg-Stephan Vogt <jsvogt@de.ibm.com>"); 47 MODULE_AUTHOR("Michael Jung <mijung@gmx.net>"); 48 49 MODULE_DESCRIPTION("GenWQE Card"); 50 MODULE_VERSION(DRV_VERSION); 51 MODULE_LICENSE("GPL"); 52 53 static char genwqe_driver_name[] = GENWQE_DEVNAME; 54 static struct class *class_genwqe; 55 static struct dentry *debugfs_genwqe; 56 static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX]; 57 58 /* PCI structure for identifying device by PCI vendor and device ID */ 59 static const struct pci_device_id genwqe_device_table[] = { 60 { .vendor = PCI_VENDOR_ID_IBM, 61 .device = PCI_DEVICE_GENWQE, 62 .subvendor = PCI_SUBVENDOR_ID_IBM, 63 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, 64 .class = (PCI_CLASSCODE_GENWQE5 << 8), 65 .class_mask = ~0, 66 .driver_data = 0 }, 67 68 /* Initial SR-IOV bring-up image */ 69 { .vendor = PCI_VENDOR_ID_IBM, 70 .device = PCI_DEVICE_GENWQE, 71 .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, 72 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV, 73 .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), 74 .class_mask = ~0, 75 .driver_data = 0 }, 76 77 { .vendor = PCI_VENDOR_ID_IBM, /* VF Vendor ID */ 78 .device = 0x0000, /* VF Device ID */ 79 .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, 80 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV, 81 .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), 82 .class_mask = ~0, 83 .driver_data = 0 }, 84 85 /* Fixed up image */ 86 { .vendor = PCI_VENDOR_ID_IBM, 87 .device = PCI_DEVICE_GENWQE, 88 .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, 89 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, 90 .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), 91 .class_mask = ~0, 92 .driver_data = 0 }, 93 94 { .vendor = PCI_VENDOR_ID_IBM, /* VF Vendor ID */ 95 .device = 0x0000, /* VF Device ID */ 96 .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, 97 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, 98 .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), 99 .class_mask = ~0, 100 .driver_data = 0 }, 101 102 /* Even one more ... */ 103 { .vendor = PCI_VENDOR_ID_IBM, 104 .device = PCI_DEVICE_GENWQE, 105 .subvendor = PCI_SUBVENDOR_ID_IBM, 106 .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_NEW, 107 .class = (PCI_CLASSCODE_GENWQE5 << 8), 108 .class_mask = ~0, 109 .driver_data = 0 }, 110 111 { 0, } /* 0 terminated list. */ 112 }; 113 114 MODULE_DEVICE_TABLE(pci, genwqe_device_table); 115 116 /** 117 * genwqe_dev_alloc() - Create and prepare a new card descriptor 118 * 119 * Return: Pointer to card descriptor, or ERR_PTR(err) on error 120 */ 121 static struct genwqe_dev *genwqe_dev_alloc(void) 122 { 123 unsigned int i = 0, j; 124 struct genwqe_dev *cd; 125 126 for (i = 0; i < GENWQE_CARD_NO_MAX; i++) { 127 if (genwqe_devices[i] == NULL) 128 break; 129 } 130 if (i >= GENWQE_CARD_NO_MAX) 131 return ERR_PTR(-ENODEV); 132 133 cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL); 134 if (!cd) 135 return ERR_PTR(-ENOMEM); 136 137 cd->card_idx = i; 138 cd->class_genwqe = class_genwqe; 139 cd->debugfs_genwqe = debugfs_genwqe; 140 141 /* 142 * This comes from kernel config option and can be overritten via 143 * debugfs. 144 */ 145 cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY; 146 147 init_waitqueue_head(&cd->queue_waitq); 148 149 spin_lock_init(&cd->file_lock); 150 INIT_LIST_HEAD(&cd->file_list); 151 152 cd->card_state = GENWQE_CARD_UNUSED; 153 spin_lock_init(&cd->print_lock); 154 155 cd->ddcb_software_timeout = GENWQE_DDCB_SOFTWARE_TIMEOUT; 156 cd->kill_timeout = GENWQE_KILL_TIMEOUT; 157 158 for (j = 0; j < GENWQE_MAX_VFS; j++) 159 cd->vf_jobtimeout_msec[j] = GENWQE_VF_JOBTIMEOUT_MSEC; 160 161 genwqe_devices[i] = cd; 162 return cd; 163 } 164 165 static void genwqe_dev_free(struct genwqe_dev *cd) 166 { 167 if (!cd) 168 return; 169 170 genwqe_devices[cd->card_idx] = NULL; 171 kfree(cd); 172 } 173 174 /** 175 * genwqe_bus_reset() - Card recovery 176 * 177 * pci_reset_function() will recover the device and ensure that the 178 * registers are accessible again when it completes with success. If 179 * not, the card will stay dead and registers will be unaccessible 180 * still. 181 */ 182 static int genwqe_bus_reset(struct genwqe_dev *cd) 183 { 184 int rc = 0; 185 struct pci_dev *pci_dev = cd->pci_dev; 186 void __iomem *mmio; 187 188 if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE) 189 return -EIO; 190 191 mmio = cd->mmio; 192 cd->mmio = NULL; 193 pci_iounmap(pci_dev, mmio); 194 195 pci_release_mem_regions(pci_dev); 196 197 /* 198 * Firmware/BIOS might change memory mapping during bus reset. 199 * Settings like enable bus-mastering, ... are backuped and 200 * restored by the pci_reset_function(). 201 */ 202 dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__); 203 rc = pci_reset_function(pci_dev); 204 if (rc) { 205 dev_err(&pci_dev->dev, 206 "[%s] err: failed reset func (rc %d)\n", __func__, rc); 207 return rc; 208 } 209 dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc); 210 211 /* 212 * Here is the right spot to clear the register read 213 * failure. pci_bus_reset() does this job in real systems. 214 */ 215 cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE | 216 GENWQE_INJECT_GFIR_FATAL | 217 GENWQE_INJECT_GFIR_INFO); 218 219 rc = pci_request_mem_regions(pci_dev, genwqe_driver_name); 220 if (rc) { 221 dev_err(&pci_dev->dev, 222 "[%s] err: request bars failed (%d)\n", __func__, rc); 223 return -EIO; 224 } 225 226 cd->mmio = pci_iomap(pci_dev, 0, 0); 227 if (cd->mmio == NULL) { 228 dev_err(&pci_dev->dev, 229 "[%s] err: mapping BAR0 failed\n", __func__); 230 return -ENOMEM; 231 } 232 return 0; 233 } 234 235 /* 236 * Hardware circumvention section. Certain bitstreams in our test-lab 237 * had different kinds of problems. Here is where we adjust those 238 * bitstreams to function will with this version of our device driver. 239 * 240 * Thise circumventions are applied to the physical function only. 241 * The magical numbers below are identifying development/manufacturing 242 * versions of the bitstream used on the card. 243 * 244 * Turn off error reporting for old/manufacturing images. 245 */ 246 247 bool genwqe_need_err_masking(struct genwqe_dev *cd) 248 { 249 return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull; 250 } 251 252 static void genwqe_tweak_hardware(struct genwqe_dev *cd) 253 { 254 struct pci_dev *pci_dev = cd->pci_dev; 255 256 /* Mask FIRs for development images */ 257 if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) && 258 ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) { 259 dev_warn(&pci_dev->dev, 260 "FIRs masked due to bitstream %016llx.%016llx\n", 261 cd->slu_unitcfg, cd->app_unitcfg); 262 263 __genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR, 264 0xFFFFFFFFFFFFFFFFull); 265 266 __genwqe_writeq(cd, IO_APP_ERR_ACT_MASK, 267 0x0000000000000000ull); 268 } 269 } 270 271 /** 272 * genwqe_recovery_on_fatal_gfir_required() - Version depended actions 273 * 274 * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must 275 * be ignored. This is e.g. true for the bitstream we gave to the card 276 * manufacturer, but also for some old bitstreams we released to our 277 * test-lab. 278 */ 279 int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd) 280 { 281 return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull; 282 } 283 284 int genwqe_flash_readback_fails(struct genwqe_dev *cd) 285 { 286 return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull; 287 } 288 289 /** 290 * genwqe_T_psec() - Calculate PF/VF timeout register content 291 * 292 * Note: From a design perspective it turned out to be a bad idea to 293 * use codes here to specifiy the frequency/speed values. An old 294 * driver cannot understand new codes and is therefore always a 295 * problem. Better is to measure out the value or put the 296 * speed/frequency directly into a register which is always a valid 297 * value for old as well as for new software. 298 */ 299 /* T = 1/f */ 300 static int genwqe_T_psec(struct genwqe_dev *cd) 301 { 302 u16 speed; /* 1/f -> 250, 200, 166, 175 */ 303 static const int T[] = { 4000, 5000, 6000, 5714 }; 304 305 speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); 306 if (speed >= ARRAY_SIZE(T)) 307 return -1; /* illegal value */ 308 309 return T[speed]; 310 } 311 312 /** 313 * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution 314 * 315 * Do this _after_ card_reset() is called. Otherwise the values will 316 * vanish. The settings need to be done when the queues are inactive. 317 * 318 * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16. 319 * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16. 320 */ 321 static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd) 322 { 323 u32 T = genwqe_T_psec(cd); 324 u64 x; 325 326 if (GENWQE_PF_JOBTIMEOUT_MSEC == 0) 327 return false; 328 329 /* PF: large value needed, flash update 2sec per block */ 330 x = ilog2(GENWQE_PF_JOBTIMEOUT_MSEC * 331 16000000000uL/(T * 15)) - 10; 332 333 genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, 334 0xff00 | (x & 0xff), 0); 335 return true; 336 } 337 338 /** 339 * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution 340 */ 341 static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd) 342 { 343 struct pci_dev *pci_dev = cd->pci_dev; 344 unsigned int vf; 345 u32 T = genwqe_T_psec(cd); 346 u64 x; 347 int totalvfs; 348 349 totalvfs = pci_sriov_get_totalvfs(pci_dev); 350 if (totalvfs <= 0) 351 return false; 352 353 for (vf = 0; vf < totalvfs; vf++) { 354 355 if (cd->vf_jobtimeout_msec[vf] == 0) 356 continue; 357 358 x = ilog2(cd->vf_jobtimeout_msec[vf] * 359 16000000000uL/(T * 15)) - 10; 360 361 genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, 362 0xff00 | (x & 0xff), vf + 1); 363 } 364 return true; 365 } 366 367 static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd) 368 { 369 unsigned int type, e = 0; 370 371 for (type = 0; type < GENWQE_DBG_UNITS; type++) { 372 switch (type) { 373 case GENWQE_DBG_UNIT0: 374 e = genwqe_ffdc_buff_size(cd, 0); 375 break; 376 case GENWQE_DBG_UNIT1: 377 e = genwqe_ffdc_buff_size(cd, 1); 378 break; 379 case GENWQE_DBG_UNIT2: 380 e = genwqe_ffdc_buff_size(cd, 2); 381 break; 382 case GENWQE_DBG_REGS: 383 e = GENWQE_FFDC_REGS; 384 break; 385 } 386 387 /* currently support only the debug units mentioned here */ 388 cd->ffdc[type].entries = e; 389 cd->ffdc[type].regs = 390 kmalloc_array(e, sizeof(struct genwqe_reg), 391 GFP_KERNEL); 392 /* 393 * regs == NULL is ok, the using code treats this as no regs, 394 * Printing warning is ok in this case. 395 */ 396 } 397 return 0; 398 } 399 400 static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd) 401 { 402 unsigned int type; 403 404 for (type = 0; type < GENWQE_DBG_UNITS; type++) { 405 kfree(cd->ffdc[type].regs); 406 cd->ffdc[type].regs = NULL; 407 } 408 } 409 410 static int genwqe_read_ids(struct genwqe_dev *cd) 411 { 412 int err = 0; 413 int slu_id; 414 struct pci_dev *pci_dev = cd->pci_dev; 415 416 cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG); 417 if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) { 418 dev_err(&pci_dev->dev, 419 "err: SLUID=%016llx\n", cd->slu_unitcfg); 420 err = -EIO; 421 goto out_err; 422 } 423 424 slu_id = genwqe_get_slu_id(cd); 425 if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) { 426 dev_err(&pci_dev->dev, 427 "err: incompatible SLU Architecture %u\n", slu_id); 428 err = -ENOENT; 429 goto out_err; 430 } 431 432 cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG); 433 if (cd->app_unitcfg == IO_ILLEGAL_VALUE) { 434 dev_err(&pci_dev->dev, 435 "err: APPID=%016llx\n", cd->app_unitcfg); 436 err = -EIO; 437 goto out_err; 438 } 439 genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name)); 440 441 /* 442 * Is access to all registers possible? If we are a VF the 443 * answer is obvious. If we run fully virtualized, we need to 444 * check if we can access all registers. If we do not have 445 * full access we will cause an UR and some informational FIRs 446 * in the PF, but that should not harm. 447 */ 448 if (pci_dev->is_virtfn) 449 cd->is_privileged = 0; 450 else 451 cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM) 452 != IO_ILLEGAL_VALUE); 453 454 out_err: 455 return err; 456 } 457 458 static int genwqe_start(struct genwqe_dev *cd) 459 { 460 int err; 461 struct pci_dev *pci_dev = cd->pci_dev; 462 463 err = genwqe_read_ids(cd); 464 if (err) 465 return err; 466 467 if (genwqe_is_privileged(cd)) { 468 /* do this after the tweaks. alloc fail is acceptable */ 469 genwqe_ffdc_buffs_alloc(cd); 470 genwqe_stop_traps(cd); 471 472 /* Collect registers e.g. FIRs, UNITIDs, traces ... */ 473 genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs, 474 cd->ffdc[GENWQE_DBG_REGS].entries, 0); 475 476 genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0, 477 cd->ffdc[GENWQE_DBG_UNIT0].regs, 478 cd->ffdc[GENWQE_DBG_UNIT0].entries); 479 480 genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1, 481 cd->ffdc[GENWQE_DBG_UNIT1].regs, 482 cd->ffdc[GENWQE_DBG_UNIT1].entries); 483 484 genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2, 485 cd->ffdc[GENWQE_DBG_UNIT2].regs, 486 cd->ffdc[GENWQE_DBG_UNIT2].entries); 487 488 genwqe_start_traps(cd); 489 490 if (cd->card_state == GENWQE_CARD_FATAL_ERROR) { 491 dev_warn(&pci_dev->dev, 492 "[%s] chip reload/recovery!\n", __func__); 493 494 /* 495 * Stealth Mode: Reload chip on either hot 496 * reset or PERST. 497 */ 498 cd->softreset = 0x7Cull; 499 __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 500 cd->softreset); 501 502 err = genwqe_bus_reset(cd); 503 if (err != 0) { 504 dev_err(&pci_dev->dev, 505 "[%s] err: bus reset failed!\n", 506 __func__); 507 goto out; 508 } 509 510 /* 511 * Re-read the IDs because 512 * it could happen that the bitstream load 513 * failed! 514 */ 515 err = genwqe_read_ids(cd); 516 if (err) 517 goto out; 518 } 519 } 520 521 err = genwqe_setup_service_layer(cd); /* does a reset to the card */ 522 if (err != 0) { 523 dev_err(&pci_dev->dev, 524 "[%s] err: could not setup servicelayer!\n", __func__); 525 err = -ENODEV; 526 goto out; 527 } 528 529 if (genwqe_is_privileged(cd)) { /* code is running _after_ reset */ 530 genwqe_tweak_hardware(cd); 531 532 genwqe_setup_pf_jtimer(cd); 533 genwqe_setup_vf_jtimer(cd); 534 } 535 536 err = genwqe_device_create(cd); 537 if (err < 0) { 538 dev_err(&pci_dev->dev, 539 "err: chdev init failed! (err=%d)\n", err); 540 goto out_release_service_layer; 541 } 542 return 0; 543 544 out_release_service_layer: 545 genwqe_release_service_layer(cd); 546 out: 547 if (genwqe_is_privileged(cd)) 548 genwqe_ffdc_buffs_free(cd); 549 return -EIO; 550 } 551 552 /** 553 * genwqe_stop() - Stop card operation 554 * 555 * Recovery notes: 556 * As long as genwqe_thread runs we might access registers during 557 * error data capture. Same is with the genwqe_health_thread. 558 * When genwqe_bus_reset() fails this function might called two times: 559 * first by the genwqe_health_thread() and later by genwqe_remove() to 560 * unbind the device. We must be able to survive that. 561 * 562 * This function must be robust enough to be called twice. 563 */ 564 static int genwqe_stop(struct genwqe_dev *cd) 565 { 566 genwqe_finish_queue(cd); /* no register access */ 567 genwqe_device_remove(cd); /* device removed, procs killed */ 568 genwqe_release_service_layer(cd); /* here genwqe_thread is stopped */ 569 570 if (genwqe_is_privileged(cd)) { 571 pci_disable_sriov(cd->pci_dev); /* access pci config space */ 572 genwqe_ffdc_buffs_free(cd); 573 } 574 575 return 0; 576 } 577 578 /** 579 * genwqe_recover_card() - Try to recover the card if it is possible 580 * 581 * If fatal_err is set no register access is possible anymore. It is 582 * likely that genwqe_start fails in that situation. Proper error 583 * handling is required in this case. 584 * 585 * genwqe_bus_reset() will cause the pci code to call genwqe_remove() 586 * and later genwqe_probe() for all virtual functions. 587 */ 588 static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err) 589 { 590 int rc; 591 struct pci_dev *pci_dev = cd->pci_dev; 592 593 genwqe_stop(cd); 594 595 /* 596 * Make sure chip is not reloaded to maintain FFDC. Write SLU 597 * Reset Register, CPLDReset field to 0. 598 */ 599 if (!fatal_err) { 600 cd->softreset = 0x70ull; 601 __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset); 602 } 603 604 rc = genwqe_bus_reset(cd); 605 if (rc != 0) { 606 dev_err(&pci_dev->dev, 607 "[%s] err: card recovery impossible!\n", __func__); 608 return rc; 609 } 610 611 rc = genwqe_start(cd); 612 if (rc < 0) { 613 dev_err(&pci_dev->dev, 614 "[%s] err: failed to launch device!\n", __func__); 615 return rc; 616 } 617 return 0; 618 } 619 620 static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir) 621 { 622 *gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 623 return (*gfir & GFIR_ERR_TRIGGER) && 624 genwqe_recovery_on_fatal_gfir_required(cd); 625 } 626 627 /** 628 * genwqe_fir_checking() - Check the fault isolation registers of the card 629 * 630 * If this code works ok, can be tried out with help of the genwqe_poke tool: 631 * sudo ./tools/genwqe_poke 0x8 0xfefefefefef 632 * 633 * Now the relevant FIRs/sFIRs should be printed out and the driver should 634 * invoke recovery (devices are removed and readded). 635 */ 636 static u64 genwqe_fir_checking(struct genwqe_dev *cd) 637 { 638 int j, iterations = 0; 639 u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec; 640 u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr; 641 struct pci_dev *pci_dev = cd->pci_dev; 642 643 healthMonitor: 644 iterations++; 645 if (iterations > 16) { 646 dev_err(&pci_dev->dev, "* exit looping after %d times\n", 647 iterations); 648 goto fatal_error; 649 } 650 651 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 652 if (gfir != 0x0) 653 dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", 654 IO_SLC_CFGREG_GFIR, gfir); 655 if (gfir == IO_ILLEGAL_VALUE) 656 goto fatal_error; 657 658 /* 659 * Avoid printing when to GFIR bit is on prevents contignous 660 * printout e.g. for the following bug: 661 * FIR set without a 2ndary FIR/FIR cannot be cleared 662 * Comment out the following if to get the prints: 663 */ 664 if (gfir == 0) 665 return 0; 666 667 gfir_masked = gfir & GFIR_ERR_TRIGGER; /* fatal errors */ 668 669 for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */ 670 671 /* read the primary FIR (pfir) */ 672 fir_addr = (uid << 24) + 0x08; 673 fir = __genwqe_readq(cd, fir_addr); 674 if (fir == 0x0) 675 continue; /* no error in this unit */ 676 677 dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir); 678 if (fir == IO_ILLEGAL_VALUE) 679 goto fatal_error; 680 681 /* read primary FEC */ 682 fec_addr = (uid << 24) + 0x18; 683 fec = __genwqe_readq(cd, fec_addr); 684 685 dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec); 686 if (fec == IO_ILLEGAL_VALUE) 687 goto fatal_error; 688 689 for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) { 690 691 /* secondary fir empty, skip it */ 692 if ((fir & mask) == 0x0) 693 continue; 694 695 sfir_addr = (uid << 24) + 0x100 + 0x08 * j; 696 sfir = __genwqe_readq(cd, sfir_addr); 697 698 if (sfir == IO_ILLEGAL_VALUE) 699 goto fatal_error; 700 dev_err(&pci_dev->dev, 701 "* 0x%08x 0x%016llx\n", sfir_addr, sfir); 702 703 sfec_addr = (uid << 24) + 0x300 + 0x08 * j; 704 sfec = __genwqe_readq(cd, sfec_addr); 705 706 if (sfec == IO_ILLEGAL_VALUE) 707 goto fatal_error; 708 dev_err(&pci_dev->dev, 709 "* 0x%08x 0x%016llx\n", sfec_addr, sfec); 710 711 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 712 if (gfir == IO_ILLEGAL_VALUE) 713 goto fatal_error; 714 715 /* gfir turned on during routine! get out and 716 start over. */ 717 if ((gfir_masked == 0x0) && 718 (gfir & GFIR_ERR_TRIGGER)) { 719 goto healthMonitor; 720 } 721 722 /* do not clear if we entered with a fatal gfir */ 723 if (gfir_masked == 0x0) { 724 725 /* NEW clear by mask the logged bits */ 726 sfir_addr = (uid << 24) + 0x100 + 0x08 * j; 727 __genwqe_writeq(cd, sfir_addr, sfir); 728 729 dev_dbg(&pci_dev->dev, 730 "[HM] Clearing 2ndary FIR 0x%08x with 0x%016llx\n", 731 sfir_addr, sfir); 732 733 /* 734 * note, these cannot be error-Firs 735 * since gfir_masked is 0 after sfir 736 * was read. Also, it is safe to do 737 * this write if sfir=0. Still need to 738 * clear the primary. This just means 739 * there is no secondary FIR. 740 */ 741 742 /* clear by mask the logged bit. */ 743 fir_clr_addr = (uid << 24) + 0x10; 744 __genwqe_writeq(cd, fir_clr_addr, mask); 745 746 dev_dbg(&pci_dev->dev, 747 "[HM] Clearing primary FIR 0x%08x with 0x%016llx\n", 748 fir_clr_addr, mask); 749 } 750 } 751 } 752 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 753 if (gfir == IO_ILLEGAL_VALUE) 754 goto fatal_error; 755 756 if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) { 757 /* 758 * Check once more that it didn't go on after all the 759 * FIRS were cleared. 760 */ 761 dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n", 762 iterations); 763 goto healthMonitor; 764 } 765 return gfir_masked; 766 767 fatal_error: 768 return IO_ILLEGAL_VALUE; 769 } 770 771 /** 772 * genwqe_pci_fundamental_reset() - trigger a PCIe fundamental reset on the slot 773 * 774 * Note: pci_set_pcie_reset_state() is not implemented on all archs, so this 775 * reset method will not work in all cases. 776 * 777 * Return: 0 on success or error code from pci_set_pcie_reset_state() 778 */ 779 static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev) 780 { 781 int rc; 782 783 /* 784 * lock pci config space access from userspace, 785 * save state and issue PCIe fundamental reset 786 */ 787 pci_cfg_access_lock(pci_dev); 788 pci_save_state(pci_dev); 789 rc = pci_set_pcie_reset_state(pci_dev, pcie_warm_reset); 790 if (!rc) { 791 /* keep PCIe reset asserted for 250ms */ 792 msleep(250); 793 pci_set_pcie_reset_state(pci_dev, pcie_deassert_reset); 794 /* Wait for 2s to reload flash and train the link */ 795 msleep(2000); 796 } 797 pci_restore_state(pci_dev); 798 pci_cfg_access_unlock(pci_dev); 799 return rc; 800 } 801 802 803 static int genwqe_platform_recovery(struct genwqe_dev *cd) 804 { 805 struct pci_dev *pci_dev = cd->pci_dev; 806 int rc; 807 808 dev_info(&pci_dev->dev, 809 "[%s] resetting card for error recovery\n", __func__); 810 811 /* Clear out error injection flags */ 812 cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE | 813 GENWQE_INJECT_GFIR_FATAL | 814 GENWQE_INJECT_GFIR_INFO); 815 816 genwqe_stop(cd); 817 818 /* Try recoverying the card with fundamental reset */ 819 rc = genwqe_pci_fundamental_reset(pci_dev); 820 if (!rc) { 821 rc = genwqe_start(cd); 822 if (!rc) 823 dev_info(&pci_dev->dev, 824 "[%s] card recovered\n", __func__); 825 else 826 dev_err(&pci_dev->dev, 827 "[%s] err: cannot start card services! (err=%d)\n", 828 __func__, rc); 829 } else { 830 dev_err(&pci_dev->dev, 831 "[%s] card reset failed\n", __func__); 832 } 833 834 return rc; 835 } 836 837 /* 838 * genwqe_reload_bistream() - reload card bitstream 839 * 840 * Set the appropriate register and call fundamental reset to reaload the card 841 * bitstream. 842 * 843 * Return: 0 on success, error code otherwise 844 */ 845 static int genwqe_reload_bistream(struct genwqe_dev *cd) 846 { 847 struct pci_dev *pci_dev = cd->pci_dev; 848 int rc; 849 850 dev_info(&pci_dev->dev, 851 "[%s] resetting card for bitstream reload\n", 852 __func__); 853 854 genwqe_stop(cd); 855 856 /* 857 * Cause a CPLD reprogram with the 'next_bitstream' 858 * partition on PCIe hot or fundamental reset 859 */ 860 __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 861 (cd->softreset & 0xcull) | 0x70ull); 862 863 rc = genwqe_pci_fundamental_reset(pci_dev); 864 if (rc) { 865 /* 866 * A fundamental reset failure can be caused 867 * by lack of support on the arch, so we just 868 * log the error and try to start the card 869 * again. 870 */ 871 dev_err(&pci_dev->dev, 872 "[%s] err: failed to reset card for bitstream reload\n", 873 __func__); 874 } 875 876 rc = genwqe_start(cd); 877 if (rc) { 878 dev_err(&pci_dev->dev, 879 "[%s] err: cannot start card services! (err=%d)\n", 880 __func__, rc); 881 return rc; 882 } 883 dev_info(&pci_dev->dev, 884 "[%s] card reloaded\n", __func__); 885 return 0; 886 } 887 888 889 /** 890 * genwqe_health_thread() - Health checking thread 891 * 892 * This thread is only started for the PF of the card. 893 * 894 * This thread monitors the health of the card. A critical situation 895 * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In 896 * this case we need to be recovered from outside. Writing to 897 * registers will very likely not work either. 898 * 899 * This thread must only exit if kthread_should_stop() becomes true. 900 * 901 * Condition for the health-thread to trigger: 902 * a) when a kthread_stop() request comes in or 903 * b) a critical GFIR occured 904 * 905 * Informational GFIRs are checked and potentially printed in 906 * GENWQE_HEALTH_CHECK_INTERVAL seconds. 907 */ 908 static int genwqe_health_thread(void *data) 909 { 910 int rc, should_stop = 0; 911 struct genwqe_dev *cd = data; 912 struct pci_dev *pci_dev = cd->pci_dev; 913 u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg; 914 915 health_thread_begin: 916 while (!kthread_should_stop()) { 917 rc = wait_event_interruptible_timeout(cd->health_waitq, 918 (genwqe_health_check_cond(cd, &gfir) || 919 (should_stop = kthread_should_stop())), 920 GENWQE_HEALTH_CHECK_INTERVAL * HZ); 921 922 if (should_stop) 923 break; 924 925 if (gfir == IO_ILLEGAL_VALUE) { 926 dev_err(&pci_dev->dev, 927 "[%s] GFIR=%016llx\n", __func__, gfir); 928 goto fatal_error; 929 } 930 931 slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG); 932 if (slu_unitcfg == IO_ILLEGAL_VALUE) { 933 dev_err(&pci_dev->dev, 934 "[%s] SLU_UNITCFG=%016llx\n", 935 __func__, slu_unitcfg); 936 goto fatal_error; 937 } 938 939 app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG); 940 if (app_unitcfg == IO_ILLEGAL_VALUE) { 941 dev_err(&pci_dev->dev, 942 "[%s] APP_UNITCFG=%016llx\n", 943 __func__, app_unitcfg); 944 goto fatal_error; 945 } 946 947 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 948 if (gfir == IO_ILLEGAL_VALUE) { 949 dev_err(&pci_dev->dev, 950 "[%s] %s: GFIR=%016llx\n", __func__, 951 (gfir & GFIR_ERR_TRIGGER) ? "err" : "info", 952 gfir); 953 goto fatal_error; 954 } 955 956 gfir_masked = genwqe_fir_checking(cd); 957 if (gfir_masked == IO_ILLEGAL_VALUE) 958 goto fatal_error; 959 960 /* 961 * GFIR ErrorTrigger bits set => reset the card! 962 * Never do this for old/manufacturing images! 963 */ 964 if ((gfir_masked) && !cd->skip_recovery && 965 genwqe_recovery_on_fatal_gfir_required(cd)) { 966 967 cd->card_state = GENWQE_CARD_FATAL_ERROR; 968 969 rc = genwqe_recover_card(cd, 0); 970 if (rc < 0) { 971 /* FIXME Card is unusable and needs unbind! */ 972 goto fatal_error; 973 } 974 } 975 976 if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) { 977 /* Userspace requested card bitstream reload */ 978 rc = genwqe_reload_bistream(cd); 979 if (rc) 980 goto fatal_error; 981 } 982 983 cd->last_gfir = gfir; 984 cond_resched(); 985 } 986 987 return 0; 988 989 fatal_error: 990 if (cd->use_platform_recovery) { 991 /* 992 * Since we use raw accessors, EEH errors won't be detected 993 * by the platform until we do a non-raw MMIO or config space 994 * read 995 */ 996 readq(cd->mmio + IO_SLC_CFGREG_GFIR); 997 998 /* We do nothing if the card is going over PCI recovery */ 999 if (pci_channel_offline(pci_dev)) 1000 return -EIO; 1001 1002 /* 1003 * If it's supported by the platform, we try a fundamental reset 1004 * to recover from a fatal error. Otherwise, we continue to wait 1005 * for an external recovery procedure to take care of it. 1006 */ 1007 rc = genwqe_platform_recovery(cd); 1008 if (!rc) 1009 goto health_thread_begin; 1010 } 1011 1012 dev_err(&pci_dev->dev, 1013 "[%s] card unusable. Please trigger unbind!\n", __func__); 1014 1015 /* Bring down logical devices to inform user space via udev remove. */ 1016 cd->card_state = GENWQE_CARD_FATAL_ERROR; 1017 genwqe_stop(cd); 1018 1019 /* genwqe_bus_reset failed(). Now wait for genwqe_remove(). */ 1020 while (!kthread_should_stop()) 1021 cond_resched(); 1022 1023 return -EIO; 1024 } 1025 1026 static int genwqe_health_check_start(struct genwqe_dev *cd) 1027 { 1028 int rc; 1029 1030 if (GENWQE_HEALTH_CHECK_INTERVAL <= 0) 1031 return 0; /* valid for disabling the service */ 1032 1033 /* moved before request_irq() */ 1034 /* init_waitqueue_head(&cd->health_waitq); */ 1035 1036 cd->health_thread = kthread_run(genwqe_health_thread, cd, 1037 GENWQE_DEVNAME "%d_health", 1038 cd->card_idx); 1039 if (IS_ERR(cd->health_thread)) { 1040 rc = PTR_ERR(cd->health_thread); 1041 cd->health_thread = NULL; 1042 return rc; 1043 } 1044 return 0; 1045 } 1046 1047 static int genwqe_health_thread_running(struct genwqe_dev *cd) 1048 { 1049 return cd->health_thread != NULL; 1050 } 1051 1052 static int genwqe_health_check_stop(struct genwqe_dev *cd) 1053 { 1054 int rc; 1055 1056 if (!genwqe_health_thread_running(cd)) 1057 return -EIO; 1058 1059 rc = kthread_stop(cd->health_thread); 1060 cd->health_thread = NULL; 1061 return 0; 1062 } 1063 1064 /** 1065 * genwqe_pci_setup() - Allocate PCIe related resources for our card 1066 */ 1067 static int genwqe_pci_setup(struct genwqe_dev *cd) 1068 { 1069 int err; 1070 struct pci_dev *pci_dev = cd->pci_dev; 1071 1072 err = pci_enable_device_mem(pci_dev); 1073 if (err) { 1074 dev_err(&pci_dev->dev, 1075 "err: failed to enable pci memory (err=%d)\n", err); 1076 goto err_out; 1077 } 1078 1079 /* Reserve PCI I/O and memory resources */ 1080 err = pci_request_mem_regions(pci_dev, genwqe_driver_name); 1081 if (err) { 1082 dev_err(&pci_dev->dev, 1083 "[%s] err: request bars failed (%d)\n", __func__, err); 1084 err = -EIO; 1085 goto err_disable_device; 1086 } 1087 1088 /* check for 64-bit DMA address supported (DAC) */ 1089 if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64))) { 1090 err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(64)); 1091 if (err) { 1092 dev_err(&pci_dev->dev, 1093 "err: DMA64 consistent mask error\n"); 1094 err = -EIO; 1095 goto out_release_resources; 1096 } 1097 /* check for 32-bit DMA address supported (SAC) */ 1098 } else if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) { 1099 err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(32)); 1100 if (err) { 1101 dev_err(&pci_dev->dev, 1102 "err: DMA32 consistent mask error\n"); 1103 err = -EIO; 1104 goto out_release_resources; 1105 } 1106 } else { 1107 dev_err(&pci_dev->dev, 1108 "err: neither DMA32 nor DMA64 supported\n"); 1109 err = -EIO; 1110 goto out_release_resources; 1111 } 1112 1113 pci_set_master(pci_dev); 1114 pci_enable_pcie_error_reporting(pci_dev); 1115 1116 /* EEH recovery requires PCIe fundamental reset */ 1117 pci_dev->needs_freset = 1; 1118 1119 /* request complete BAR-0 space (length = 0) */ 1120 cd->mmio_len = pci_resource_len(pci_dev, 0); 1121 cd->mmio = pci_iomap(pci_dev, 0, 0); 1122 if (cd->mmio == NULL) { 1123 dev_err(&pci_dev->dev, 1124 "[%s] err: mapping BAR0 failed\n", __func__); 1125 err = -ENOMEM; 1126 goto out_release_resources; 1127 } 1128 1129 cd->num_vfs = pci_sriov_get_totalvfs(pci_dev); 1130 if (cd->num_vfs < 0) 1131 cd->num_vfs = 0; 1132 1133 err = genwqe_read_ids(cd); 1134 if (err) 1135 goto out_iounmap; 1136 1137 return 0; 1138 1139 out_iounmap: 1140 pci_iounmap(pci_dev, cd->mmio); 1141 out_release_resources: 1142 pci_release_mem_regions(pci_dev); 1143 err_disable_device: 1144 pci_disable_device(pci_dev); 1145 err_out: 1146 return err; 1147 } 1148 1149 /** 1150 * genwqe_pci_remove() - Free PCIe related resources for our card 1151 */ 1152 static void genwqe_pci_remove(struct genwqe_dev *cd) 1153 { 1154 struct pci_dev *pci_dev = cd->pci_dev; 1155 1156 if (cd->mmio) 1157 pci_iounmap(pci_dev, cd->mmio); 1158 1159 pci_release_mem_regions(pci_dev); 1160 pci_disable_device(pci_dev); 1161 } 1162 1163 /** 1164 * genwqe_probe() - Device initialization 1165 * @pdev: PCI device information struct 1166 * 1167 * Callable for multiple cards. This function is called on bind. 1168 * 1169 * Return: 0 if succeeded, < 0 when failed 1170 */ 1171 static int genwqe_probe(struct pci_dev *pci_dev, 1172 const struct pci_device_id *id) 1173 { 1174 int err; 1175 struct genwqe_dev *cd; 1176 1177 genwqe_init_crc32(); 1178 1179 cd = genwqe_dev_alloc(); 1180 if (IS_ERR(cd)) { 1181 dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n", 1182 (int)PTR_ERR(cd)); 1183 return PTR_ERR(cd); 1184 } 1185 1186 dev_set_drvdata(&pci_dev->dev, cd); 1187 cd->pci_dev = pci_dev; 1188 1189 err = genwqe_pci_setup(cd); 1190 if (err < 0) { 1191 dev_err(&pci_dev->dev, 1192 "err: problems with PCI setup (err=%d)\n", err); 1193 goto out_free_dev; 1194 } 1195 1196 err = genwqe_start(cd); 1197 if (err < 0) { 1198 dev_err(&pci_dev->dev, 1199 "err: cannot start card services! (err=%d)\n", err); 1200 goto out_pci_remove; 1201 } 1202 1203 if (genwqe_is_privileged(cd)) { 1204 err = genwqe_health_check_start(cd); 1205 if (err < 0) { 1206 dev_err(&pci_dev->dev, 1207 "err: cannot start health checking! (err=%d)\n", 1208 err); 1209 goto out_stop_services; 1210 } 1211 } 1212 return 0; 1213 1214 out_stop_services: 1215 genwqe_stop(cd); 1216 out_pci_remove: 1217 genwqe_pci_remove(cd); 1218 out_free_dev: 1219 genwqe_dev_free(cd); 1220 return err; 1221 } 1222 1223 /** 1224 * genwqe_remove() - Called when device is removed (hot-plugable) 1225 * 1226 * Or when driver is unloaded respecitively when unbind is done. 1227 */ 1228 static void genwqe_remove(struct pci_dev *pci_dev) 1229 { 1230 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); 1231 1232 genwqe_health_check_stop(cd); 1233 1234 /* 1235 * genwqe_stop() must survive if it is called twice 1236 * sequentially. This happens when the health thread calls it 1237 * and fails on genwqe_bus_reset(). 1238 */ 1239 genwqe_stop(cd); 1240 genwqe_pci_remove(cd); 1241 genwqe_dev_free(cd); 1242 } 1243 1244 /* 1245 * genwqe_err_error_detected() - Error detection callback 1246 * 1247 * This callback is called by the PCI subsystem whenever a PCI bus 1248 * error is detected. 1249 */ 1250 static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev, 1251 enum pci_channel_state state) 1252 { 1253 struct genwqe_dev *cd; 1254 1255 dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state); 1256 1257 cd = dev_get_drvdata(&pci_dev->dev); 1258 if (cd == NULL) 1259 return PCI_ERS_RESULT_DISCONNECT; 1260 1261 /* Stop the card */ 1262 genwqe_health_check_stop(cd); 1263 genwqe_stop(cd); 1264 1265 /* 1266 * On permanent failure, the PCI code will call device remove 1267 * after the return of this function. 1268 * genwqe_stop() can be called twice. 1269 */ 1270 if (state == pci_channel_io_perm_failure) { 1271 return PCI_ERS_RESULT_DISCONNECT; 1272 } else { 1273 genwqe_pci_remove(cd); 1274 return PCI_ERS_RESULT_NEED_RESET; 1275 } 1276 } 1277 1278 static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev) 1279 { 1280 int rc; 1281 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); 1282 1283 rc = genwqe_pci_setup(cd); 1284 if (!rc) { 1285 return PCI_ERS_RESULT_RECOVERED; 1286 } else { 1287 dev_err(&pci_dev->dev, 1288 "err: problems with PCI setup (err=%d)\n", rc); 1289 return PCI_ERS_RESULT_DISCONNECT; 1290 } 1291 } 1292 1293 static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev) 1294 { 1295 return PCI_ERS_RESULT_NONE; 1296 } 1297 1298 static void genwqe_err_resume(struct pci_dev *pci_dev) 1299 { 1300 int rc; 1301 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); 1302 1303 rc = genwqe_start(cd); 1304 if (!rc) { 1305 rc = genwqe_health_check_start(cd); 1306 if (rc) 1307 dev_err(&pci_dev->dev, 1308 "err: cannot start health checking! (err=%d)\n", 1309 rc); 1310 } else { 1311 dev_err(&pci_dev->dev, 1312 "err: cannot start card services! (err=%d)\n", rc); 1313 } 1314 } 1315 1316 static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs) 1317 { 1318 int rc; 1319 struct genwqe_dev *cd = dev_get_drvdata(&dev->dev); 1320 1321 if (numvfs > 0) { 1322 genwqe_setup_vf_jtimer(cd); 1323 rc = pci_enable_sriov(dev, numvfs); 1324 if (rc < 0) 1325 return rc; 1326 return numvfs; 1327 } 1328 if (numvfs == 0) { 1329 pci_disable_sriov(dev); 1330 return 0; 1331 } 1332 return 0; 1333 } 1334 1335 static struct pci_error_handlers genwqe_err_handler = { 1336 .error_detected = genwqe_err_error_detected, 1337 .mmio_enabled = genwqe_err_result_none, 1338 .slot_reset = genwqe_err_slot_reset, 1339 .resume = genwqe_err_resume, 1340 }; 1341 1342 static struct pci_driver genwqe_driver = { 1343 .name = genwqe_driver_name, 1344 .id_table = genwqe_device_table, 1345 .probe = genwqe_probe, 1346 .remove = genwqe_remove, 1347 .sriov_configure = genwqe_sriov_configure, 1348 .err_handler = &genwqe_err_handler, 1349 }; 1350 1351 /** 1352 * genwqe_devnode() - Set default access mode for genwqe devices. 1353 * 1354 * Default mode should be rw for everybody. Do not change default 1355 * device name. 1356 */ 1357 static char *genwqe_devnode(struct device *dev, umode_t *mode) 1358 { 1359 if (mode) 1360 *mode = 0666; 1361 return NULL; 1362 } 1363 1364 /** 1365 * genwqe_init_module() - Driver registration and initialization 1366 */ 1367 static int __init genwqe_init_module(void) 1368 { 1369 int rc; 1370 1371 class_genwqe = class_create(THIS_MODULE, GENWQE_DEVNAME); 1372 if (IS_ERR(class_genwqe)) { 1373 pr_err("[%s] create class failed\n", __func__); 1374 return -ENOMEM; 1375 } 1376 1377 class_genwqe->devnode = genwqe_devnode; 1378 1379 debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL); 1380 if (!debugfs_genwqe) { 1381 rc = -ENOMEM; 1382 goto err_out; 1383 } 1384 1385 rc = pci_register_driver(&genwqe_driver); 1386 if (rc != 0) { 1387 pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc); 1388 goto err_out0; 1389 } 1390 1391 return rc; 1392 1393 err_out0: 1394 debugfs_remove(debugfs_genwqe); 1395 err_out: 1396 class_destroy(class_genwqe); 1397 return rc; 1398 } 1399 1400 /** 1401 * genwqe_exit_module() - Driver exit 1402 */ 1403 static void __exit genwqe_exit_module(void) 1404 { 1405 pci_unregister_driver(&genwqe_driver); 1406 debugfs_remove(debugfs_genwqe); 1407 class_destroy(class_genwqe); 1408 } 1409 1410 module_init(genwqe_init_module); 1411 module_exit(genwqe_exit_module); 1412