1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2016-2021 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 * 7 */ 8 9 #define pr_fmt(fmt) "habanalabs: " fmt 10 11 #include "habanalabs.h" 12 #include "../include/hw_ip/pci/pci_general.h" 13 14 #include <linux/pci.h> 15 #include <linux/aer.h> 16 #include <linux/module.h> 17 18 #define CREATE_TRACE_POINTS 19 #include <trace/events/habanalabs.h> 20 21 #define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team" 22 23 #define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators" 24 25 MODULE_AUTHOR(HL_DRIVER_AUTHOR); 26 MODULE_DESCRIPTION(HL_DRIVER_DESC); 27 MODULE_LICENSE("GPL v2"); 28 29 static int hl_major; 30 static struct class *hl_class; 31 static DEFINE_IDR(hl_devs_idr); 32 static DEFINE_MUTEX(hl_devs_idr_lock); 33 34 #define HL_DEFAULT_TIMEOUT_LOCKED 30 /* 30 seconds */ 35 #define GAUDI_DEFAULT_TIMEOUT_LOCKED 600 /* 10 minutes */ 36 37 static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED; 38 static int reset_on_lockup = 1; 39 static int memory_scrub; 40 static ulong boot_error_status_mask = ULONG_MAX; 41 42 module_param(timeout_locked, int, 0444); 43 MODULE_PARM_DESC(timeout_locked, 44 "Device lockup timeout in seconds (0 = disabled, default 30s)"); 45 46 module_param(reset_on_lockup, int, 0444); 47 MODULE_PARM_DESC(reset_on_lockup, 48 "Do device reset on lockup (0 = no, 1 = yes, default yes)"); 49 50 module_param(memory_scrub, int, 0444); 51 MODULE_PARM_DESC(memory_scrub, 52 "Scrub device memory in various states (0 = no, 1 = yes, default no)"); 53 54 module_param(boot_error_status_mask, ulong, 0444); 55 MODULE_PARM_DESC(boot_error_status_mask, 56 "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)"); 57 58 #define PCI_VENDOR_ID_HABANALABS 0x1da3 59 60 #define PCI_IDS_GOYA 0x0001 61 #define PCI_IDS_GAUDI 0x1000 62 #define PCI_IDS_GAUDI_SEC 0x1010 63 64 #define PCI_IDS_GAUDI2 0x1020 65 66 static const struct pci_device_id ids[] = { 67 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, 68 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, 69 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), }, 70 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), }, 71 { 0, } 72 }; 73 MODULE_DEVICE_TABLE(pci, ids); 74 75 /* 76 * get_asic_type - translate device id to asic type 77 * 78 * @hdev: pointer to habanalabs device structure. 79 * 80 * Translate device id and revision id to asic type. 81 * In case of unidentified device, return -1 82 */ 83 static enum hl_asic_type get_asic_type(struct hl_device *hdev) 84 { 85 struct pci_dev *pdev = hdev->pdev; 86 enum hl_asic_type asic_type = ASIC_INVALID; 87 88 switch (pdev->device) { 89 case PCI_IDS_GOYA: 90 asic_type = ASIC_GOYA; 91 break; 92 case PCI_IDS_GAUDI: 93 asic_type = ASIC_GAUDI; 94 break; 95 case PCI_IDS_GAUDI_SEC: 96 asic_type = ASIC_GAUDI_SEC; 97 break; 98 case PCI_IDS_GAUDI2: 99 switch (pdev->revision) { 100 case REV_ID_A: 101 asic_type = ASIC_GAUDI2; 102 break; 103 case REV_ID_B: 104 asic_type = ASIC_GAUDI2B; 105 break; 106 default: 107 break; 108 } 109 break; 110 default: 111 break; 112 } 113 114 return asic_type; 115 } 116 117 static bool is_asic_secured(enum hl_asic_type asic_type) 118 { 119 switch (asic_type) { 120 case ASIC_GAUDI_SEC: 121 return true; 122 default: 123 return false; 124 } 125 } 126 127 /* 128 * hl_device_open - open function for habanalabs device 129 * 130 * @inode: pointer to inode structure 131 * @filp: pointer to file structure 132 * 133 * Called when process opens an habanalabs device. 134 */ 135 int hl_device_open(struct inode *inode, struct file *filp) 136 { 137 enum hl_device_status status; 138 struct hl_device *hdev; 139 struct hl_fpriv *hpriv; 140 int rc; 141 142 mutex_lock(&hl_devs_idr_lock); 143 hdev = idr_find(&hl_devs_idr, iminor(inode)); 144 mutex_unlock(&hl_devs_idr_lock); 145 146 if (!hdev) { 147 pr_err("Couldn't find device %d:%d\n", 148 imajor(inode), iminor(inode)); 149 return -ENXIO; 150 } 151 152 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); 153 if (!hpriv) 154 return -ENOMEM; 155 156 hpriv->hdev = hdev; 157 filp->private_data = hpriv; 158 hpriv->filp = filp; 159 160 mutex_init(&hpriv->notifier_event.lock); 161 mutex_init(&hpriv->restore_phase_mutex); 162 mutex_init(&hpriv->ctx_lock); 163 kref_init(&hpriv->refcount); 164 nonseekable_open(inode, filp); 165 166 hl_ctx_mgr_init(&hpriv->ctx_mgr); 167 hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr); 168 169 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); 170 171 mutex_lock(&hdev->fpriv_list_lock); 172 173 if (!hl_device_operational(hdev, &status)) { 174 dev_dbg_ratelimited(hdev->dev, 175 "Can't open %s because it is %s\n", 176 dev_name(hdev->dev), hdev->status[status]); 177 178 if (status == HL_DEVICE_STATUS_IN_RESET || 179 status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE) 180 rc = -EAGAIN; 181 else 182 rc = -EPERM; 183 184 goto out_err; 185 } 186 187 if (hdev->is_in_dram_scrub) { 188 dev_dbg_ratelimited(hdev->dev, 189 "Can't open %s during dram scrub\n", 190 dev_name(hdev->dev)); 191 rc = -EAGAIN; 192 goto out_err; 193 } 194 195 if (hdev->compute_ctx_in_release) { 196 dev_dbg_ratelimited(hdev->dev, 197 "Can't open %s because another user is still releasing it\n", 198 dev_name(hdev->dev)); 199 rc = -EAGAIN; 200 goto out_err; 201 } 202 203 if (hdev->is_compute_ctx_active) { 204 dev_dbg_ratelimited(hdev->dev, 205 "Can't open %s because another user is working on it\n", 206 dev_name(hdev->dev)); 207 rc = -EBUSY; 208 goto out_err; 209 } 210 211 rc = hl_ctx_create(hdev, hpriv); 212 if (rc) { 213 dev_err(hdev->dev, "Failed to create context %d\n", rc); 214 goto out_err; 215 } 216 217 list_add(&hpriv->dev_node, &hdev->fpriv_list); 218 mutex_unlock(&hdev->fpriv_list_lock); 219 220 hdev->asic_funcs->send_device_activity(hdev, true); 221 222 hl_debugfs_add_file(hpriv); 223 224 atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1); 225 atomic_set(&hdev->captured_err_info.razwi_info.razwi_detected, 0); 226 atomic_set(&hdev->captured_err_info.page_fault_info.page_fault_detected, 0); 227 hdev->captured_err_info.undef_opcode.write_enable = true; 228 hdev->captured_err_info.razwi_info.razwi_info_available = false; 229 hdev->captured_err_info.page_fault_info.page_fault_info_available = false; 230 231 hdev->open_counter++; 232 hdev->last_successful_open_jif = jiffies; 233 hdev->last_successful_open_ktime = ktime_get(); 234 235 return 0; 236 237 out_err: 238 mutex_unlock(&hdev->fpriv_list_lock); 239 hl_mem_mgr_fini(&hpriv->mem_mgr); 240 hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); 241 filp->private_data = NULL; 242 mutex_destroy(&hpriv->ctx_lock); 243 mutex_destroy(&hpriv->restore_phase_mutex); 244 mutex_destroy(&hpriv->notifier_event.lock); 245 put_pid(hpriv->taskpid); 246 247 kfree(hpriv); 248 249 return rc; 250 } 251 252 int hl_device_open_ctrl(struct inode *inode, struct file *filp) 253 { 254 struct hl_device *hdev; 255 struct hl_fpriv *hpriv; 256 int rc; 257 258 mutex_lock(&hl_devs_idr_lock); 259 hdev = idr_find(&hl_devs_idr, iminor(inode)); 260 mutex_unlock(&hl_devs_idr_lock); 261 262 if (!hdev) { 263 pr_err("Couldn't find device %d:%d\n", 264 imajor(inode), iminor(inode)); 265 return -ENXIO; 266 } 267 268 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); 269 if (!hpriv) 270 return -ENOMEM; 271 272 /* Prevent other routines from reading partial hpriv data by 273 * initializing hpriv fields before inserting it to the list 274 */ 275 hpriv->hdev = hdev; 276 filp->private_data = hpriv; 277 hpriv->filp = filp; 278 279 mutex_init(&hpriv->notifier_event.lock); 280 nonseekable_open(inode, filp); 281 282 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); 283 284 mutex_lock(&hdev->fpriv_ctrl_list_lock); 285 286 if (!hl_ctrl_device_operational(hdev, NULL)) { 287 dev_dbg_ratelimited(hdev->dev_ctrl, 288 "Can't open %s because it is disabled\n", 289 dev_name(hdev->dev_ctrl)); 290 rc = -EPERM; 291 goto out_err; 292 } 293 294 list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list); 295 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 296 297 return 0; 298 299 out_err: 300 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 301 filp->private_data = NULL; 302 put_pid(hpriv->taskpid); 303 304 kfree(hpriv); 305 306 return rc; 307 } 308 309 static void set_driver_behavior_per_device(struct hl_device *hdev) 310 { 311 hdev->nic_ports_mask = 0; 312 hdev->fw_components = FW_TYPE_ALL_TYPES; 313 hdev->mmu_enable = MMU_EN_ALL; 314 hdev->cpu_queues_enable = 1; 315 hdev->pldm = 0; 316 hdev->hard_reset_on_fw_events = 1; 317 hdev->bmc_enable = 1; 318 hdev->reset_on_preboot_fail = 1; 319 hdev->heartbeat = 1; 320 } 321 322 static void copy_kernel_module_params_to_device(struct hl_device *hdev) 323 { 324 hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); 325 326 hdev->major = hl_major; 327 hdev->memory_scrub = memory_scrub; 328 hdev->reset_on_lockup = reset_on_lockup; 329 hdev->boot_error_status_mask = boot_error_status_mask; 330 } 331 332 static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout) 333 { 334 switch (hdev->asic_type) { 335 case ASIC_GAUDI: 336 case ASIC_GAUDI_SEC: 337 /* If user didn't request a different timeout than the default one, we have 338 * a different default timeout for Gaudi 339 */ 340 if (timeout == HL_DEFAULT_TIMEOUT_LOCKED) 341 hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED * 342 MSEC_PER_SEC); 343 344 hdev->reset_upon_device_release = 0; 345 break; 346 347 case ASIC_GOYA: 348 hdev->reset_upon_device_release = 0; 349 break; 350 351 default: 352 hdev->reset_upon_device_release = 1; 353 break; 354 } 355 } 356 357 static int fixup_device_params(struct hl_device *hdev) 358 { 359 int tmp_timeout; 360 361 tmp_timeout = timeout_locked; 362 363 hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; 364 hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; 365 366 if (tmp_timeout) 367 hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC); 368 else 369 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; 370 371 hdev->stop_on_err = true; 372 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; 373 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; 374 375 /* Enable only after the initialization of the device */ 376 hdev->disabled = true; 377 378 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) && 379 (hdev->fw_components & ~FW_TYPE_PREBOOT_CPU)) { 380 pr_err("Preboot must be set along with other components"); 381 return -EINVAL; 382 } 383 384 /* If CPU queues not enabled, no way to do heartbeat */ 385 if (!hdev->cpu_queues_enable) 386 hdev->heartbeat = 0; 387 388 fixup_device_params_per_asic(hdev, tmp_timeout); 389 390 return 0; 391 } 392 393 /** 394 * create_hdev - create habanalabs device instance 395 * 396 * @dev: will hold the pointer to the new habanalabs device structure 397 * @pdev: pointer to the pci device 398 * 399 * Allocate memory for habanalabs device and initialize basic fields 400 * Identify the ASIC type 401 * Allocate ID (minor) for the device (only for real devices) 402 */ 403 static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) 404 { 405 int main_id, ctrl_id = 0, rc = 0; 406 struct hl_device *hdev; 407 408 *dev = NULL; 409 410 hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); 411 if (!hdev) 412 return -ENOMEM; 413 414 /* Will be NULL in case of simulator device */ 415 hdev->pdev = pdev; 416 417 /* Assign status description string */ 418 strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX); 419 strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); 420 strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); 421 strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); 422 strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], 423 "in device creation", HL_STR_MAX); 424 strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], 425 "in reset after device release", HL_STR_MAX); 426 427 428 /* First, we must find out which ASIC are we handling. This is needed 429 * to configure the behavior of the driver (kernel parameters) 430 */ 431 hdev->asic_type = get_asic_type(hdev); 432 if (hdev->asic_type == ASIC_INVALID) { 433 dev_err(&pdev->dev, "Unsupported ASIC\n"); 434 rc = -ENODEV; 435 goto free_hdev; 436 } 437 438 copy_kernel_module_params_to_device(hdev); 439 440 set_driver_behavior_per_device(hdev); 441 442 fixup_device_params(hdev); 443 444 mutex_lock(&hl_devs_idr_lock); 445 446 /* Always save 2 numbers, 1 for main device and 1 for control. 447 * They must be consecutive 448 */ 449 main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL); 450 451 if (main_id >= 0) 452 ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1, 453 main_id + 2, GFP_KERNEL); 454 455 mutex_unlock(&hl_devs_idr_lock); 456 457 if ((main_id < 0) || (ctrl_id < 0)) { 458 if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC)) 459 pr_err("too many devices in the system\n"); 460 461 if (main_id >= 0) { 462 mutex_lock(&hl_devs_idr_lock); 463 idr_remove(&hl_devs_idr, main_id); 464 mutex_unlock(&hl_devs_idr_lock); 465 } 466 467 rc = -EBUSY; 468 goto free_hdev; 469 } 470 471 hdev->id = main_id; 472 hdev->id_control = ctrl_id; 473 474 *dev = hdev; 475 476 return 0; 477 478 free_hdev: 479 kfree(hdev); 480 return rc; 481 } 482 483 /* 484 * destroy_hdev - destroy habanalabs device instance 485 * 486 * @dev: pointer to the habanalabs device structure 487 * 488 */ 489 static void destroy_hdev(struct hl_device *hdev) 490 { 491 /* Remove device from the device list */ 492 mutex_lock(&hl_devs_idr_lock); 493 idr_remove(&hl_devs_idr, hdev->id); 494 idr_remove(&hl_devs_idr, hdev->id_control); 495 mutex_unlock(&hl_devs_idr_lock); 496 497 kfree(hdev); 498 } 499 500 static int hl_pmops_suspend(struct device *dev) 501 { 502 struct hl_device *hdev = dev_get_drvdata(dev); 503 504 pr_debug("Going to suspend PCI device\n"); 505 506 if (!hdev) { 507 pr_err("device pointer is NULL in suspend\n"); 508 return 0; 509 } 510 511 return hl_device_suspend(hdev); 512 } 513 514 static int hl_pmops_resume(struct device *dev) 515 { 516 struct hl_device *hdev = dev_get_drvdata(dev); 517 518 pr_debug("Going to resume PCI device\n"); 519 520 if (!hdev) { 521 pr_err("device pointer is NULL in resume\n"); 522 return 0; 523 } 524 525 return hl_device_resume(hdev); 526 } 527 528 /** 529 * hl_pci_probe - probe PCI habanalabs devices 530 * 531 * @pdev: pointer to pci device 532 * @id: pointer to pci device id structure 533 * 534 * Standard PCI probe function for habanalabs device. 535 * Create a new habanalabs device and initialize it according to the 536 * device's type 537 */ 538 static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 539 { 540 struct hl_device *hdev; 541 int rc; 542 543 dev_info(&pdev->dev, HL_NAME 544 " device found [%04x:%04x] (rev %x)\n", 545 (int)pdev->vendor, (int)pdev->device, (int)pdev->revision); 546 547 rc = create_hdev(&hdev, pdev); 548 if (rc) 549 return rc; 550 551 pci_set_drvdata(pdev, hdev); 552 553 pci_enable_pcie_error_reporting(pdev); 554 555 rc = hl_device_init(hdev, hl_class); 556 if (rc) { 557 dev_err(&pdev->dev, "Fatal error during habanalabs device init\n"); 558 rc = -ENODEV; 559 goto disable_device; 560 } 561 562 return 0; 563 564 disable_device: 565 pci_disable_pcie_error_reporting(pdev); 566 pci_set_drvdata(pdev, NULL); 567 destroy_hdev(hdev); 568 569 return rc; 570 } 571 572 /* 573 * hl_pci_remove - remove PCI habanalabs devices 574 * 575 * @pdev: pointer to pci device 576 * 577 * Standard PCI remove function for habanalabs device 578 */ 579 static void hl_pci_remove(struct pci_dev *pdev) 580 { 581 struct hl_device *hdev; 582 583 hdev = pci_get_drvdata(pdev); 584 if (!hdev) 585 return; 586 587 hl_device_fini(hdev); 588 pci_disable_pcie_error_reporting(pdev); 589 pci_set_drvdata(pdev, NULL); 590 destroy_hdev(hdev); 591 } 592 593 /** 594 * hl_pci_err_detected - a PCI bus error detected on this device 595 * 596 * @pdev: pointer to pci device 597 * @state: PCI error type 598 * 599 * Called by the PCI subsystem whenever a non-correctable 600 * PCI bus error is detected 601 */ 602 static pci_ers_result_t 603 hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) 604 { 605 struct hl_device *hdev = pci_get_drvdata(pdev); 606 enum pci_ers_result result; 607 608 switch (state) { 609 case pci_channel_io_normal: 610 dev_warn(hdev->dev, "PCI normal state error detected\n"); 611 return PCI_ERS_RESULT_CAN_RECOVER; 612 613 case pci_channel_io_frozen: 614 dev_warn(hdev->dev, "PCI frozen state error detected\n"); 615 result = PCI_ERS_RESULT_NEED_RESET; 616 break; 617 618 case pci_channel_io_perm_failure: 619 dev_warn(hdev->dev, "PCI failure state error detected\n"); 620 result = PCI_ERS_RESULT_DISCONNECT; 621 break; 622 623 default: 624 result = PCI_ERS_RESULT_NONE; 625 } 626 627 hdev->asic_funcs->halt_engines(hdev, true, false); 628 629 return result; 630 } 631 632 /** 633 * hl_pci_err_resume - resume after a PCI slot reset 634 * 635 * @pdev: pointer to pci device 636 * 637 */ 638 static void hl_pci_err_resume(struct pci_dev *pdev) 639 { 640 struct hl_device *hdev = pci_get_drvdata(pdev); 641 642 dev_warn(hdev->dev, "Resuming device after PCI slot reset\n"); 643 hl_device_resume(hdev); 644 } 645 646 /** 647 * hl_pci_err_slot_reset - a PCI slot reset has just happened 648 * 649 * @pdev: pointer to pci device 650 * 651 * Determine if the driver can recover from the PCI slot reset 652 */ 653 static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev) 654 { 655 struct hl_device *hdev = pci_get_drvdata(pdev); 656 657 dev_warn(hdev->dev, "PCI slot reset detected\n"); 658 659 return PCI_ERS_RESULT_RECOVERED; 660 } 661 662 static const struct dev_pm_ops hl_pm_ops = { 663 .suspend = hl_pmops_suspend, 664 .resume = hl_pmops_resume, 665 }; 666 667 static const struct pci_error_handlers hl_pci_err_handler = { 668 .error_detected = hl_pci_err_detected, 669 .slot_reset = hl_pci_err_slot_reset, 670 .resume = hl_pci_err_resume, 671 }; 672 673 static struct pci_driver hl_pci_driver = { 674 .name = HL_NAME, 675 .id_table = ids, 676 .probe = hl_pci_probe, 677 .remove = hl_pci_remove, 678 .shutdown = hl_pci_remove, 679 .driver = { 680 .name = HL_NAME, 681 .pm = &hl_pm_ops, 682 .probe_type = PROBE_PREFER_ASYNCHRONOUS, 683 }, 684 .err_handler = &hl_pci_err_handler, 685 }; 686 687 /* 688 * hl_init - Initialize the habanalabs kernel driver 689 */ 690 static int __init hl_init(void) 691 { 692 int rc; 693 dev_t dev; 694 695 pr_info("loading driver\n"); 696 697 rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME); 698 if (rc < 0) { 699 pr_err("unable to get major\n"); 700 return rc; 701 } 702 703 hl_major = MAJOR(dev); 704 705 hl_class = class_create(THIS_MODULE, HL_NAME); 706 if (IS_ERR(hl_class)) { 707 pr_err("failed to allocate class\n"); 708 rc = PTR_ERR(hl_class); 709 goto remove_major; 710 } 711 712 hl_debugfs_init(); 713 714 rc = pci_register_driver(&hl_pci_driver); 715 if (rc) { 716 pr_err("failed to register pci device\n"); 717 goto remove_debugfs; 718 } 719 720 pr_debug("driver loaded\n"); 721 722 return 0; 723 724 remove_debugfs: 725 hl_debugfs_fini(); 726 class_destroy(hl_class); 727 remove_major: 728 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); 729 return rc; 730 } 731 732 /* 733 * hl_exit - Release all resources of the habanalabs kernel driver 734 */ 735 static void __exit hl_exit(void) 736 { 737 pci_unregister_driver(&hl_pci_driver); 738 739 /* 740 * Removing debugfs must be after all devices or simulator devices 741 * have been removed because otherwise we get a bug in the 742 * debugfs module for referencing NULL objects 743 */ 744 hl_debugfs_fini(); 745 746 class_destroy(hl_class); 747 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); 748 749 idr_destroy(&hl_devs_idr); 750 751 pr_debug("driver removed\n"); 752 } 753 754 module_init(hl_init); 755 module_exit(hl_exit); 756