1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2021 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 *
7 */
8
9 #define pr_fmt(fmt) "habanalabs: " fmt
10
11 #include "habanalabs.h"
12 #include "../include/hw_ip/pci/pci_general.h"
13
14 #include <linux/pci.h>
15 #include <linux/module.h>
16 #include <linux/vmalloc.h>
17
18 #define CREATE_TRACE_POINTS
19 #include <trace/events/habanalabs.h>
20
21 #define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
22
23 #define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators"
24
25 MODULE_AUTHOR(HL_DRIVER_AUTHOR);
26 MODULE_DESCRIPTION(HL_DRIVER_DESC);
27 MODULE_LICENSE("GPL v2");
28
29 static int hl_major;
30 static struct class *hl_class;
31 static DEFINE_IDR(hl_devs_idr);
32 static DEFINE_MUTEX(hl_devs_idr_lock);
33
34 #define HL_DEFAULT_TIMEOUT_LOCKED 30 /* 30 seconds */
35 #define GAUDI_DEFAULT_TIMEOUT_LOCKED 600 /* 10 minutes */
36
37 static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED;
38 static int reset_on_lockup = 1;
39 static int memory_scrub;
40 static ulong boot_error_status_mask = ULONG_MAX;
41
42 module_param(timeout_locked, int, 0444);
43 MODULE_PARM_DESC(timeout_locked,
44 "Device lockup timeout in seconds (0 = disabled, default 30s)");
45
46 module_param(reset_on_lockup, int, 0444);
47 MODULE_PARM_DESC(reset_on_lockup,
48 "Do device reset on lockup (0 = no, 1 = yes, default yes)");
49
50 module_param(memory_scrub, int, 0444);
51 MODULE_PARM_DESC(memory_scrub,
52 "Scrub device memory in various states (0 = no, 1 = yes, default no)");
53
54 module_param(boot_error_status_mask, ulong, 0444);
55 MODULE_PARM_DESC(boot_error_status_mask,
56 "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)");
57
58 #define PCI_IDS_GOYA 0x0001
59 #define PCI_IDS_GAUDI 0x1000
60 #define PCI_IDS_GAUDI_SEC 0x1010
61
62 #define PCI_IDS_GAUDI2 0x1020
63
64 static const struct pci_device_id ids[] = {
65 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
66 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
67 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
68 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), },
69 { 0, }
70 };
71 MODULE_DEVICE_TABLE(pci, ids);
72
73 /*
74 * get_asic_type - translate device id to asic type
75 *
76 * @hdev: pointer to habanalabs device structure.
77 *
78 * Translate device id and revision id to asic type.
79 * In case of unidentified device, return -1
80 */
get_asic_type(struct hl_device * hdev)81 static enum hl_asic_type get_asic_type(struct hl_device *hdev)
82 {
83 struct pci_dev *pdev = hdev->pdev;
84 enum hl_asic_type asic_type = ASIC_INVALID;
85
86 switch (pdev->device) {
87 case PCI_IDS_GOYA:
88 asic_type = ASIC_GOYA;
89 break;
90 case PCI_IDS_GAUDI:
91 asic_type = ASIC_GAUDI;
92 break;
93 case PCI_IDS_GAUDI_SEC:
94 asic_type = ASIC_GAUDI_SEC;
95 break;
96 case PCI_IDS_GAUDI2:
97 switch (pdev->revision) {
98 case REV_ID_A:
99 asic_type = ASIC_GAUDI2;
100 break;
101 case REV_ID_B:
102 asic_type = ASIC_GAUDI2B;
103 break;
104 case REV_ID_C:
105 asic_type = ASIC_GAUDI2C;
106 break;
107 default:
108 break;
109 }
110 break;
111 default:
112 break;
113 }
114
115 return asic_type;
116 }
117
is_asic_secured(enum hl_asic_type asic_type)118 static bool is_asic_secured(enum hl_asic_type asic_type)
119 {
120 switch (asic_type) {
121 case ASIC_GAUDI_SEC:
122 return true;
123 default:
124 return false;
125 }
126 }
127
128 /*
129 * hl_device_open - open function for habanalabs device
130 *
131 * @inode: pointer to inode structure
132 * @filp: pointer to file structure
133 *
134 * Called when process opens an habanalabs device.
135 */
hl_device_open(struct inode * inode,struct file * filp)136 int hl_device_open(struct inode *inode, struct file *filp)
137 {
138 enum hl_device_status status;
139 struct hl_device *hdev;
140 struct hl_fpriv *hpriv;
141 int rc;
142
143 mutex_lock(&hl_devs_idr_lock);
144 hdev = idr_find(&hl_devs_idr, iminor(inode));
145 mutex_unlock(&hl_devs_idr_lock);
146
147 if (!hdev) {
148 pr_err("Couldn't find device %d:%d\n",
149 imajor(inode), iminor(inode));
150 return -ENXIO;
151 }
152
153 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
154 if (!hpriv)
155 return -ENOMEM;
156
157 hpriv->hdev = hdev;
158 filp->private_data = hpriv;
159 hpriv->filp = filp;
160
161 mutex_init(&hpriv->notifier_event.lock);
162 mutex_init(&hpriv->restore_phase_mutex);
163 mutex_init(&hpriv->ctx_lock);
164 kref_init(&hpriv->refcount);
165 nonseekable_open(inode, filp);
166
167 hl_ctx_mgr_init(&hpriv->ctx_mgr);
168 hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
169
170 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
171
172 mutex_lock(&hdev->fpriv_list_lock);
173
174 if (!hl_device_operational(hdev, &status)) {
175 dev_dbg_ratelimited(hdev->dev,
176 "Can't open %s because it is %s\n",
177 dev_name(hdev->dev), hdev->status[status]);
178
179 if (status == HL_DEVICE_STATUS_IN_RESET ||
180 status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE)
181 rc = -EAGAIN;
182 else
183 rc = -EPERM;
184
185 goto out_err;
186 }
187
188 if (hdev->is_in_dram_scrub) {
189 dev_dbg_ratelimited(hdev->dev,
190 "Can't open %s during dram scrub\n",
191 dev_name(hdev->dev));
192 rc = -EAGAIN;
193 goto out_err;
194 }
195
196 if (hdev->compute_ctx_in_release) {
197 dev_dbg_ratelimited(hdev->dev,
198 "Can't open %s because another user is still releasing it\n",
199 dev_name(hdev->dev));
200 rc = -EAGAIN;
201 goto out_err;
202 }
203
204 if (hdev->is_compute_ctx_active) {
205 dev_dbg_ratelimited(hdev->dev,
206 "Can't open %s because another user is working on it\n",
207 dev_name(hdev->dev));
208 rc = -EBUSY;
209 goto out_err;
210 }
211
212 rc = hl_ctx_create(hdev, hpriv);
213 if (rc) {
214 dev_err(hdev->dev, "Failed to create context %d\n", rc);
215 goto out_err;
216 }
217
218 list_add(&hpriv->dev_node, &hdev->fpriv_list);
219 mutex_unlock(&hdev->fpriv_list_lock);
220
221 hdev->asic_funcs->send_device_activity(hdev, true);
222
223 hl_debugfs_add_file(hpriv);
224
225 hl_enable_err_info_capture(&hdev->captured_err_info);
226
227 hdev->open_counter++;
228 hdev->last_successful_open_jif = jiffies;
229 hdev->last_successful_open_ktime = ktime_get();
230
231 return 0;
232
233 out_err:
234 mutex_unlock(&hdev->fpriv_list_lock);
235 hl_mem_mgr_fini(&hpriv->mem_mgr);
236 hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
237 hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
238 filp->private_data = NULL;
239 mutex_destroy(&hpriv->ctx_lock);
240 mutex_destroy(&hpriv->restore_phase_mutex);
241 mutex_destroy(&hpriv->notifier_event.lock);
242 put_pid(hpriv->taskpid);
243
244 kfree(hpriv);
245
246 return rc;
247 }
248
hl_device_open_ctrl(struct inode * inode,struct file * filp)249 int hl_device_open_ctrl(struct inode *inode, struct file *filp)
250 {
251 struct hl_device *hdev;
252 struct hl_fpriv *hpriv;
253 int rc;
254
255 mutex_lock(&hl_devs_idr_lock);
256 hdev = idr_find(&hl_devs_idr, iminor(inode));
257 mutex_unlock(&hl_devs_idr_lock);
258
259 if (!hdev) {
260 pr_err("Couldn't find device %d:%d\n",
261 imajor(inode), iminor(inode));
262 return -ENXIO;
263 }
264
265 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
266 if (!hpriv)
267 return -ENOMEM;
268
269 /* Prevent other routines from reading partial hpriv data by
270 * initializing hpriv fields before inserting it to the list
271 */
272 hpriv->hdev = hdev;
273 filp->private_data = hpriv;
274 hpriv->filp = filp;
275
276 mutex_init(&hpriv->notifier_event.lock);
277 nonseekable_open(inode, filp);
278
279 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
280
281 mutex_lock(&hdev->fpriv_ctrl_list_lock);
282
283 if (!hl_ctrl_device_operational(hdev, NULL)) {
284 dev_dbg_ratelimited(hdev->dev_ctrl,
285 "Can't open %s because it is disabled\n",
286 dev_name(hdev->dev_ctrl));
287 rc = -EPERM;
288 goto out_err;
289 }
290
291 list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list);
292 mutex_unlock(&hdev->fpriv_ctrl_list_lock);
293
294 return 0;
295
296 out_err:
297 mutex_unlock(&hdev->fpriv_ctrl_list_lock);
298 filp->private_data = NULL;
299 put_pid(hpriv->taskpid);
300
301 kfree(hpriv);
302
303 return rc;
304 }
305
set_driver_behavior_per_device(struct hl_device * hdev)306 static void set_driver_behavior_per_device(struct hl_device *hdev)
307 {
308 hdev->nic_ports_mask = 0;
309 hdev->fw_components = FW_TYPE_ALL_TYPES;
310 hdev->cpu_queues_enable = 1;
311 hdev->pldm = 0;
312 hdev->hard_reset_on_fw_events = 1;
313 hdev->bmc_enable = 1;
314 hdev->reset_on_preboot_fail = 1;
315 hdev->heartbeat = 1;
316 }
317
copy_kernel_module_params_to_device(struct hl_device * hdev)318 static void copy_kernel_module_params_to_device(struct hl_device *hdev)
319 {
320 hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
321
322 hdev->major = hl_major;
323 hdev->hclass = hl_class;
324 hdev->memory_scrub = memory_scrub;
325 hdev->reset_on_lockup = reset_on_lockup;
326 hdev->boot_error_status_mask = boot_error_status_mask;
327 }
328
fixup_device_params_per_asic(struct hl_device * hdev,int timeout)329 static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout)
330 {
331 switch (hdev->asic_type) {
332 case ASIC_GAUDI:
333 case ASIC_GAUDI_SEC:
334 /* If user didn't request a different timeout than the default one, we have
335 * a different default timeout for Gaudi
336 */
337 if (timeout == HL_DEFAULT_TIMEOUT_LOCKED)
338 hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED *
339 MSEC_PER_SEC);
340
341 hdev->reset_upon_device_release = 0;
342 break;
343
344 case ASIC_GOYA:
345 hdev->reset_upon_device_release = 0;
346 break;
347
348 default:
349 hdev->reset_upon_device_release = 1;
350 break;
351 }
352 }
353
fixup_device_params(struct hl_device * hdev)354 static int fixup_device_params(struct hl_device *hdev)
355 {
356 int tmp_timeout;
357
358 tmp_timeout = timeout_locked;
359
360 hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
361 hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
362
363 if (tmp_timeout)
364 hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC);
365 else
366 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
367
368 hdev->stop_on_err = true;
369 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
370 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
371
372 /* Enable only after the initialization of the device */
373 hdev->disabled = true;
374
375 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) &&
376 (hdev->fw_components & ~FW_TYPE_PREBOOT_CPU)) {
377 pr_err("Preboot must be set along with other components");
378 return -EINVAL;
379 }
380
381 /* If CPU queues not enabled, no way to do heartbeat */
382 if (!hdev->cpu_queues_enable)
383 hdev->heartbeat = 0;
384 fixup_device_params_per_asic(hdev, tmp_timeout);
385
386 return 0;
387 }
388
389 /**
390 * create_hdev - create habanalabs device instance
391 *
392 * @dev: will hold the pointer to the new habanalabs device structure
393 * @pdev: pointer to the pci device
394 *
395 * Allocate memory for habanalabs device and initialize basic fields
396 * Identify the ASIC type
397 * Allocate ID (minor) for the device (only for real devices)
398 */
create_hdev(struct hl_device ** dev,struct pci_dev * pdev)399 static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
400 {
401 int main_id, ctrl_id = 0, rc = 0;
402 struct hl_device *hdev;
403
404 *dev = NULL;
405
406 hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
407 if (!hdev)
408 return -ENOMEM;
409
410 /* Will be NULL in case of simulator device */
411 hdev->pdev = pdev;
412
413 /* Assign status description string */
414 strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
415 strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
416 strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
417 strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
418 strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
419 "in device creation", HL_STR_MAX);
420 strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
421 "in reset after device release", HL_STR_MAX);
422
423
424 /* First, we must find out which ASIC are we handling. This is needed
425 * to configure the behavior of the driver (kernel parameters)
426 */
427 hdev->asic_type = get_asic_type(hdev);
428 if (hdev->asic_type == ASIC_INVALID) {
429 dev_err(&pdev->dev, "Unsupported ASIC\n");
430 rc = -ENODEV;
431 goto free_hdev;
432 }
433
434 copy_kernel_module_params_to_device(hdev);
435
436 set_driver_behavior_per_device(hdev);
437
438 fixup_device_params(hdev);
439
440 mutex_lock(&hl_devs_idr_lock);
441
442 /* Always save 2 numbers, 1 for main device and 1 for control.
443 * They must be consecutive
444 */
445 main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
446
447 if (main_id >= 0)
448 ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
449 main_id + 2, GFP_KERNEL);
450
451 mutex_unlock(&hl_devs_idr_lock);
452
453 if ((main_id < 0) || (ctrl_id < 0)) {
454 if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
455 pr_err("too many devices in the system\n");
456
457 if (main_id >= 0) {
458 mutex_lock(&hl_devs_idr_lock);
459 idr_remove(&hl_devs_idr, main_id);
460 mutex_unlock(&hl_devs_idr_lock);
461 }
462
463 rc = -EBUSY;
464 goto free_hdev;
465 }
466
467 hdev->id = main_id;
468 hdev->id_control = ctrl_id;
469
470 *dev = hdev;
471
472 return 0;
473
474 free_hdev:
475 kfree(hdev);
476 return rc;
477 }
478
479 /*
480 * destroy_hdev - destroy habanalabs device instance
481 *
482 * @dev: pointer to the habanalabs device structure
483 *
484 */
destroy_hdev(struct hl_device * hdev)485 static void destroy_hdev(struct hl_device *hdev)
486 {
487 /* Remove device from the device list */
488 mutex_lock(&hl_devs_idr_lock);
489 idr_remove(&hl_devs_idr, hdev->id);
490 idr_remove(&hl_devs_idr, hdev->id_control);
491 mutex_unlock(&hl_devs_idr_lock);
492
493 kfree(hdev);
494 }
495
/*
 * hl_pmops_suspend - power management suspend callback
 *
 * @dev: pointer to the generic device structure
 *
 * Forwards to hl_device_suspend(). A device without driver data is only
 * logged and reported as success.
 */
static int hl_pmops_suspend(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to suspend PCI device\n");

	if (hdev)
		return hl_device_suspend(hdev);

	pr_err("device pointer is NULL in suspend\n");
	return 0;
}
509
/*
 * hl_pmops_resume - power management resume callback
 *
 * @dev: pointer to the generic device structure
 *
 * Forwards to hl_device_resume(). A device without driver data is only
 * logged and reported as success.
 */
static int hl_pmops_resume(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to resume PCI device\n");

	if (hdev)
		return hl_device_resume(hdev);

	pr_err("device pointer is NULL in resume\n");
	return 0;
}
523
524 /**
525 * hl_pci_probe - probe PCI habanalabs devices
526 *
527 * @pdev: pointer to pci device
528 * @id: pointer to pci device id structure
529 *
530 * Standard PCI probe function for habanalabs device.
531 * Create a new habanalabs device and initialize it according to the
532 * device's type
533 */
hl_pci_probe(struct pci_dev * pdev,const struct pci_device_id * id)534 static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
535 {
536 struct hl_device *hdev;
537 int rc;
538
539 dev_info(&pdev->dev, HL_NAME
540 " device found [%04x:%04x] (rev %x)\n",
541 (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
542
543 rc = create_hdev(&hdev, pdev);
544 if (rc)
545 return rc;
546
547 pci_set_drvdata(pdev, hdev);
548
549 rc = hl_device_init(hdev);
550 if (rc) {
551 dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
552 rc = -ENODEV;
553 goto disable_device;
554 }
555
556 return 0;
557
558 disable_device:
559 pci_set_drvdata(pdev, NULL);
560 destroy_hdev(hdev);
561
562 return rc;
563 }
564
565 /*
566 * hl_pci_remove - remove PCI habanalabs devices
567 *
568 * @pdev: pointer to pci device
569 *
570 * Standard PCI remove function for habanalabs device
571 */
hl_pci_remove(struct pci_dev * pdev)572 static void hl_pci_remove(struct pci_dev *pdev)
573 {
574 struct hl_device *hdev;
575
576 hdev = pci_get_drvdata(pdev);
577 if (!hdev)
578 return;
579
580 hl_device_fini(hdev);
581 pci_set_drvdata(pdev, NULL);
582 destroy_hdev(hdev);
583 }
584
585 /**
586 * hl_pci_err_detected - a PCI bus error detected on this device
587 *
588 * @pdev: pointer to pci device
589 * @state: PCI error type
590 *
591 * Called by the PCI subsystem whenever a non-correctable
592 * PCI bus error is detected
593 */
594 static pci_ers_result_t
hl_pci_err_detected(struct pci_dev * pdev,pci_channel_state_t state)595 hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
596 {
597 struct hl_device *hdev = pci_get_drvdata(pdev);
598 enum pci_ers_result result;
599
600 switch (state) {
601 case pci_channel_io_normal:
602 dev_warn(hdev->dev, "PCI normal state error detected\n");
603 return PCI_ERS_RESULT_CAN_RECOVER;
604
605 case pci_channel_io_frozen:
606 dev_warn(hdev->dev, "PCI frozen state error detected\n");
607 result = PCI_ERS_RESULT_NEED_RESET;
608 break;
609
610 case pci_channel_io_perm_failure:
611 dev_warn(hdev->dev, "PCI failure state error detected\n");
612 result = PCI_ERS_RESULT_DISCONNECT;
613 break;
614
615 default:
616 result = PCI_ERS_RESULT_NONE;
617 }
618
619 hdev->asic_funcs->halt_engines(hdev, true, false);
620
621 return result;
622 }
623
624 /**
625 * hl_pci_err_resume - resume after a PCI slot reset
626 *
627 * @pdev: pointer to pci device
628 *
629 */
hl_pci_err_resume(struct pci_dev * pdev)630 static void hl_pci_err_resume(struct pci_dev *pdev)
631 {
632 struct hl_device *hdev = pci_get_drvdata(pdev);
633
634 dev_warn(hdev->dev, "Resuming device after PCI slot reset\n");
635 hl_device_resume(hdev);
636 }
637
638 /**
639 * hl_pci_err_slot_reset - a PCI slot reset has just happened
640 *
641 * @pdev: pointer to pci device
642 *
643 * Determine if the driver can recover from the PCI slot reset
644 */
hl_pci_err_slot_reset(struct pci_dev * pdev)645 static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
646 {
647 struct hl_device *hdev = pci_get_drvdata(pdev);
648
649 dev_warn(hdev->dev, "PCI slot reset detected\n");
650
651 return PCI_ERS_RESULT_RECOVERED;
652 }
653
654 static const struct dev_pm_ops hl_pm_ops = {
655 .suspend = hl_pmops_suspend,
656 .resume = hl_pmops_resume,
657 };
658
659 static const struct pci_error_handlers hl_pci_err_handler = {
660 .error_detected = hl_pci_err_detected,
661 .slot_reset = hl_pci_err_slot_reset,
662 .resume = hl_pci_err_resume,
663 };
664
665 static struct pci_driver hl_pci_driver = {
666 .name = HL_NAME,
667 .id_table = ids,
668 .probe = hl_pci_probe,
669 .remove = hl_pci_remove,
670 .shutdown = hl_pci_remove,
671 .driver = {
672 .name = HL_NAME,
673 .pm = &hl_pm_ops,
674 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
675 },
676 .err_handler = &hl_pci_err_handler,
677 };
678
679 /*
680 * hl_init - Initialize the habanalabs kernel driver
681 */
hl_init(void)682 static int __init hl_init(void)
683 {
684 int rc;
685 dev_t dev;
686
687 pr_info("loading driver\n");
688
689 rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
690 if (rc < 0) {
691 pr_err("unable to get major\n");
692 return rc;
693 }
694
695 hl_major = MAJOR(dev);
696
697 hl_class = class_create(HL_NAME);
698 if (IS_ERR(hl_class)) {
699 pr_err("failed to allocate class\n");
700 rc = PTR_ERR(hl_class);
701 goto remove_major;
702 }
703
704 hl_debugfs_init();
705
706 rc = pci_register_driver(&hl_pci_driver);
707 if (rc) {
708 pr_err("failed to register pci device\n");
709 goto remove_debugfs;
710 }
711
712 pr_debug("driver loaded\n");
713
714 return 0;
715
716 remove_debugfs:
717 hl_debugfs_fini();
718 class_destroy(hl_class);
719 remove_major:
720 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
721 return rc;
722 }
723
724 /*
725 * hl_exit - Release all resources of the habanalabs kernel driver
726 */
hl_exit(void)727 static void __exit hl_exit(void)
728 {
729 pci_unregister_driver(&hl_pci_driver);
730
731 /*
732 * Removing debugfs must be after all devices or simulator devices
733 * have been removed because otherwise we get a bug in the
734 * debugfs module for referencing NULL objects
735 */
736 hl_debugfs_fini();
737
738 class_destroy(hl_class);
739 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
740
741 idr_destroy(&hl_devs_idr);
742
743 pr_debug("driver removed\n");
744 }
745
746 module_init(hl_init);
747 module_exit(hl_exit);
748