1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2016-2022 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8 #define pr_fmt(fmt) "habanalabs: " fmt 9 10 #include <uapi/drm/habanalabs_accel.h> 11 #include "habanalabs.h" 12 13 #include <linux/pci.h> 14 #include <linux/hwmon.h> 15 #include <linux/vmalloc.h> 16 17 #include <trace/events/habanalabs.h> 18 19 #define HL_RESET_DELAY_USEC 10000 /* 10ms */ 20 21 #define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 5 22 23 enum dma_alloc_type { 24 DMA_ALLOC_COHERENT, 25 DMA_ALLOC_CPU_ACCESSIBLE, 26 DMA_ALLOC_POOL, 27 }; 28 29 #define MEM_SCRUB_DEFAULT_VAL 0x1122334455667788 30 31 /* 32 * hl_set_dram_bar- sets the bar to allow later access to address 33 * 34 * @hdev: pointer to habanalabs device structure. 35 * @addr: the address the caller wants to access. 36 * @region: the PCI region. 37 * @new_bar_region_base: the new BAR region base address. 38 * 39 * @return: the old BAR base address on success, U64_MAX for failure. 40 * The caller should set it back to the old address after use. 41 * 42 * In case the bar space does not cover the whole address space, 43 * the bar base address should be set to allow access to a given address. 44 * This function can be called also if the bar doesn't need to be set, 45 * in that case it just won't change the base. 46 */ 47 static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region, 48 u64 *new_bar_region_base) 49 { 50 struct asic_fixed_properties *prop = &hdev->asic_prop; 51 u64 bar_base_addr, old_base; 52 53 if (is_power_of_2(prop->dram_pci_bar_size)) 54 bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); 55 else 56 bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) * 57 prop->dram_pci_bar_size; 58 59 old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); 60 61 /* in case of success we need to update the new BAR base */ 62 if ((old_base != U64_MAX) && new_bar_region_base) 63 *new_bar_region_base = bar_base_addr; 64 65 return old_base; 66 } 67 68 int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, 69 enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar) 70 { 71 struct pci_mem_region *region = &hdev->pci_mem_region[region_type]; 72 u64 old_base = 0, rc, bar_region_base = region->region_base; 73 void __iomem *acc_addr; 74 75 if (set_dram_bar) { 76 old_base = hl_set_dram_bar(hdev, addr, region, &bar_region_base); 77 if (old_base == U64_MAX) 78 return -EIO; 79 } 80 81 acc_addr = hdev->pcie_bar[region->bar_id] + region->offset_in_bar + 82 (addr - bar_region_base); 83 84 switch (acc_type) { 85 case DEBUGFS_READ8: 86 *val = readb(acc_addr); 87 break; 88 case DEBUGFS_WRITE8: 89 writeb(*val, acc_addr); 90 break; 91 case DEBUGFS_READ32: 92 *val = readl(acc_addr); 93 break; 94 case DEBUGFS_WRITE32: 95 writel(*val, acc_addr); 96 break; 97 case DEBUGFS_READ64: 98 *val = readq(acc_addr); 99 break; 100 case DEBUGFS_WRITE64: 101 writeq(*val, acc_addr); 102 break; 103 } 104 105 if (set_dram_bar) { 106 rc = hl_set_dram_bar(hdev, old_base, region, NULL); 107 if (rc == U64_MAX) 108 return -EIO; 109 } 110 111 return 0; 112 } 113 114 static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 115 gfp_t flag, enum dma_alloc_type alloc_type, 116 const char *caller) 117 { 118 void *ptr = NULL; 119 120 switch (alloc_type) { 121 case DMA_ALLOC_COHERENT: 122 ptr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, size, dma_handle, flag); 123 break; 124 case DMA_ALLOC_CPU_ACCESSIBLE: 125 
ptr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 126 break; 127 case DMA_ALLOC_POOL: 128 ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, size, flag, dma_handle); 129 break; 130 } 131 132 if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr)) 133 trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size, 134 caller); 135 136 return ptr; 137 } 138 139 static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr, 140 dma_addr_t dma_handle, enum dma_alloc_type alloc_type, 141 const char *caller) 142 { 143 /* this is needed to avoid warning on using freed pointer */ 144 u64 store_cpu_addr = (u64) (uintptr_t) cpu_addr; 145 146 switch (alloc_type) { 147 case DMA_ALLOC_COHERENT: 148 hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle); 149 break; 150 case DMA_ALLOC_CPU_ACCESSIBLE: 151 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, cpu_addr); 152 break; 153 case DMA_ALLOC_POOL: 154 hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle); 155 break; 156 } 157 158 trace_habanalabs_dma_free(hdev->dev, store_cpu_addr, dma_handle, size, caller); 159 } 160 161 void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 162 gfp_t flag, const char *caller) 163 { 164 return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller); 165 } 166 167 void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, 168 dma_addr_t dma_handle, const char *caller) 169 { 170 hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller); 171 } 172 173 void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size, 174 dma_addr_t *dma_handle, const char *caller) 175 { 176 return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller); 177 } 178 179 void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr, 180 const char *caller) 181 { 182 hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller); 183 } 184 185 void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, 186 dma_addr_t *dma_handle, const char *caller) 187 { 188 return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller); 189 } 190 191 void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, 192 const char *caller) 193 { 194 hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller); 195 } 196 197 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) 198 { 199 struct asic_fixed_properties *prop = &hdev->asic_prop; 200 struct scatterlist *sg; 201 int rc, i; 202 203 rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0); 204 if (rc) 205 return rc; 206 207 /* Shift to the device's base physical address of host memory if necessary */ 208 if (prop->device_dma_offset_for_host_access) 209 for_each_sgtable_dma_sg(sgt, sg, i) 210 sg->dma_address += prop->device_dma_offset_for_host_access; 211 212 return 0; 213 } 214 215 void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) 216 { 217 struct asic_fixed_properties *prop = &hdev->asic_prop; 218 struct scatterlist *sg; 219 int i; 220 221 /* Cancel the device's base physical address of host memory if necessary */ 222 if (prop->device_dma_offset_for_host_access) 223 
for_each_sgtable_dma_sg(sgt, sg, i) 224 sg->dma_address -= prop->device_dma_offset_for_host_access; 225 226 dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0); 227 } 228 229 /* 230 * hl_access_cfg_region - access the config region 231 * 232 * @hdev: pointer to habanalabs device structure 233 * @addr: the address to access 234 * @val: the value to write from or read to 235 * @acc_type: the type of access (read/write 64/32) 236 */ 237 int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, 238 enum debugfs_access_type acc_type) 239 { 240 struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG]; 241 u32 val_h, val_l; 242 243 if (!IS_ALIGNED(addr, sizeof(u32))) { 244 dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32)); 245 return -EINVAL; 246 } 247 248 switch (acc_type) { 249 case DEBUGFS_READ32: 250 *val = RREG32(addr - cfg_region->region_base); 251 break; 252 case DEBUGFS_WRITE32: 253 WREG32(addr - cfg_region->region_base, *val); 254 break; 255 case DEBUGFS_READ64: 256 val_l = RREG32(addr - cfg_region->region_base); 257 val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base); 258 259 *val = (((u64) val_h) << 32) | val_l; 260 break; 261 case DEBUGFS_WRITE64: 262 WREG32(addr - cfg_region->region_base, lower_32_bits(*val)); 263 WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val)); 264 break; 265 default: 266 dev_err(hdev->dev, "access type %d is not supported\n", acc_type); 267 return -EOPNOTSUPP; 268 } 269 270 return 0; 271 } 272 273 /* 274 * hl_access_dev_mem - access device memory 275 * 276 * @hdev: pointer to habanalabs device structure 277 * @region_type: the type of the region the address belongs to 278 * @addr: the address to access 279 * @val: the value to write from or read to 280 * @acc_type: the type of access (r/w, 32/64) 281 */ 282 int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type, 283 u64 addr, u64 *val, enum debugfs_access_type acc_type) 284 { 285 switch (region_type) { 286 case PCI_REGION_CFG: 287 return hl_access_cfg_region(hdev, addr, val, acc_type); 288 case PCI_REGION_SRAM: 289 case PCI_REGION_DRAM: 290 return hl_access_sram_dram_region(hdev, addr, val, acc_type, 291 region_type, (region_type == PCI_REGION_DRAM)); 292 default: 293 return -EFAULT; 294 } 295 296 return 0; 297 } 298 299 void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...) 300 { 301 va_list args; 302 int str_size; 303 304 va_start(args, fmt); 305 /* Calculate formatted string length. 
Assuming each string is null terminated, hence 306 * increment result by 1 307 */ 308 str_size = vsnprintf(NULL, 0, fmt, args) + 1; 309 va_end(args); 310 311 if ((e->actual_size + str_size) < e->allocated_buf_size) { 312 va_start(args, fmt); 313 vsnprintf(e->buf + e->actual_size, str_size, fmt, args); 314 va_end(args); 315 } 316 317 /* Need to update the size even when not updating destination buffer to get the exact size 318 * of all input strings 319 */ 320 e->actual_size += str_size; 321 } 322 323 enum hl_device_status hl_device_status(struct hl_device *hdev) 324 { 325 enum hl_device_status status; 326 327 if (hdev->reset_info.in_reset) { 328 if (hdev->reset_info.in_compute_reset) 329 status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE; 330 else 331 status = HL_DEVICE_STATUS_IN_RESET; 332 } else if (hdev->reset_info.needs_reset) { 333 status = HL_DEVICE_STATUS_NEEDS_RESET; 334 } else if (hdev->disabled) { 335 status = HL_DEVICE_STATUS_MALFUNCTION; 336 } else if (!hdev->init_done) { 337 status = HL_DEVICE_STATUS_IN_DEVICE_CREATION; 338 } else { 339 status = HL_DEVICE_STATUS_OPERATIONAL; 340 } 341 342 return status; 343 } 344 345 bool hl_device_operational(struct hl_device *hdev, 346 enum hl_device_status *status) 347 { 348 enum hl_device_status current_status; 349 350 current_status = hl_device_status(hdev); 351 if (status) 352 *status = current_status; 353 354 switch (current_status) { 355 case HL_DEVICE_STATUS_IN_RESET: 356 case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: 357 case HL_DEVICE_STATUS_MALFUNCTION: 358 case HL_DEVICE_STATUS_NEEDS_RESET: 359 return false; 360 case HL_DEVICE_STATUS_OPERATIONAL: 361 case HL_DEVICE_STATUS_IN_DEVICE_CREATION: 362 default: 363 return true; 364 } 365 } 366 367 bool hl_ctrl_device_operational(struct hl_device *hdev, 368 enum hl_device_status *status) 369 { 370 enum hl_device_status current_status; 371 372 current_status = hl_device_status(hdev); 373 if (status) 374 *status = current_status; 375 376 switch (current_status) { 377 case HL_DEVICE_STATUS_MALFUNCTION: 378 return false; 379 case HL_DEVICE_STATUS_IN_RESET: 380 case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: 381 case HL_DEVICE_STATUS_NEEDS_RESET: 382 case HL_DEVICE_STATUS_OPERATIONAL: 383 case HL_DEVICE_STATUS_IN_DEVICE_CREATION: 384 default: 385 return true; 386 } 387 } 388 389 static void print_idle_status_mask(struct hl_device *hdev, const char *message, 390 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE]) 391 { 392 u32 pad_width[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {}; 393 394 BUILD_BUG_ON(HL_BUSY_ENGINES_MASK_EXT_SIZE != 4); 395 396 pad_width[3] = idle_mask[3] ? 16 : 0; 397 pad_width[2] = idle_mask[2] || pad_width[3] ? 16 : 0; 398 pad_width[1] = idle_mask[1] || pad_width[2] ? 16 : 0; 399 pad_width[0] = idle_mask[0] || pad_width[1] ? 
								16 : 0;

	dev_err(hdev->dev, "%s (mask %0*llx_%0*llx_%0*llx_%0*llx)\n",
		message, pad_width[3], idle_mask[3], pad_width[2], idle_mask[2],
		pad_width[1], idle_mask[1], pad_width[0], idle_mask[0]);
}

static void hpriv_release(struct kref *ref)
{
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	bool reset_device, device_is_idle = true;
	struct hl_fpriv *hpriv;
	struct hl_device *hdev;

	hpriv = container_of(ref, struct hl_fpriv, refcount);

	hdev = hpriv->hdev;

	hdev->asic_funcs->send_device_activity(hdev, false);

	put_pid(hpriv->taskpid);

	hl_debugfs_remove_file(hpriv);

	mutex_destroy(&hpriv->ctx_lock);
	mutex_destroy(&hpriv->restore_phase_mutex);

	/* Device should be reset if reset-upon-device-release is enabled, or if there is a pending
	 * reset that waits for device release.
	 */
	reset_device = hdev->reset_upon_device_release || hdev->reset_info.watchdog_active;

	/* Check the device idle status and reset if not idle.
	 * Skip it if already in reset, or if device is going to be reset in any case.
	 */
	if (!hdev->reset_info.in_reset && !reset_device && hdev->pdev && !hdev->pldm)
		device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
							HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
	if (!device_is_idle) {
		print_idle_status_mask(hdev, "device is not idle after user context is closed",
					idle_mask);
		reset_device = true;
	}

	/* We need to remove the user from the list to make sure the reset process won't
	 * try to kill the user process. Because, if we got here, it means there are no
	 * more driver/device resources that the user process is occupying, so there is
	 * no need to kill it.
	 *
	 * However, we can't set the compute_ctx to NULL at this stage. This is to prevent
	 * a race between the release and opening the device again. We don't want to let
	 * a user open the device while a reset is about to happen.
	 */
	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_list_lock);

	if (reset_device) {
		hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
	} else {
		/* Scrubbing is handled within hl_device_reset(), so here we need to do it directly */
		int rc = hdev->asic_funcs->scrub_device_mem(hdev);

		if (rc)
			dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
	}

	/* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
	 * thread, we don't care because in_reset is marked, so if a user tries to open
	 * the device it will fail on that, even if compute_ctx is false.
469 */ 470 mutex_lock(&hdev->fpriv_list_lock); 471 hdev->is_compute_ctx_active = false; 472 mutex_unlock(&hdev->fpriv_list_lock); 473 474 hdev->compute_ctx_in_release = 0; 475 476 /* release the eventfd */ 477 if (hpriv->notifier_event.eventfd) 478 eventfd_ctx_put(hpriv->notifier_event.eventfd); 479 480 mutex_destroy(&hpriv->notifier_event.lock); 481 482 kfree(hpriv); 483 } 484 485 void hl_hpriv_get(struct hl_fpriv *hpriv) 486 { 487 kref_get(&hpriv->refcount); 488 } 489 490 int hl_hpriv_put(struct hl_fpriv *hpriv) 491 { 492 return kref_put(&hpriv->refcount, hpriv_release); 493 } 494 495 /* 496 * hl_device_release - release function for habanalabs device 497 * 498 * @inode: pointer to inode structure 499 * @filp: pointer to file structure 500 * 501 * Called when process closes an habanalabs device 502 */ 503 static int hl_device_release(struct inode *inode, struct file *filp) 504 { 505 struct hl_fpriv *hpriv = filp->private_data; 506 struct hl_device *hdev = hpriv->hdev; 507 508 filp->private_data = NULL; 509 510 if (!hdev) { 511 pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n"); 512 put_pid(hpriv->taskpid); 513 return 0; 514 } 515 516 hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); 517 hl_mem_mgr_fini(&hpriv->mem_mgr); 518 519 hdev->compute_ctx_in_release = 1; 520 521 if (!hl_hpriv_put(hpriv)) { 522 dev_notice(hdev->dev, "User process closed FD but device still in use\n"); 523 hl_device_reset(hdev, HL_DRV_RESET_HARD); 524 } 525 526 hdev->last_open_session_duration_jif = 527 jiffies - hdev->last_successful_open_jif; 528 529 return 0; 530 } 531 532 static int hl_device_release_ctrl(struct inode *inode, struct file *filp) 533 { 534 struct hl_fpriv *hpriv = filp->private_data; 535 struct hl_device *hdev = hpriv->hdev; 536 537 filp->private_data = NULL; 538 539 if (!hdev) { 540 pr_err("Closing FD after device was removed\n"); 541 goto out; 542 } 543 544 mutex_lock(&hdev->fpriv_ctrl_list_lock); 545 list_del(&hpriv->dev_node); 546 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 547 out: 548 /* release the eventfd */ 549 if (hpriv->notifier_event.eventfd) 550 eventfd_ctx_put(hpriv->notifier_event.eventfd); 551 552 mutex_destroy(&hpriv->notifier_event.lock); 553 put_pid(hpriv->taskpid); 554 555 kfree(hpriv); 556 557 return 0; 558 } 559 560 /* 561 * hl_mmap - mmap function for habanalabs device 562 * 563 * @*filp: pointer to file structure 564 * @*vma: pointer to vm_area_struct of the process 565 * 566 * Called when process does an mmap on habanalabs device. Call the relevant mmap 567 * function at the end of the common code. 568 */ 569 static int hl_mmap(struct file *filp, struct vm_area_struct *vma) 570 { 571 struct hl_fpriv *hpriv = filp->private_data; 572 struct hl_device *hdev = hpriv->hdev; 573 unsigned long vm_pgoff; 574 575 if (!hdev) { 576 pr_err_ratelimited("Trying to mmap after device was removed! 
Please close FD\n"); 577 return -ENODEV; 578 } 579 580 vm_pgoff = vma->vm_pgoff; 581 582 switch (vm_pgoff & HL_MMAP_TYPE_MASK) { 583 case HL_MMAP_TYPE_BLOCK: 584 vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff); 585 return hl_hw_block_mmap(hpriv, vma); 586 587 case HL_MMAP_TYPE_CB: 588 case HL_MMAP_TYPE_TS_BUFF: 589 return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL); 590 } 591 return -EINVAL; 592 } 593 594 static const struct file_operations hl_ops = { 595 .owner = THIS_MODULE, 596 .open = hl_device_open, 597 .release = hl_device_release, 598 .mmap = hl_mmap, 599 .unlocked_ioctl = hl_ioctl, 600 .compat_ioctl = hl_ioctl 601 }; 602 603 static const struct file_operations hl_ctrl_ops = { 604 .owner = THIS_MODULE, 605 .open = hl_device_open_ctrl, 606 .release = hl_device_release_ctrl, 607 .unlocked_ioctl = hl_ioctl_control, 608 .compat_ioctl = hl_ioctl_control 609 }; 610 611 static void device_release_func(struct device *dev) 612 { 613 kfree(dev); 614 } 615 616 /* 617 * device_init_cdev - Initialize cdev and device for habanalabs device 618 * 619 * @hdev: pointer to habanalabs device structure 620 * @hclass: pointer to the class object of the device 621 * @minor: minor number of the specific device 622 * @fpos: file operations to install for this device 623 * @name: name of the device as it will appear in the filesystem 624 * @cdev: pointer to the char device object that will be initialized 625 * @dev: pointer to the device object that will be initialized 626 * 627 * Initialize a cdev and a Linux device for habanalabs's device. 628 */ 629 static int device_init_cdev(struct hl_device *hdev, struct class *hclass, 630 int minor, const struct file_operations *fops, 631 char *name, struct cdev *cdev, 632 struct device **dev) 633 { 634 cdev_init(cdev, fops); 635 cdev->owner = THIS_MODULE; 636 637 *dev = kzalloc(sizeof(**dev), GFP_KERNEL); 638 if (!*dev) 639 return -ENOMEM; 640 641 device_initialize(*dev); 642 (*dev)->devt = MKDEV(hdev->major, minor); 643 (*dev)->class = hclass; 644 (*dev)->release = device_release_func; 645 dev_set_drvdata(*dev, hdev); 646 dev_set_name(*dev, "%s", name); 647 648 return 0; 649 } 650 651 static int device_cdev_sysfs_add(struct hl_device *hdev) 652 { 653 int rc; 654 655 rc = cdev_device_add(&hdev->cdev, hdev->dev); 656 if (rc) { 657 dev_err(hdev->dev, 658 "failed to add a char device to the system\n"); 659 return rc; 660 } 661 662 rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl); 663 if (rc) { 664 dev_err(hdev->dev, 665 "failed to add a control char device to the system\n"); 666 goto delete_cdev_device; 667 } 668 669 /* hl_sysfs_init() must be done after adding the device to the system */ 670 rc = hl_sysfs_init(hdev); 671 if (rc) { 672 dev_err(hdev->dev, "failed to initialize sysfs\n"); 673 goto delete_ctrl_cdev_device; 674 } 675 676 hdev->cdev_sysfs_created = true; 677 678 return 0; 679 680 delete_ctrl_cdev_device: 681 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); 682 delete_cdev_device: 683 cdev_device_del(&hdev->cdev, hdev->dev); 684 return rc; 685 } 686 687 static void device_cdev_sysfs_del(struct hl_device *hdev) 688 { 689 if (!hdev->cdev_sysfs_created) 690 goto put_devices; 691 692 hl_sysfs_fini(hdev); 693 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); 694 cdev_device_del(&hdev->cdev, hdev->dev); 695 696 put_devices: 697 put_device(hdev->dev); 698 put_device(hdev->dev_ctrl); 699 } 700 701 static void device_hard_reset_pending(struct work_struct *work) 702 { 703 struct hl_device_reset_work *device_reset_work = 704 container_of(work, struct 
									hl_device_reset_work, reset_work.work);
	struct hl_device *hdev = device_reset_work->hdev;
	u32 flags;
	int rc;

	flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR;

	rc = hl_device_reset(hdev, flags);

	if ((rc == -EBUSY) && !hdev->device_fini_pending) {
		struct hl_ctx *ctx = hl_get_compute_ctx(hdev);

		if (ctx) {
			/* The read refcount value should be subtracted by one, because the read is
			 * protected with hl_get_compute_ctx().
			 */
			dev_info(hdev->dev,
				"Could not reset device (compute_ctx refcount %u). Will try again in %u seconds",
				kref_read(&ctx->refcount) - 1, HL_PENDING_RESET_PER_SEC);
			hl_ctx_put(ctx);
		} else {
			dev_info(hdev->dev, "Could not reset device. Will try again in %u seconds",
				HL_PENDING_RESET_PER_SEC);
		}

		queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work,
					msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
	}
}

static void device_release_watchdog_func(struct work_struct *work)
{
	struct hl_device_reset_work *device_release_watchdog_work =
		container_of(work, struct hl_device_reset_work, reset_work.work);
	struct hl_device *hdev = device_release_watchdog_work->hdev;
	u32 flags;

	dev_dbg(hdev->dev, "Device wasn't released in time. Initiate device reset.\n");

	flags = device_release_watchdog_work->flags | HL_DRV_RESET_FROM_WD_THR;

	hl_device_reset(hdev, flags);
}

/*
 * device_early_init - do some early initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Install the relevant function pointers and call the early_init function,
 * if such a function exists
 */
static int device_early_init(struct hl_device *hdev)
{
	int i, rc;
	char workq_name[32];

	switch (hdev->asic_type) {
	case ASIC_GOYA:
		goya_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI_SEC:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI2:
		gaudi2_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI2B:
		gaudi2_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name));
		break;
	default:
		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
			hdev->asic_type);
		return -EINVAL;
	}

	rc = hdev->asic_funcs->early_init(hdev);
	if (rc)
		return rc;

	rc = hl_asid_init(hdev);
	if (rc)
		goto early_fini;

	if (hdev->asic_prop.completion_queues_count) {
		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
					sizeof(struct workqueue_struct *),
					GFP_KERNEL);
		if (!hdev->cq_wq) {
			rc = -ENOMEM;
			goto asid_fini;
		}
	}

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
		if (hdev->cq_wq[i] == NULL) {
			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
			rc = -ENOMEM;
			goto free_cq_wq;
		}
	}

	hdev->eq_wq = create_singlethread_workqueue("hl-events");
	if (hdev->eq_wq == NULL) {
		dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
rc = -ENOMEM; 821 goto free_cq_wq; 822 } 823 824 hdev->cs_cmplt_wq = alloc_workqueue("hl-cs-completions", WQ_UNBOUND, 0); 825 if (!hdev->cs_cmplt_wq) { 826 dev_err(hdev->dev, 827 "Failed to allocate CS completions workqueue\n"); 828 rc = -ENOMEM; 829 goto free_eq_wq; 830 } 831 832 hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0); 833 if (!hdev->ts_free_obj_wq) { 834 dev_err(hdev->dev, 835 "Failed to allocate Timestamp registration free workqueue\n"); 836 rc = -ENOMEM; 837 goto free_cs_cmplt_wq; 838 } 839 840 hdev->prefetch_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0); 841 if (!hdev->prefetch_wq) { 842 dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n"); 843 rc = -ENOMEM; 844 goto free_ts_free_wq; 845 } 846 847 hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), 848 GFP_KERNEL); 849 if (!hdev->hl_chip_info) { 850 rc = -ENOMEM; 851 goto free_prefetch_wq; 852 } 853 854 rc = hl_mmu_if_set_funcs(hdev); 855 if (rc) 856 goto free_chip_info; 857 858 hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr); 859 860 hdev->reset_wq = create_singlethread_workqueue("hl_device_reset"); 861 if (!hdev->reset_wq) { 862 rc = -ENOMEM; 863 dev_err(hdev->dev, "Failed to create device reset WQ\n"); 864 goto free_cb_mgr; 865 } 866 867 INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, device_hard_reset_pending); 868 hdev->device_reset_work.hdev = hdev; 869 hdev->device_fini_pending = 0; 870 871 INIT_DELAYED_WORK(&hdev->device_release_watchdog_work.reset_work, 872 device_release_watchdog_func); 873 hdev->device_release_watchdog_work.hdev = hdev; 874 875 mutex_init(&hdev->send_cpu_message_lock); 876 mutex_init(&hdev->debug_lock); 877 INIT_LIST_HEAD(&hdev->cs_mirror_list); 878 spin_lock_init(&hdev->cs_mirror_lock); 879 spin_lock_init(&hdev->reset_info.lock); 880 INIT_LIST_HEAD(&hdev->fpriv_list); 881 INIT_LIST_HEAD(&hdev->fpriv_ctrl_list); 882 mutex_init(&hdev->fpriv_list_lock); 883 mutex_init(&hdev->fpriv_ctrl_list_lock); 884 mutex_init(&hdev->clk_throttling.lock); 885 886 return 0; 887 888 free_cb_mgr: 889 hl_mem_mgr_fini(&hdev->kernel_mem_mgr); 890 free_chip_info: 891 kfree(hdev->hl_chip_info); 892 free_prefetch_wq: 893 destroy_workqueue(hdev->prefetch_wq); 894 free_ts_free_wq: 895 destroy_workqueue(hdev->ts_free_obj_wq); 896 free_cs_cmplt_wq: 897 destroy_workqueue(hdev->cs_cmplt_wq); 898 free_eq_wq: 899 destroy_workqueue(hdev->eq_wq); 900 free_cq_wq: 901 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 902 if (hdev->cq_wq[i]) 903 destroy_workqueue(hdev->cq_wq[i]); 904 kfree(hdev->cq_wq); 905 asid_fini: 906 hl_asid_fini(hdev); 907 early_fini: 908 if (hdev->asic_funcs->early_fini) 909 hdev->asic_funcs->early_fini(hdev); 910 911 return rc; 912 } 913 914 /* 915 * device_early_fini - finalize all that was done in device_early_init 916 * 917 * @hdev: pointer to habanalabs device structure 918 * 919 */ 920 static void device_early_fini(struct hl_device *hdev) 921 { 922 int i; 923 924 mutex_destroy(&hdev->debug_lock); 925 mutex_destroy(&hdev->send_cpu_message_lock); 926 927 mutex_destroy(&hdev->fpriv_list_lock); 928 mutex_destroy(&hdev->fpriv_ctrl_list_lock); 929 930 mutex_destroy(&hdev->clk_throttling.lock); 931 932 hl_mem_mgr_fini(&hdev->kernel_mem_mgr); 933 934 kfree(hdev->hl_chip_info); 935 936 destroy_workqueue(hdev->prefetch_wq); 937 destroy_workqueue(hdev->ts_free_obj_wq); 938 destroy_workqueue(hdev->cs_cmplt_wq); 939 destroy_workqueue(hdev->eq_wq); 940 destroy_workqueue(hdev->reset_wq); 941 942 for (i = 0 ; i < 
hdev->asic_prop.completion_queues_count ; i++) 943 destroy_workqueue(hdev->cq_wq[i]); 944 kfree(hdev->cq_wq); 945 946 hl_asid_fini(hdev); 947 948 if (hdev->asic_funcs->early_fini) 949 hdev->asic_funcs->early_fini(hdev); 950 } 951 952 static void hl_device_heartbeat(struct work_struct *work) 953 { 954 struct hl_device *hdev = container_of(work, struct hl_device, 955 work_heartbeat.work); 956 957 if (!hl_device_operational(hdev, NULL)) 958 goto reschedule; 959 960 if (!hdev->asic_funcs->send_heartbeat(hdev)) 961 goto reschedule; 962 963 if (hl_device_operational(hdev, NULL)) 964 dev_err(hdev->dev, "Device heartbeat failed!\n"); 965 966 hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT); 967 968 return; 969 970 reschedule: 971 /* 972 * prev_reset_trigger tracks consecutive fatal h/w errors until first 973 * heartbeat immediately post reset. 974 * If control reached here, then at least one heartbeat work has been 975 * scheduled since last reset/init cycle. 976 * So if the device is not already in reset cycle, reset the flag 977 * prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR 978 * status for at least one heartbeat. From this point driver restarts 979 * tracking future consecutive fatal errors. 980 */ 981 if (!hdev->reset_info.in_reset) 982 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; 983 984 schedule_delayed_work(&hdev->work_heartbeat, 985 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); 986 } 987 988 /* 989 * device_late_init - do late stuff initialization for the habanalabs device 990 * 991 * @hdev: pointer to habanalabs device structure 992 * 993 * Do stuff that either needs the device H/W queues to be active or needs 994 * to happen after all the rest of the initialization is finished 995 */ 996 static int device_late_init(struct hl_device *hdev) 997 { 998 int rc; 999 1000 if (hdev->asic_funcs->late_init) { 1001 rc = hdev->asic_funcs->late_init(hdev); 1002 if (rc) { 1003 dev_err(hdev->dev, 1004 "failed late initialization for the H/W\n"); 1005 return rc; 1006 } 1007 } 1008 1009 hdev->high_pll = hdev->asic_prop.high_pll; 1010 1011 if (hdev->heartbeat) { 1012 INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); 1013 schedule_delayed_work(&hdev->work_heartbeat, 1014 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); 1015 } 1016 1017 hdev->late_init_done = true; 1018 1019 return 0; 1020 } 1021 1022 /* 1023 * device_late_fini - finalize all that was done in device_late_init 1024 * 1025 * @hdev: pointer to habanalabs device structure 1026 * 1027 */ 1028 static void device_late_fini(struct hl_device *hdev) 1029 { 1030 if (!hdev->late_init_done) 1031 return; 1032 1033 if (hdev->heartbeat) 1034 cancel_delayed_work_sync(&hdev->work_heartbeat); 1035 1036 if (hdev->asic_funcs->late_fini) 1037 hdev->asic_funcs->late_fini(hdev); 1038 1039 hdev->late_init_done = false; 1040 } 1041 1042 int hl_device_utilization(struct hl_device *hdev, u32 *utilization) 1043 { 1044 u64 max_power, curr_power, dc_power, dividend, divisor; 1045 int rc; 1046 1047 max_power = hdev->max_power; 1048 dc_power = hdev->asic_prop.dc_power_default; 1049 divisor = max_power - dc_power; 1050 if (!divisor) { 1051 dev_warn(hdev->dev, "device utilization is not supported\n"); 1052 return -EOPNOTSUPP; 1053 } 1054 rc = hl_fw_cpucp_power_get(hdev, &curr_power); 1055 1056 if (rc) 1057 return rc; 1058 1059 curr_power = clamp(curr_power, dc_power, max_power); 1060 1061 dividend = (curr_power - dc_power) * 100; 1062 *utilization = (u32) div_u64(dividend, divisor); 1063 1064 return 0; 1065 } 
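
/* Worked example for hl_device_utilization() above (illustrative numbers only,
 * not taken from any real ASIC): with max_power = 300, dc_power_default = 100
 * and a current power reading of 250 (all in the same units), the reported
 * utilization is (250 - 100) * 100 / (300 - 100) = 75.
 */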
1066 1067 int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable) 1068 { 1069 int rc = 0; 1070 1071 mutex_lock(&hdev->debug_lock); 1072 1073 if (!enable) { 1074 if (!hdev->in_debug) { 1075 dev_err(hdev->dev, 1076 "Failed to disable debug mode because device was not in debug mode\n"); 1077 rc = -EFAULT; 1078 goto out; 1079 } 1080 1081 if (!hdev->reset_info.hard_reset_pending) 1082 hdev->asic_funcs->halt_coresight(hdev, ctx); 1083 1084 hdev->in_debug = 0; 1085 1086 goto out; 1087 } 1088 1089 if (hdev->in_debug) { 1090 dev_err(hdev->dev, 1091 "Failed to enable debug mode because device is already in debug mode\n"); 1092 rc = -EFAULT; 1093 goto out; 1094 } 1095 1096 hdev->in_debug = 1; 1097 1098 out: 1099 mutex_unlock(&hdev->debug_lock); 1100 1101 return rc; 1102 } 1103 1104 static void take_release_locks(struct hl_device *hdev) 1105 { 1106 /* Flush anyone that is inside the critical section of enqueue 1107 * jobs to the H/W 1108 */ 1109 hdev->asic_funcs->hw_queues_lock(hdev); 1110 hdev->asic_funcs->hw_queues_unlock(hdev); 1111 1112 /* Flush processes that are sending message to CPU */ 1113 mutex_lock(&hdev->send_cpu_message_lock); 1114 mutex_unlock(&hdev->send_cpu_message_lock); 1115 1116 /* Flush anyone that is inside device open */ 1117 mutex_lock(&hdev->fpriv_list_lock); 1118 mutex_unlock(&hdev->fpriv_list_lock); 1119 mutex_lock(&hdev->fpriv_ctrl_list_lock); 1120 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 1121 } 1122 1123 static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset, 1124 bool skip_wq_flush) 1125 { 1126 if (hard_reset) 1127 device_late_fini(hdev); 1128 1129 /* 1130 * Halt the engines and disable interrupts so we won't get any more 1131 * completions from H/W and we won't have any accesses from the 1132 * H/W to the host machine 1133 */ 1134 hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset); 1135 1136 /* Go over all the queues, release all CS and their jobs */ 1137 hl_cs_rollback_all(hdev, skip_wq_flush); 1138 1139 /* flush the MMU prefetch workqueue */ 1140 flush_workqueue(hdev->prefetch_wq); 1141 1142 /* Release all pending user interrupts, each pending user interrupt 1143 * holds a reference to user context 1144 */ 1145 hl_release_pending_user_interrupts(hdev); 1146 } 1147 1148 /* 1149 * hl_device_suspend - initiate device suspend 1150 * 1151 * @hdev: pointer to habanalabs device structure 1152 * 1153 * Puts the hw in the suspend state (all asics). 1154 * Returns 0 for success or an error on failure. 1155 * Called at driver suspend. 
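 *
 * A minimal caller sketch (illustrative only; the actual PM callbacks live in
 * the PCI driver code and may differ):
 *
 *   rc = hl_device_suspend(hdev);   suspend path
 *   ...
 *   rc = hl_device_resume(hdev);    resume path, performs a hard reset internally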
1156 */ 1157 int hl_device_suspend(struct hl_device *hdev) 1158 { 1159 int rc; 1160 1161 pci_save_state(hdev->pdev); 1162 1163 /* Block future CS/VM/JOB completion operations */ 1164 spin_lock(&hdev->reset_info.lock); 1165 if (hdev->reset_info.in_reset) { 1166 spin_unlock(&hdev->reset_info.lock); 1167 dev_err(hdev->dev, "Can't suspend while in reset\n"); 1168 return -EIO; 1169 } 1170 hdev->reset_info.in_reset = 1; 1171 spin_unlock(&hdev->reset_info.lock); 1172 1173 /* This blocks all other stuff that is not blocked by in_reset */ 1174 hdev->disabled = true; 1175 1176 take_release_locks(hdev); 1177 1178 rc = hdev->asic_funcs->suspend(hdev); 1179 if (rc) 1180 dev_err(hdev->dev, 1181 "Failed to disable PCI access of device CPU\n"); 1182 1183 /* Shut down the device */ 1184 pci_disable_device(hdev->pdev); 1185 pci_set_power_state(hdev->pdev, PCI_D3hot); 1186 1187 return 0; 1188 } 1189 1190 /* 1191 * hl_device_resume - initiate device resume 1192 * 1193 * @hdev: pointer to habanalabs device structure 1194 * 1195 * Bring the hw back to operating state (all asics). 1196 * Returns 0 for success or an error on failure. 1197 * Called at driver resume. 1198 */ 1199 int hl_device_resume(struct hl_device *hdev) 1200 { 1201 int rc; 1202 1203 pci_set_power_state(hdev->pdev, PCI_D0); 1204 pci_restore_state(hdev->pdev); 1205 rc = pci_enable_device_mem(hdev->pdev); 1206 if (rc) { 1207 dev_err(hdev->dev, 1208 "Failed to enable PCI device in resume\n"); 1209 return rc; 1210 } 1211 1212 pci_set_master(hdev->pdev); 1213 1214 rc = hdev->asic_funcs->resume(hdev); 1215 if (rc) { 1216 dev_err(hdev->dev, "Failed to resume device after suspend\n"); 1217 goto disable_device; 1218 } 1219 1220 1221 /* 'in_reset' was set to true during suspend, now we must clear it in order 1222 * for hard reset to be performed 1223 */ 1224 spin_lock(&hdev->reset_info.lock); 1225 hdev->reset_info.in_reset = 0; 1226 spin_unlock(&hdev->reset_info.lock); 1227 1228 rc = hl_device_reset(hdev, HL_DRV_RESET_HARD); 1229 if (rc) { 1230 dev_err(hdev->dev, "Failed to reset device during resume\n"); 1231 goto disable_device; 1232 } 1233 1234 return 0; 1235 1236 disable_device: 1237 pci_clear_master(hdev->pdev); 1238 pci_disable_device(hdev->pdev); 1239 1240 return rc; 1241 } 1242 1243 static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev) 1244 { 1245 struct task_struct *task = NULL; 1246 struct list_head *fd_list; 1247 struct hl_fpriv *hpriv; 1248 struct mutex *fd_lock; 1249 u32 pending_cnt; 1250 1251 fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; 1252 fd_list = control_dev ? 
&hdev->fpriv_ctrl_list : &hdev->fpriv_list; 1253 1254 /* Giving time for user to close FD, and for processes that are inside 1255 * hl_device_open to finish 1256 */ 1257 if (!list_empty(fd_list)) 1258 ssleep(1); 1259 1260 if (timeout) { 1261 pending_cnt = timeout; 1262 } else { 1263 if (hdev->process_kill_trial_cnt) { 1264 /* Processes have been already killed */ 1265 pending_cnt = 1; 1266 goto wait_for_processes; 1267 } else { 1268 /* Wait a small period after process kill */ 1269 pending_cnt = HL_PENDING_RESET_PER_SEC; 1270 } 1271 } 1272 1273 mutex_lock(fd_lock); 1274 1275 /* This section must be protected because we are dereferencing 1276 * pointers that are freed if the process exits 1277 */ 1278 list_for_each_entry(hpriv, fd_list, dev_node) { 1279 task = get_pid_task(hpriv->taskpid, PIDTYPE_PID); 1280 if (task) { 1281 dev_info(hdev->dev, "Killing user process pid=%d\n", 1282 task_pid_nr(task)); 1283 send_sig(SIGKILL, task, 1); 1284 usleep_range(1000, 10000); 1285 1286 put_task_struct(task); 1287 } else { 1288 /* 1289 * If we got here, it means that process was killed from outside the driver 1290 * right after it started looping on fd_list and before get_pid_task, thus 1291 * we don't need to kill it. 1292 */ 1293 dev_dbg(hdev->dev, 1294 "Can't get task struct for user process, assuming process was killed from outside the driver\n"); 1295 } 1296 } 1297 1298 mutex_unlock(fd_lock); 1299 1300 /* 1301 * We killed the open users, but that doesn't mean they are closed. 1302 * It could be that they are running a long cleanup phase in the driver 1303 * e.g. MMU unmappings, or running other long teardown flow even before 1304 * our cleanup. 1305 * Therefore we need to wait again to make sure they are closed before 1306 * continuing with the reset. 1307 */ 1308 1309 wait_for_processes: 1310 while ((!list_empty(fd_list)) && (pending_cnt)) { 1311 dev_dbg(hdev->dev, 1312 "Waiting for all unmap operations to finish before hard reset\n"); 1313 1314 pending_cnt--; 1315 1316 ssleep(1); 1317 } 1318 1319 /* All processes exited successfully */ 1320 if (list_empty(fd_list)) 1321 return 0; 1322 1323 /* Give up waiting for processes to exit */ 1324 if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS) 1325 return -ETIME; 1326 1327 hdev->process_kill_trial_cnt++; 1328 1329 return -EBUSY; 1330 } 1331 1332 static void device_disable_open_processes(struct hl_device *hdev, bool control_dev) 1333 { 1334 struct list_head *fd_list; 1335 struct hl_fpriv *hpriv; 1336 struct mutex *fd_lock; 1337 1338 fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; 1339 fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; 1340 1341 mutex_lock(fd_lock); 1342 list_for_each_entry(hpriv, fd_list, dev_node) 1343 hpriv->hdev = NULL; 1344 mutex_unlock(fd_lock); 1345 } 1346 1347 static void handle_reset_trigger(struct hl_device *hdev, u32 flags) 1348 { 1349 u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT; 1350 1351 /* No consecutive mechanism when user context exists */ 1352 if (hdev->is_compute_ctx_active) 1353 return; 1354 1355 /* 1356 * 'reset cause' is being updated here, because getting here 1357 * means that it's the 1st time and the last time we're here 1358 * ('in_reset' makes sure of it). This makes sure that 1359 * 'reset_cause' will continue holding its 1st recorded reason! 
	 */
	if (flags & HL_DRV_RESET_HEARTBEAT) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
		cur_reset_trigger = HL_DRV_RESET_HEARTBEAT;
	} else if (flags & HL_DRV_RESET_TDR) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_TDR;
		cur_reset_trigger = HL_DRV_RESET_TDR;
	} else if (flags & HL_DRV_RESET_FW_FATAL_ERR) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
		cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR;
	} else {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
	}

	/*
	 * If the reset cause is the same twice, then reset_trigger_repeated
	 * is set, and if this reset is due to a fatal FW error the
	 * device is set to an unstable state.
	 */
	if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) {
		hdev->reset_info.prev_reset_trigger = cur_reset_trigger;
		hdev->reset_info.reset_trigger_repeated = 0;
	} else {
		hdev->reset_info.reset_trigger_repeated = 1;
	}

	/* If the reset is due to heartbeat, the device CPU is not responsive,
	 * in which case there is no point in sending it a PCI disable message.
	 *
	 * If the F/W is performing the reset, there is no need to send it a message to
	 * disable PCI access.
	 */
	if ((flags & HL_DRV_RESET_HARD) &&
			!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
		/* Disable PCI access from the device F/W so it won't send
		 * us additional interrupts. We disable MSI/MSI-X at
		 * the halt_engines function and we can't have the F/W
		 * sending us interrupts after that. We need to disable
		 * the access here because if the device is marked as
		 * disabled, the message won't be sent. Also, in case
		 * of heartbeat, the device CPU is marked as disabled,
		 * so this message won't be sent.
		 */
		if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0))
			dev_warn(hdev->dev,
				"Failed to disable PCI access by F/W\n");
	}
}

/*
 * hl_device_reset - reset the device
 *
 * @hdev: pointer to habanalabs device structure
 * @flags: reset flags.
 *
 * Block future CS and wait for pending CS to be enqueued
 * Call ASIC H/W fini
 * Flush all completions
 * Re-initialize all internal data structures
 * Call ASIC H/W init, late_init
 * Test queues
 * Enable device
 *
 * Returns 0 for success or an error on failure.
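 *
 * Illustrative flag combinations, matching calls made elsewhere in this file
 * (shown here for reference only):
 *
 *   hl_device_reset(hdev, HL_DRV_RESET_HARD);                          hard reset
 *   hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT); hard reset after a failed heartbeat
 *   hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);                   reset upon device release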
1424 */ 1425 int hl_device_reset(struct hl_device *hdev, u32 flags) 1426 { 1427 bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false, 1428 reset_upon_device_release = false, schedule_hard_reset = false, 1429 delay_reset, from_dev_release, from_watchdog_thread; 1430 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; 1431 struct hl_ctx *ctx; 1432 int i, rc; 1433 1434 if (!hdev->init_done) { 1435 dev_err(hdev->dev, "Can't reset before initialization is done\n"); 1436 return 0; 1437 } 1438 1439 hard_reset = !!(flags & HL_DRV_RESET_HARD); 1440 from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR); 1441 fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW); 1442 from_dev_release = !!(flags & HL_DRV_RESET_DEV_RELEASE); 1443 delay_reset = !!(flags & HL_DRV_RESET_DELAY); 1444 from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR); 1445 1446 if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) { 1447 dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n"); 1448 return 0; 1449 } 1450 1451 if (!hard_reset && !hdev->asic_prop.supports_compute_reset) { 1452 hard_instead_soft = true; 1453 hard_reset = true; 1454 } 1455 1456 if (hdev->reset_upon_device_release && from_dev_release) { 1457 if (hard_reset) { 1458 dev_crit(hdev->dev, 1459 "Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n"); 1460 return -EINVAL; 1461 } 1462 1463 reset_upon_device_release = true; 1464 1465 goto do_reset; 1466 } 1467 1468 if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) { 1469 hard_instead_soft = true; 1470 hard_reset = true; 1471 } 1472 1473 if (hard_instead_soft) 1474 dev_dbg(hdev->dev, "Doing hard-reset instead of compute reset\n"); 1475 1476 do_reset: 1477 /* Re-entry of reset thread */ 1478 if (from_hard_reset_thread && hdev->process_kill_trial_cnt) 1479 goto kill_processes; 1480 1481 /* 1482 * Prevent concurrency in this function - only one reset should be 1483 * done at any given time. Only need to perform this if we didn't 1484 * get from the dedicated hard reset thread 1485 */ 1486 if (!from_hard_reset_thread) { 1487 /* Block future CS/VM/JOB completion operations */ 1488 spin_lock(&hdev->reset_info.lock); 1489 if (hdev->reset_info.in_reset) { 1490 /* We only allow scheduling of a hard reset during compute reset */ 1491 if (hard_reset && hdev->reset_info.in_compute_reset) 1492 hdev->reset_info.hard_reset_schedule_flags = flags; 1493 spin_unlock(&hdev->reset_info.lock); 1494 return 0; 1495 } 1496 1497 /* This still allows the completion of some KDMA ops 1498 * Update this before in_reset because in_compute_reset implies we are in reset 1499 */ 1500 hdev->reset_info.in_compute_reset = !hard_reset; 1501 1502 hdev->reset_info.in_reset = 1; 1503 1504 spin_unlock(&hdev->reset_info.lock); 1505 1506 /* Cancel the device release watchdog work if required. 1507 * In case of reset-upon-device-release while the release watchdog work is 1508 * scheduled, do hard-reset instead of compute-reset. 
1509 */ 1510 if ((hard_reset || from_dev_release) && hdev->reset_info.watchdog_active) { 1511 hdev->reset_info.watchdog_active = 0; 1512 if (!from_watchdog_thread) 1513 cancel_delayed_work_sync( 1514 &hdev->device_release_watchdog_work.reset_work); 1515 1516 if (from_dev_release) { 1517 hdev->reset_info.in_compute_reset = 0; 1518 flags |= HL_DRV_RESET_HARD; 1519 flags &= ~HL_DRV_RESET_DEV_RELEASE; 1520 hard_reset = true; 1521 } 1522 } 1523 1524 if (delay_reset) 1525 usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1); 1526 1527 handle_reset_trigger(hdev, flags); 1528 1529 /* This also blocks future CS/VM/JOB completion operations */ 1530 hdev->disabled = true; 1531 1532 take_release_locks(hdev); 1533 1534 if (hard_reset) 1535 dev_info(hdev->dev, "Going to reset device\n"); 1536 else if (reset_upon_device_release) 1537 dev_dbg(hdev->dev, "Going to reset device after release by user\n"); 1538 else 1539 dev_dbg(hdev->dev, "Going to reset engines of inference device\n"); 1540 } 1541 1542 again: 1543 if ((hard_reset) && (!from_hard_reset_thread)) { 1544 hdev->reset_info.hard_reset_pending = true; 1545 1546 hdev->process_kill_trial_cnt = 0; 1547 1548 hdev->device_reset_work.flags = flags; 1549 1550 /* 1551 * Because the reset function can't run from heartbeat work, 1552 * we need to call the reset function from a dedicated work. 1553 */ 1554 queue_delayed_work(hdev->reset_wq, &hdev->device_reset_work.reset_work, 0); 1555 1556 return 0; 1557 } 1558 1559 cleanup_resources(hdev, hard_reset, fw_reset, from_dev_release); 1560 1561 kill_processes: 1562 if (hard_reset) { 1563 /* Kill processes here after CS rollback. This is because the 1564 * process can't really exit until all its CSs are done, which 1565 * is what we do in cs rollback 1566 */ 1567 rc = device_kill_open_processes(hdev, 0, false); 1568 1569 if (rc == -EBUSY) { 1570 if (hdev->device_fini_pending) { 1571 dev_crit(hdev->dev, 1572 "%s Failed to kill all open processes, stopping hard reset\n", 1573 dev_name(&(hdev)->pdev->dev)); 1574 goto out_err; 1575 } 1576 1577 /* signal reset thread to reschedule */ 1578 return rc; 1579 } 1580 1581 if (rc) { 1582 dev_crit(hdev->dev, 1583 "%s Failed to kill all open processes, stopping hard reset\n", 1584 dev_name(&(hdev)->pdev->dev)); 1585 goto out_err; 1586 } 1587 1588 /* Flush the Event queue workers to make sure no other thread is 1589 * reading or writing to registers during the reset 1590 */ 1591 flush_workqueue(hdev->eq_wq); 1592 } 1593 1594 /* Reset the H/W. 
It will be in idle state after this returns */ 1595 hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset); 1596 1597 if (hard_reset) { 1598 hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; 1599 1600 /* Release kernel context */ 1601 if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1) 1602 hdev->kernel_ctx = NULL; 1603 1604 hl_vm_fini(hdev); 1605 hl_mmu_fini(hdev); 1606 hl_eq_reset(hdev, &hdev->event_queue); 1607 } 1608 1609 /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */ 1610 hl_hw_queue_reset(hdev, hard_reset); 1611 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1612 hl_cq_reset(hdev, &hdev->completion_queue[i]); 1613 1614 /* Make sure the context switch phase will run again */ 1615 ctx = hl_get_compute_ctx(hdev); 1616 if (ctx) { 1617 atomic_set(&ctx->thread_ctx_switch_token, 1); 1618 ctx->thread_ctx_switch_wait_token = 0; 1619 hl_ctx_put(ctx); 1620 } 1621 1622 /* Finished tear-down, starting to re-initialize */ 1623 1624 if (hard_reset) { 1625 hdev->device_cpu_disabled = false; 1626 hdev->reset_info.hard_reset_pending = false; 1627 1628 if (hdev->reset_info.reset_trigger_repeated && 1629 (hdev->reset_info.prev_reset_trigger == 1630 HL_DRV_RESET_FW_FATAL_ERR)) { 1631 /* if there 2 back to back resets from FW, 1632 * ensure driver puts the driver in a unusable state 1633 */ 1634 dev_crit(hdev->dev, 1635 "%s Consecutive FW fatal errors received, stopping hard reset\n", 1636 dev_name(&(hdev)->pdev->dev)); 1637 rc = -EIO; 1638 goto out_err; 1639 } 1640 1641 if (hdev->kernel_ctx) { 1642 dev_crit(hdev->dev, 1643 "%s kernel ctx was alive during hard reset, something is terribly wrong\n", 1644 dev_name(&(hdev)->pdev->dev)); 1645 rc = -EBUSY; 1646 goto out_err; 1647 } 1648 1649 rc = hl_mmu_init(hdev); 1650 if (rc) { 1651 dev_err(hdev->dev, 1652 "Failed to initialize MMU S/W after hard reset\n"); 1653 goto out_err; 1654 } 1655 1656 /* Allocate the kernel context */ 1657 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), 1658 GFP_KERNEL); 1659 if (!hdev->kernel_ctx) { 1660 rc = -ENOMEM; 1661 hl_mmu_fini(hdev); 1662 goto out_err; 1663 } 1664 1665 hdev->is_compute_ctx_active = false; 1666 1667 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); 1668 if (rc) { 1669 dev_err(hdev->dev, 1670 "failed to init kernel ctx in hard reset\n"); 1671 kfree(hdev->kernel_ctx); 1672 hdev->kernel_ctx = NULL; 1673 hl_mmu_fini(hdev); 1674 goto out_err; 1675 } 1676 } 1677 1678 /* Device is now enabled as part of the initialization requires 1679 * communication with the device firmware to get information that 1680 * is required for the initialization itself 1681 */ 1682 hdev->disabled = false; 1683 1684 /* F/W security enabled indication might be updated after hard-reset */ 1685 if (hard_reset) { 1686 rc = hl_fw_read_preboot_status(hdev); 1687 if (rc) 1688 goto out_err; 1689 } 1690 1691 rc = hdev->asic_funcs->hw_init(hdev); 1692 if (rc) { 1693 dev_err(hdev->dev, "failed to initialize the H/W after reset\n"); 1694 goto out_err; 1695 } 1696 1697 /* If device is not idle fail the reset process */ 1698 if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, 1699 HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { 1700 print_idle_status_mask(hdev, "device is not idle after reset", idle_mask); 1701 rc = -EIO; 1702 goto out_err; 1703 } 1704 1705 /* Check that the communication with the device is working */ 1706 rc = hdev->asic_funcs->test_queues(hdev); 1707 if (rc) { 1708 dev_err(hdev->dev, "Failed to detect if device is alive after reset\n"); 1709 goto out_err; 1710 } 1711 1712 if (hard_reset) { 1713 rc = 
device_late_init(hdev); 1714 if (rc) { 1715 dev_err(hdev->dev, "Failed late init after hard reset\n"); 1716 goto out_err; 1717 } 1718 1719 rc = hl_vm_init(hdev); 1720 if (rc) { 1721 dev_err(hdev->dev, "Failed to init memory module after hard reset\n"); 1722 goto out_err; 1723 } 1724 1725 if (!hdev->asic_prop.fw_security_enabled) 1726 hl_fw_set_max_power(hdev); 1727 } else { 1728 rc = hdev->asic_funcs->compute_reset_late_init(hdev); 1729 if (rc) { 1730 if (reset_upon_device_release) 1731 dev_err(hdev->dev, 1732 "Failed late init in reset after device release\n"); 1733 else 1734 dev_err(hdev->dev, "Failed late init after compute reset\n"); 1735 goto out_err; 1736 } 1737 } 1738 1739 rc = hdev->asic_funcs->scrub_device_mem(hdev); 1740 if (rc) { 1741 dev_err(hdev->dev, "scrub mem failed from device reset (%d)\n", rc); 1742 goto out_err; 1743 } 1744 1745 spin_lock(&hdev->reset_info.lock); 1746 hdev->reset_info.in_compute_reset = 0; 1747 1748 /* Schedule hard reset only if requested and if not already in hard reset. 1749 * We keep 'in_reset' enabled, so no other reset can go in during the hard 1750 * reset schedule 1751 */ 1752 if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags) 1753 schedule_hard_reset = true; 1754 else 1755 hdev->reset_info.in_reset = 0; 1756 1757 spin_unlock(&hdev->reset_info.lock); 1758 1759 hdev->reset_info.needs_reset = false; 1760 1761 if (hard_reset) 1762 dev_info(hdev->dev, 1763 "Successfully finished resetting the %s device\n", 1764 dev_name(&(hdev)->pdev->dev)); 1765 else 1766 dev_dbg(hdev->dev, 1767 "Successfully finished resetting the %s device\n", 1768 dev_name(&(hdev)->pdev->dev)); 1769 1770 if (hard_reset) { 1771 hdev->reset_info.hard_reset_cnt++; 1772 1773 /* After reset is done, we are ready to receive events from 1774 * the F/W. We can't do it before because we will ignore events 1775 * and if those events are fatal, we won't know about it and 1776 * the device will be operational although it shouldn't be 1777 */ 1778 hdev->asic_funcs->enable_events_from_fw(hdev); 1779 } else { 1780 if (!reset_upon_device_release) 1781 hdev->reset_info.compute_reset_cnt++; 1782 1783 if (schedule_hard_reset) { 1784 dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n"); 1785 flags = hdev->reset_info.hard_reset_schedule_flags; 1786 hdev->reset_info.hard_reset_schedule_flags = 0; 1787 hdev->disabled = true; 1788 hard_reset = true; 1789 handle_reset_trigger(hdev, flags); 1790 goto again; 1791 } 1792 } 1793 1794 return 0; 1795 1796 out_err: 1797 hdev->disabled = true; 1798 1799 spin_lock(&hdev->reset_info.lock); 1800 hdev->reset_info.in_compute_reset = 0; 1801 1802 if (hard_reset) { 1803 dev_err(hdev->dev, 1804 "%s Failed to reset! Device is NOT usable\n", 1805 dev_name(&(hdev)->pdev->dev)); 1806 hdev->reset_info.hard_reset_cnt++; 1807 } else if (reset_upon_device_release) { 1808 spin_unlock(&hdev->reset_info.lock); 1809 dev_err(hdev->dev, "Failed to reset device after user release\n"); 1810 flags |= HL_DRV_RESET_HARD; 1811 flags &= ~HL_DRV_RESET_DEV_RELEASE; 1812 hard_reset = true; 1813 goto again; 1814 } else { 1815 spin_unlock(&hdev->reset_info.lock); 1816 dev_err(hdev->dev, "Failed to do compute reset\n"); 1817 hdev->reset_info.compute_reset_cnt++; 1818 flags |= HL_DRV_RESET_HARD; 1819 hard_reset = true; 1820 goto again; 1821 } 1822 1823 hdev->reset_info.in_reset = 0; 1824 1825 spin_unlock(&hdev->reset_info.lock); 1826 1827 return rc; 1828 } 1829 1830 /* 1831 * hl_device_cond_reset() - conditionally reset the device. 
 * @hdev: pointer to habanalabs device structure.
 * @flags: reset flags.
 * @event_mask: events to notify user about.
 *
 * Conditionally reset the device, or alternatively schedule a watchdog work to reset the device
 * unless another reset precedes it.
 */
int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
{
	struct hl_ctx *ctx = NULL;

	/* Device release watchdog is only for hard reset */
	if (!(flags & HL_DRV_RESET_HARD) && hdev->asic_prop.allow_inference_soft_reset)
		goto device_reset;

	/* F/W reset cannot be postponed */
	if (flags & HL_DRV_RESET_BYPASS_REQ_TO_FW)
		goto device_reset;

	/* Device release watchdog is relevant only if user exists and gets a reset notification */
	if (!(event_mask & HL_NOTIFIER_EVENT_DEVICE_RESET)) {
		dev_err(hdev->dev, "Resetting device without a reset indication to user\n");
		goto device_reset;
	}

	ctx = hl_get_compute_ctx(hdev);
	if (!ctx || !ctx->hpriv->notifier_event.eventfd)
		goto device_reset;

	/* Schedule the device release watchdog work unless reset is already in progress or if the
	 * work is already scheduled.
	 */
	spin_lock(&hdev->reset_info.lock);
	if (hdev->reset_info.in_reset) {
		spin_unlock(&hdev->reset_info.lock);
		goto device_reset;
	}

	if (hdev->reset_info.watchdog_active)
		goto out;

	hdev->device_release_watchdog_work.flags = flags;
	dev_dbg(hdev->dev, "Device is going to be reset in %u sec unless being released\n",
		hdev->device_release_watchdog_timeout_sec);
	schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work,
				msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000));
	hdev->reset_info.watchdog_active = 1;
out:
	spin_unlock(&hdev->reset_info.lock);

	hl_notifier_event_send_all(hdev, event_mask);

	hl_ctx_put(ctx);

	hl_abort_waitings_for_completion(hdev);

	return 0;

device_reset:
	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);
	if (ctx)
		hl_ctx_put(ctx);

	return hl_device_reset(hdev, flags);
}

static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)
{
	mutex_lock(&notifier_event->lock);
	notifier_event->events_mask |= event_mask;

	if (notifier_event->eventfd)
		eventfd_signal(notifier_event->eventfd, 1);

	mutex_unlock(&notifier_event->lock);
}

/*
 * hl_notifier_event_send_all - notify all user processes via eventfd
 *
 * @hdev: pointer to habanalabs device structure
 * @event_mask: the occurred event/s
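 *
 * A minimal usage sketch (illustrative only), e.g. when notifying user processes
 * about an upcoming device reset:
 *
 *   hl_notifier_event_send_all(hdev, HL_NOTIFIER_EVENT_DEVICE_RESET);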
1916 */ 1917 void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask) 1918 { 1919 struct hl_fpriv *hpriv; 1920 1921 if (!event_mask) { 1922 dev_warn(hdev->dev, "Skip sending zero event"); 1923 return; 1924 } 1925 1926 mutex_lock(&hdev->fpriv_list_lock); 1927 1928 list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) 1929 hl_notifier_event_send(&hpriv->notifier_event, event_mask); 1930 1931 mutex_unlock(&hdev->fpriv_list_lock); 1932 1933 /* control device */ 1934 mutex_lock(&hdev->fpriv_ctrl_list_lock); 1935 1936 list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node) 1937 hl_notifier_event_send(&hpriv->notifier_event, event_mask); 1938 1939 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 1940 } 1941 1942 /* 1943 * hl_device_init - main initialization function for habanalabs device 1944 * 1945 * @hdev: pointer to habanalabs device structure 1946 * 1947 * Allocate an id for the device, do early initialization and then call the 1948 * ASIC specific initialization functions. Finally, create the cdev and the 1949 * Linux device to expose it to the user 1950 */ 1951 int hl_device_init(struct hl_device *hdev, struct class *hclass) 1952 { 1953 int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt; 1954 char *name; 1955 bool add_cdev_sysfs_on_err = false; 1956 1957 hdev->cdev_idx = hdev->id / 2; 1958 1959 name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx); 1960 if (!name) { 1961 rc = -ENOMEM; 1962 goto out_disabled; 1963 } 1964 1965 /* Initialize cdev and device structures */ 1966 rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name, 1967 &hdev->cdev, &hdev->dev); 1968 1969 kfree(name); 1970 1971 if (rc) 1972 goto out_disabled; 1973 1974 name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx); 1975 if (!name) { 1976 rc = -ENOMEM; 1977 goto free_dev; 1978 } 1979 1980 /* Initialize cdev and device structures for control device */ 1981 rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops, 1982 name, &hdev->cdev_ctrl, &hdev->dev_ctrl); 1983 1984 kfree(name); 1985 1986 if (rc) 1987 goto free_dev; 1988 1989 /* Initialize ASIC function pointers and perform early init */ 1990 rc = device_early_init(hdev); 1991 if (rc) 1992 goto free_dev_ctrl; 1993 1994 user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count + 1995 hdev->asic_prop.user_interrupt_count; 1996 1997 if (user_interrupt_cnt) { 1998 hdev->user_interrupt = kcalloc(user_interrupt_cnt, sizeof(*hdev->user_interrupt), 1999 GFP_KERNEL); 2000 if (!hdev->user_interrupt) { 2001 rc = -ENOMEM; 2002 goto early_fini; 2003 } 2004 } 2005 2006 /* 2007 * Start calling ASIC initialization. First S/W then H/W and finally 2008 * late init 2009 */ 2010 rc = hdev->asic_funcs->sw_init(hdev); 2011 if (rc) 2012 goto free_usr_intr_mem; 2013 2014 2015 /* initialize completion structure for multi CS wait */ 2016 hl_multi_cs_completion_init(hdev); 2017 2018 /* 2019 * Initialize the H/W queues. Must be done before hw_init, because 2020 * there the addresses of the kernel queue are being written to the 2021 * registers of the device 2022 */ 2023 rc = hl_hw_queues_create(hdev); 2024 if (rc) { 2025 dev_err(hdev->dev, "failed to initialize kernel queues\n"); 2026 goto sw_fini; 2027 } 2028 2029 cq_cnt = hdev->asic_prop.completion_queues_count; 2030 2031 /* 2032 * Initialize the completion queues. 

	/*
	 * Initialize the completion queues. Must be done before hw_init,
	 * because there the addresses of the completion queues are being
	 * passed as arguments to request_irq
	 */
	if (cq_cnt) {
		hdev->completion_queue = kcalloc(cq_cnt,
						sizeof(*hdev->completion_queue),
						GFP_KERNEL);

		if (!hdev->completion_queue) {
			dev_err(hdev->dev,
				"failed to allocate completion queues\n");
			rc = -ENOMEM;
			goto hw_queues_destroy;
		}
	}

	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
		rc = hl_cq_init(hdev, &hdev->completion_queue[i],
				hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize completion queue\n");
			goto cq_fini;
		}
		hdev->completion_queue[i].cq_idx = i;
	}

	hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs,
					sizeof(struct hl_cs *), GFP_KERNEL);
	if (!hdev->shadow_cs_queue) {
		rc = -ENOMEM;
		goto cq_fini;
	}

	/*
	 * Initialize the event queue. Must be done before hw_init,
	 * because there the address of the event queue is being
	 * passed as argument to request_irq
	 */
	rc = hl_eq_init(hdev, &hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize event queue\n");
		goto free_shadow_cs_queue;
	}

	/* MMU S/W must be initialized before kernel context is created */
	rc = hl_mmu_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
		goto eq_fini;
	}

	/* Allocate the kernel context */
	hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
	if (!hdev->kernel_ctx) {
		rc = -ENOMEM;
		goto mmu_fini;
	}

	hdev->is_compute_ctx_active = false;

	hdev->asic_funcs->state_dump_init(hdev);

	hdev->device_release_watchdog_timeout_sec = HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC;

	hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL;
	hl_debugfs_add_device(hdev);

	/* debugfs nodes are created in hl_ctx_init so it must be called after
	 * hl_debugfs_add_device.
	 */
	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel context\n");
		kfree(hdev->kernel_ctx);
		goto remove_device_from_debugfs;
	}

	rc = hl_cb_pool_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CB pool\n");
		goto release_ctx;
	}

	rc = hl_dec_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize the decoder module\n");
		goto cb_pool_fini;
	}

	/*
	 * From this point, override rc (=0) in case of an error to allow
	 * debugging (by adding char devices and creating sysfs nodes as part of
	 * the error flow).
2127 */ 2128 add_cdev_sysfs_on_err = true; 2129 2130 /* Device is now enabled as part of the initialization requires 2131 * communication with the device firmware to get information that 2132 * is required for the initialization itself 2133 */ 2134 hdev->disabled = false; 2135 2136 rc = hdev->asic_funcs->hw_init(hdev); 2137 if (rc) { 2138 dev_err(hdev->dev, "failed to initialize the H/W\n"); 2139 rc = 0; 2140 goto out_disabled; 2141 } 2142 2143 /* Check that the communication with the device is working */ 2144 rc = hdev->asic_funcs->test_queues(hdev); 2145 if (rc) { 2146 dev_err(hdev->dev, "Failed to detect if device is alive\n"); 2147 rc = 0; 2148 goto out_disabled; 2149 } 2150 2151 rc = device_late_init(hdev); 2152 if (rc) { 2153 dev_err(hdev->dev, "Failed late initialization\n"); 2154 rc = 0; 2155 goto out_disabled; 2156 } 2157 2158 dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n", 2159 hdev->asic_name, 2160 hdev->asic_prop.dram_size / SZ_1G); 2161 2162 rc = hl_vm_init(hdev); 2163 if (rc) { 2164 dev_err(hdev->dev, "Failed to initialize memory module\n"); 2165 rc = 0; 2166 goto out_disabled; 2167 } 2168 2169 /* 2170 * Expose devices and sysfs nodes to user. 2171 * From here there is no need to add char devices and create sysfs nodes 2172 * in case of an error. 2173 */ 2174 add_cdev_sysfs_on_err = false; 2175 rc = device_cdev_sysfs_add(hdev); 2176 if (rc) { 2177 dev_err(hdev->dev, 2178 "Failed to add char devices and sysfs nodes\n"); 2179 rc = 0; 2180 goto out_disabled; 2181 } 2182 2183 /* Need to call this again because the max power might change, 2184 * depending on card type for certain ASICs 2185 */ 2186 if (hdev->asic_prop.set_max_power_on_device_init && 2187 !hdev->asic_prop.fw_security_enabled) 2188 hl_fw_set_max_power(hdev); 2189 2190 /* 2191 * hl_hwmon_init() must be called after device_late_init(), because only 2192 * there we get the information from the device about which 2193 * hwmon-related sensors the device supports. 2194 * Furthermore, it must be done after adding the device to the system. 2195 */ 2196 rc = hl_hwmon_init(hdev); 2197 if (rc) { 2198 dev_err(hdev->dev, "Failed to initialize hwmon\n"); 2199 rc = 0; 2200 goto out_disabled; 2201 } 2202 2203 dev_notice(hdev->dev, 2204 "Successfully added device %s to habanalabs driver\n", 2205 dev_name(&(hdev)->pdev->dev)); 2206 2207 hdev->init_done = true; 2208 2209 /* After initialization is done, we are ready to receive events from 2210 * the F/W. 

	/* After initialization is done, we are ready to receive events from
	 * the F/W. We can't do it before because we will ignore events and if
	 * those events are fatal, we won't know about it and the device will
	 * be operational although it shouldn't be
	 */
	hdev->asic_funcs->enable_events_from_fw(hdev);

	return 0;

cb_pool_fini:
	hl_cb_pool_fini(hdev);
release_ctx:
	if (hl_ctx_put(hdev->kernel_ctx) != 1)
		dev_err(hdev->dev,
			"kernel ctx is still alive on initialization failure\n");
remove_device_from_debugfs:
	hl_debugfs_remove_device(hdev);
mmu_fini:
	hl_mmu_fini(hdev);
eq_fini:
	hl_eq_fini(hdev, &hdev->event_queue);
free_shadow_cs_queue:
	kfree(hdev->shadow_cs_queue);
cq_fini:
	for (i = 0 ; i < cq_ready_cnt ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
hw_queues_destroy:
	hl_hw_queues_destroy(hdev);
sw_fini:
	hdev->asic_funcs->sw_fini(hdev);
free_usr_intr_mem:
	kfree(hdev->user_interrupt);
early_fini:
	device_early_fini(hdev);
free_dev_ctrl:
	put_device(hdev->dev_ctrl);
free_dev:
	put_device(hdev->dev);
out_disabled:
	hdev->disabled = true;
	if (add_cdev_sysfs_on_err)
		device_cdev_sysfs_add(hdev);
	if (hdev->pdev)
		dev_err(&hdev->pdev->dev,
			"Failed to initialize hl%d. Device %s is NOT usable !\n",
			hdev->cdev_idx, dev_name(&(hdev)->pdev->dev));
	else
		pr_err("Failed to initialize hl%d. Device %s is NOT usable !\n",
			hdev->cdev_idx, dev_name(&(hdev)->pdev->dev));

	return rc;
}

/*
 * hl_device_fini - main tear-down function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Destroy the device, call ASIC fini functions and release the id
 */
void hl_device_fini(struct hl_device *hdev)
{
	bool device_in_reset;
	ktime_t timeout;
	u64 reset_sec;
	int i, rc;

	dev_info(hdev->dev, "Removing device\n");

	hdev->device_fini_pending = 1;
	flush_delayed_work(&hdev->device_reset_work.reset_work);

	if (hdev->pldm)
		reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
	else
		reset_sec = HL_HARD_RESET_MAX_TIMEOUT;

	/*
	 * This function is competing with the reset function, so try to
	 * take the reset atomic and if we are already in the middle of a reset,
	 * wait until the reset function is finished. The reset function is
	 * designed to always finish. However, in Gaudi, because of all the
	 * network ports, the hard reset could take between 10-30 seconds
	 */

	timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);

	spin_lock(&hdev->reset_info.lock);
	device_in_reset = !!hdev->reset_info.in_reset;
	if (!device_in_reset)
		hdev->reset_info.in_reset = 1;
	spin_unlock(&hdev->reset_info.lock);

	while (device_in_reset) {
		usleep_range(50, 200);

		spin_lock(&hdev->reset_info.lock);
		device_in_reset = !!hdev->reset_info.in_reset;
		if (!device_in_reset)
			hdev->reset_info.in_reset = 1;
		spin_unlock(&hdev->reset_info.lock);

		if (ktime_compare(ktime_get(), timeout) > 0) {
			dev_crit(hdev->dev,
				"%s Failed to remove device because reset function did not finish\n",
				dev_name(&(hdev)->pdev->dev));
			return;
		}
	}

	cancel_delayed_work_sync(&hdev->device_release_watchdog_work.reset_work);

	/* Disable PCI access from device F/W so it won't send us additional
	 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
	 * can't have the F/W sending us interrupts after that. We need to
	 * disable the access here because if the device is marked disabled, the
	 * message won't be sent. Also, in case of heartbeat, the device CPU is
	 * marked as disabled so this message won't be sent
	 */
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	/* Mark device as disabled */
	hdev->disabled = true;

	take_release_locks(hdev);

	hdev->reset_info.hard_reset_pending = true;

	hl_hwmon_fini(hdev);

	cleanup_resources(hdev, true, false, false);

	/* Kill processes here after CS rollback. This is because the process
	 * can't really exit until all its CSs are done, which is what we
	 * do in cs rollback
	 */
	dev_info(hdev->dev,
		"Waiting for all processes to exit (timeout of %u seconds)",
		HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI);

	hdev->process_kill_trial_cnt = 0;
	rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false);
	if (rc) {
		dev_crit(hdev->dev, "Failed to kill all open processes\n");
		device_disable_open_processes(hdev, false);
	}

	hdev->process_kill_trial_cnt = 0;
	rc = device_kill_open_processes(hdev, 0, true);
	if (rc) {
		dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
		device_disable_open_processes(hdev, true);
	}

	hl_cb_pool_fini(hdev);

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, true, false);

	hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;

	/* Release kernel context */
	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
		dev_err(hdev->dev, "kernel ctx is still alive\n");

	hl_debugfs_remove_device(hdev);

	hl_dec_fini(hdev);

	hl_vm_fini(hdev);

	hl_mmu_fini(hdev);

	vfree(hdev->captured_err_info.page_fault_info.user_mappings);

	hl_eq_fini(hdev, &hdev->event_queue);

	kfree(hdev->shadow_cs_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
	kfree(hdev->user_interrupt);

	hl_hw_queues_destroy(hdev);

	/* Call ASIC S/W finalize function */
	hdev->asic_funcs->sw_fini(hdev);

	device_early_fini(hdev);

	/* Hide devices and sysfs nodes from user */
	device_cdev_sysfs_del(hdev);

	pr_info("removed device successfully\n");
}
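
/*
 * Illustrative sketch (not part of the driver): the two helpers below are the
 * building blocks for register access. A read-modify-write of a hypothetical
 * register would look roughly like this (SOME_REG_OFFSET and SOME_FIELD_MASK
 * are made-up names used only for illustration):
 *
 *	u32 val;
 *
 *	val = hl_rreg(hdev, SOME_REG_OFFSET);
 *	val &= ~SOME_FIELD_MASK;
 *	hl_wreg(hdev, SOME_REG_OFFSET, val);
 */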
2409 */ 2410 2411 /* 2412 * hl_rreg - Read an MMIO register 2413 * 2414 * @hdev: pointer to habanalabs device structure 2415 * @reg: MMIO register offset (in bytes) 2416 * 2417 * Returns the value of the MMIO register we are asked to read 2418 * 2419 */ 2420 inline u32 hl_rreg(struct hl_device *hdev, u32 reg) 2421 { 2422 u32 val = readl(hdev->rmmio + reg); 2423 2424 if (unlikely(trace_habanalabs_rreg32_enabled())) 2425 trace_habanalabs_rreg32(hdev->dev, reg, val); 2426 2427 return val; 2428 } 2429 2430 /* 2431 * hl_wreg - Write to an MMIO register 2432 * 2433 * @hdev: pointer to habanalabs device structure 2434 * @reg: MMIO register offset (in bytes) 2435 * @val: 32-bit value 2436 * 2437 * Writes the 32-bit value into the MMIO register 2438 * 2439 */ 2440 inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) 2441 { 2442 if (unlikely(trace_habanalabs_wreg32_enabled())) 2443 trace_habanalabs_wreg32(hdev->dev, reg, val); 2444 2445 writel(val, hdev->rmmio + reg); 2446 } 2447 2448 void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, 2449 u8 flags) 2450 { 2451 struct razwi_info *razwi_info = &hdev->captured_err_info.razwi_info; 2452 2453 if (num_of_engines > HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR) { 2454 dev_err(hdev->dev, 2455 "Number of possible razwi initiators (%u) exceeded limit (%u)\n", 2456 num_of_engines, HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR); 2457 return; 2458 } 2459 2460 /* In case it's the first razwi since the device was opened, capture its parameters */ 2461 if (atomic_cmpxchg(&hdev->captured_err_info.razwi_info.razwi_detected, 0, 1)) 2462 return; 2463 2464 razwi_info->razwi.timestamp = ktime_to_ns(ktime_get()); 2465 razwi_info->razwi.addr = addr; 2466 razwi_info->razwi.num_of_possible_engines = num_of_engines; 2467 memcpy(&razwi_info->razwi.engine_id[0], &engine_id[0], 2468 num_of_engines * sizeof(u16)); 2469 razwi_info->razwi.flags = flags; 2470 2471 razwi_info->razwi_info_available = true; 2472 } 2473 2474 void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, 2475 u8 flags, u64 *event_mask) 2476 { 2477 hl_capture_razwi(hdev, addr, engine_id, num_of_engines, flags); 2478 2479 if (event_mask) 2480 *event_mask |= HL_NOTIFIER_EVENT_RAZWI; 2481 } 2482 2483 static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu) 2484 { 2485 struct page_fault_info *pgf_info = &hdev->captured_err_info.page_fault_info; 2486 struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; 2487 struct hl_vm_hash_node *hnode; 2488 struct hl_userptr *userptr; 2489 enum vm_type *vm_type; 2490 struct hl_ctx *ctx; 2491 u32 map_idx = 0; 2492 int i; 2493 2494 /* Reset previous session count*/ 2495 pgf_info->num_of_user_mappings = 0; 2496 2497 ctx = hl_get_compute_ctx(hdev); 2498 if (!ctx) { 2499 dev_err(hdev->dev, "Can't get user context for user mappings\n"); 2500 return; 2501 } 2502 2503 mutex_lock(&ctx->mem_hash_lock); 2504 hash_for_each(ctx->mem_hash, i, hnode, node) { 2505 vm_type = hnode->ptr; 2506 if (((*vm_type == VM_TYPE_USERPTR) && is_pmmu) || 2507 ((*vm_type == VM_TYPE_PHYS_PACK) && !is_pmmu)) 2508 pgf_info->num_of_user_mappings++; 2509 2510 } 2511 2512 if (!pgf_info->num_of_user_mappings) 2513 goto finish; 2514 2515 /* In case we already allocated in previous session, need to release it before 2516 * allocating new buffer. 
2517 */ 2518 vfree(pgf_info->user_mappings); 2519 pgf_info->user_mappings = 2520 vzalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping)); 2521 if (!pgf_info->user_mappings) { 2522 pgf_info->num_of_user_mappings = 0; 2523 goto finish; 2524 } 2525 2526 hash_for_each(ctx->mem_hash, i, hnode, node) { 2527 vm_type = hnode->ptr; 2528 if ((*vm_type == VM_TYPE_USERPTR) && (is_pmmu)) { 2529 userptr = hnode->ptr; 2530 pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; 2531 pgf_info->user_mappings[map_idx].size = userptr->size; 2532 map_idx++; 2533 } else if ((*vm_type == VM_TYPE_PHYS_PACK) && (!is_pmmu)) { 2534 phys_pg_pack = hnode->ptr; 2535 pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; 2536 pgf_info->user_mappings[map_idx].size = phys_pg_pack->total_size; 2537 map_idx++; 2538 } 2539 } 2540 finish: 2541 mutex_unlock(&ctx->mem_hash_lock); 2542 hl_ctx_put(ctx); 2543 } 2544 2545 void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu) 2546 { 2547 struct page_fault_info *pgf_info = &hdev->captured_err_info.page_fault_info; 2548 2549 /* Capture only the first page fault */ 2550 if (atomic_cmpxchg(&pgf_info->page_fault_detected, 0, 1)) 2551 return; 2552 2553 pgf_info->page_fault.timestamp = ktime_to_ns(ktime_get()); 2554 pgf_info->page_fault.addr = addr; 2555 pgf_info->page_fault.engine_id = eng_id; 2556 hl_capture_user_mappings(hdev, is_pmmu); 2557 2558 pgf_info->page_fault_info_available = true; 2559 } 2560 2561 void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu, 2562 u64 *event_mask) 2563 { 2564 hl_capture_page_fault(hdev, addr, eng_id, is_pmmu); 2565 2566 if (event_mask) 2567 *event_mask |= HL_NOTIFIER_EVENT_PAGE_FAULT; 2568 } 2569