1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2016-2022 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8 #define pr_fmt(fmt) "habanalabs: " fmt 9 10 #include <uapi/drm/habanalabs_accel.h> 11 #include "habanalabs.h" 12 13 #include <linux/pci.h> 14 #include <linux/hwmon.h> 15 #include <linux/vmalloc.h> 16 17 #include <trace/events/habanalabs.h> 18 19 #define HL_RESET_DELAY_USEC 10000 /* 10ms */ 20 21 #define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 5 22 23 enum dma_alloc_type { 24 DMA_ALLOC_COHERENT, 25 DMA_ALLOC_POOL, 26 }; 27 28 #define MEM_SCRUB_DEFAULT_VAL 0x1122334455667788 29 30 /* 31 * hl_set_dram_bar- sets the bar to allow later access to address 32 * 33 * @hdev: pointer to habanalabs device structure. 34 * @addr: the address the caller wants to access. 35 * @region: the PCI region. 36 * @new_bar_region_base: the new BAR region base address. 37 * 38 * @return: the old BAR base address on success, U64_MAX for failure. 39 * The caller should set it back to the old address after use. 40 * 41 * In case the bar space does not cover the whole address space, 42 * the bar base address should be set to allow access to a given address. 43 * This function can be called also if the bar doesn't need to be set, 44 * in that case it just won't change the base. 45 */ 46 static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region, 47 u64 *new_bar_region_base) 48 { 49 struct asic_fixed_properties *prop = &hdev->asic_prop; 50 u64 bar_base_addr, old_base; 51 52 if (is_power_of_2(prop->dram_pci_bar_size)) 53 bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); 54 else 55 bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) * 56 prop->dram_pci_bar_size; 57 58 old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); 59 60 /* in case of success we need to update the new BAR base */ 61 if ((old_base != U64_MAX) && new_bar_region_base) 62 *new_bar_region_base = bar_base_addr; 63 64 return old_base; 65 } 66 67 int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, 68 enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar) 69 { 70 struct pci_mem_region *region = &hdev->pci_mem_region[region_type]; 71 u64 old_base = 0, rc, bar_region_base = region->region_base; 72 void __iomem *acc_addr; 73 74 if (set_dram_bar) { 75 old_base = hl_set_dram_bar(hdev, addr, region, &bar_region_base); 76 if (old_base == U64_MAX) 77 return -EIO; 78 } 79 80 acc_addr = hdev->pcie_bar[region->bar_id] + region->offset_in_bar + 81 (addr - bar_region_base); 82 83 switch (acc_type) { 84 case DEBUGFS_READ8: 85 *val = readb(acc_addr); 86 break; 87 case DEBUGFS_WRITE8: 88 writeb(*val, acc_addr); 89 break; 90 case DEBUGFS_READ32: 91 *val = readl(acc_addr); 92 break; 93 case DEBUGFS_WRITE32: 94 writel(*val, acc_addr); 95 break; 96 case DEBUGFS_READ64: 97 *val = readq(acc_addr); 98 break; 99 case DEBUGFS_WRITE64: 100 writeq(*val, acc_addr); 101 break; 102 } 103 104 if (set_dram_bar) { 105 rc = hl_set_dram_bar(hdev, old_base, region, NULL); 106 if (rc == U64_MAX) 107 return -EIO; 108 } 109 110 return 0; 111 } 112 113 static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 114 gfp_t flag, enum dma_alloc_type alloc_type, 115 const char *caller) 116 { 117 void *ptr = NULL; 118 119 switch (alloc_type) { 120 case DMA_ALLOC_COHERENT: 121 ptr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, size, dma_handle, flag); 122 break; 123 case DMA_ALLOC_POOL: 124 ptr = 
hdev->asic_funcs->asic_dma_pool_zalloc(hdev, size, flag, dma_handle); 125 break; 126 } 127 128 if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr)) 129 trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size, 130 caller); 131 132 return ptr; 133 } 134 135 static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr, 136 dma_addr_t dma_handle, enum dma_alloc_type alloc_type, 137 const char *caller) 138 { 139 /* this is needed to avoid warning on using freed pointer */ 140 u64 store_cpu_addr = (u64) (uintptr_t) cpu_addr; 141 142 switch (alloc_type) { 143 case DMA_ALLOC_COHERENT: 144 hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle); 145 break; 146 case DMA_ALLOC_POOL: 147 hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle); 148 break; 149 } 150 151 trace_habanalabs_dma_free(hdev->dev, store_cpu_addr, dma_handle, size, caller); 152 } 153 154 void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 155 gfp_t flag, const char *caller) 156 { 157 return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller); 158 } 159 160 void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, 161 dma_addr_t dma_handle, const char *caller) 162 { 163 hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller); 164 } 165 166 void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, 167 dma_addr_t *dma_handle, const char *caller) 168 { 169 return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller); 170 } 171 172 void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, 173 const char *caller) 174 { 175 hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller); 176 } 177 178 void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle) 179 { 180 return hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 181 } 182 183 void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) 184 { 185 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr); 186 } 187 188 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) 189 { 190 struct asic_fixed_properties *prop = &hdev->asic_prop; 191 struct scatterlist *sg; 192 int rc, i; 193 194 rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0); 195 if (rc) 196 return rc; 197 198 /* Shift to the device's base physical address of host memory if necessary */ 199 if (prop->device_dma_offset_for_host_access) 200 for_each_sgtable_dma_sg(sgt, sg, i) 201 sg->dma_address += prop->device_dma_offset_for_host_access; 202 203 return 0; 204 } 205 206 void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) 207 { 208 struct asic_fixed_properties *prop = &hdev->asic_prop; 209 struct scatterlist *sg; 210 int i; 211 212 /* Cancel the device's base physical address of host memory if necessary */ 213 if (prop->device_dma_offset_for_host_access) 214 for_each_sgtable_dma_sg(sgt, sg, i) 215 sg->dma_address -= prop->device_dma_offset_for_host_access; 216 217 dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0); 218 } 219 220 /* 221 * hl_access_cfg_region - access the config region 222 * 223 * @hdev: pointer to habanalabs device structure 224 * @addr: the address to access 225 * @val: the value to 
write from or read to 226 * @acc_type: the type of access (read/write 64/32) 227 */ 228 int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, 229 enum debugfs_access_type acc_type) 230 { 231 struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG]; 232 u32 val_h, val_l; 233 234 if (!IS_ALIGNED(addr, sizeof(u32))) { 235 dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32)); 236 return -EINVAL; 237 } 238 239 switch (acc_type) { 240 case DEBUGFS_READ32: 241 *val = RREG32(addr - cfg_region->region_base); 242 break; 243 case DEBUGFS_WRITE32: 244 WREG32(addr - cfg_region->region_base, *val); 245 break; 246 case DEBUGFS_READ64: 247 val_l = RREG32(addr - cfg_region->region_base); 248 val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base); 249 250 *val = (((u64) val_h) << 32) | val_l; 251 break; 252 case DEBUGFS_WRITE64: 253 WREG32(addr - cfg_region->region_base, lower_32_bits(*val)); 254 WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val)); 255 break; 256 default: 257 dev_err(hdev->dev, "access type %d is not supported\n", acc_type); 258 return -EOPNOTSUPP; 259 } 260 261 return 0; 262 } 263 264 /* 265 * hl_access_dev_mem - access device memory 266 * 267 * @hdev: pointer to habanalabs device structure 268 * @region_type: the type of the region the address belongs to 269 * @addr: the address to access 270 * @val: the value to write from or read to 271 * @acc_type: the type of access (r/w, 32/64) 272 */ 273 int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type, 274 u64 addr, u64 *val, enum debugfs_access_type acc_type) 275 { 276 switch (region_type) { 277 case PCI_REGION_CFG: 278 return hl_access_cfg_region(hdev, addr, val, acc_type); 279 case PCI_REGION_SRAM: 280 case PCI_REGION_DRAM: 281 return hl_access_sram_dram_region(hdev, addr, val, acc_type, 282 region_type, (region_type == PCI_REGION_DRAM)); 283 default: 284 return -EFAULT; 285 } 286 287 return 0; 288 } 289 290 void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...) 291 { 292 va_list args; 293 int str_size; 294 295 va_start(args, fmt); 296 /* Calculate formatted string length. 
Assuming each string is null terminated, hence
	 * increment result by 1
	 */
	str_size = vsnprintf(NULL, 0, fmt, args) + 1;
	va_end(args);

	if ((e->actual_size + str_size) < e->allocated_buf_size) {
		va_start(args, fmt);
		vsnprintf(e->buf + e->actual_size, str_size, fmt, args);
		va_end(args);
	}

	/* Need to update the size even when not updating destination buffer to get the exact size
	 * of all input strings
	 */
	e->actual_size += str_size;
}

enum hl_device_status hl_device_status(struct hl_device *hdev)
{
	enum hl_device_status status;

	if (hdev->reset_info.in_reset) {
		if (hdev->reset_info.in_compute_reset)
			status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
		else
			status = HL_DEVICE_STATUS_IN_RESET;
	} else if (hdev->reset_info.needs_reset) {
		status = HL_DEVICE_STATUS_NEEDS_RESET;
	} else if (hdev->disabled) {
		status = HL_DEVICE_STATUS_MALFUNCTION;
	} else if (!hdev->init_done) {
		status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
	} else {
		status = HL_DEVICE_STATUS_OPERATIONAL;
	}

	return status;
}

bool hl_device_operational(struct hl_device *hdev,
		enum hl_device_status *status)
{
	enum hl_device_status current_status;

	current_status = hl_device_status(hdev);
	if (status)
		*status = current_status;

	switch (current_status) {
	case HL_DEVICE_STATUS_IN_RESET:
	case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
	case HL_DEVICE_STATUS_MALFUNCTION:
	case HL_DEVICE_STATUS_NEEDS_RESET:
		return false;
	case HL_DEVICE_STATUS_OPERATIONAL:
	case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
	default:
		return true;
	}
}

bool hl_ctrl_device_operational(struct hl_device *hdev,
		enum hl_device_status *status)
{
	enum hl_device_status current_status;

	current_status = hl_device_status(hdev);
	if (status)
		*status = current_status;

	switch (current_status) {
	case HL_DEVICE_STATUS_MALFUNCTION:
		return false;
	case HL_DEVICE_STATUS_IN_RESET:
	case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
	case HL_DEVICE_STATUS_NEEDS_RESET:
	case HL_DEVICE_STATUS_OPERATIONAL:
	case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
	default:
		return true;
	}
}

static void print_idle_status_mask(struct hl_device *hdev, const char *message,
					u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE])
{
	if (idle_mask[3])
		dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx_%016llx)\n",
			message, idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]);
	else if (idle_mask[2])
		dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx)\n",
			message, idle_mask[2], idle_mask[1], idle_mask[0]);
	else if (idle_mask[1])
		dev_err(hdev->dev, "%s (mask %#llx_%016llx)\n",
			message, idle_mask[1], idle_mask[0]);
	else
		dev_err(hdev->dev, "%s (mask %#llx)\n", message, idle_mask[0]);
}
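
/*
 * Illustrative sketch (not part of the driver flow): callers are expected to
 * gate work on the status reported by hl_device_operational() above, as the
 * IOCTL entry points do. The helper name and the -EBUSY policy below are
 * assumptions made for documentation purposes only.
 */
static int __maybe_unused example_check_device_ready(struct hl_device *hdev)
{
	enum hl_device_status status;

	if (!hl_device_operational(hdev, &status)) {
		dev_dbg(hdev->dev, "device not ready, status %d\n", status);
		return -EBUSY;
	}

	return 0;
}
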
static void hpriv_release(struct kref *ref)
{
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	bool reset_device, device_is_idle = true;
	struct hl_fpriv *hpriv;
	struct hl_device *hdev;

	hpriv = container_of(ref, struct hl_fpriv, refcount);

	hdev = hpriv->hdev;

	hdev->asic_funcs->send_device_activity(hdev, false);

	put_pid(hpriv->taskpid);

	hl_debugfs_remove_file(hpriv);

	mutex_destroy(&hpriv->ctx_lock);
	mutex_destroy(&hpriv->restore_phase_mutex);

	/* There should be no memory buffers at this point, so the handles IDR can be destroyed */
	hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);

	/* Device should be reset if reset-upon-device-release is enabled, or if there is a pending
	 * reset that waits for device release.
	 */
	reset_device = hdev->reset_upon_device_release || hdev->reset_info.watchdog_active;

	/* Check the device idle status and reset if not idle.
	 * Skip it if already in reset, or if device is going to be reset in any case.
	 */
	if (!hdev->reset_info.in_reset && !reset_device && hdev->pdev && !hdev->pldm)
		device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
							HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
	if (!device_is_idle) {
		print_idle_status_mask(hdev, "device is not idle after user context is closed",
					idle_mask);
		reset_device = true;
	}

	/* We need to remove the user from the list to make sure the reset process won't
	 * try to kill the user process. If we got here, it means there are no
	 * more driver/device resources that the user process is occupying, so there is
	 * no need to kill it.
	 *
	 * However, we can't set the compute_ctx to NULL at this stage. This is to prevent
	 * a race between the release and opening the device again. We don't want to let
	 * a user open the device while a reset is about to happen.
	 */
	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_list_lock);

	if (reset_device) {
		hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
	} else {
		/* Scrubbing is handled within hl_device_reset(), so here we need to do it directly */
		int rc = hdev->asic_funcs->scrub_device_mem(hdev);

		if (rc)
			dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
	}

	/* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
	 * thread, we don't care because in_reset is marked, so if a user tries to open
	 * the device it will fail on that, even if compute_ctx is false.
	 */
	mutex_lock(&hdev->fpriv_list_lock);
	hdev->is_compute_ctx_active = false;
	mutex_unlock(&hdev->fpriv_list_lock);

	hdev->compute_ctx_in_release = 0;

	/* release the eventfd */
	if (hpriv->notifier_event.eventfd)
		eventfd_ctx_put(hpriv->notifier_event.eventfd);

	mutex_destroy(&hpriv->notifier_event.lock);

	kfree(hpriv);
}
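
/*
 * Illustrative sketch: any code path that keeps an hpriv pointer for deferred
 * use is expected to wrap it with paired hl_hpriv_get()/hl_hpriv_put() calls
 * (defined just below), so that hpriv_release() above runs only after the
 * last reference is dropped. The helper below is hypothetical.
 */
static void __maybe_unused example_borrow_hpriv(struct hl_fpriv *hpriv)
{
	hl_hpriv_get(hpriv);

	/* ... safely touch hpriv members here ... */

	/* the release callback fires only when this was the last reference */
	hl_hpriv_put(hpriv);
}
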
void hl_hpriv_get(struct hl_fpriv *hpriv)
{
	kref_get(&hpriv->refcount);
}

int hl_hpriv_put(struct hl_fpriv *hpriv)
{
	return kref_put(&hpriv->refcount, hpriv_release);
}

static void print_device_in_use_info(struct hl_device *hdev, const char *message)
{
	u32 active_cs_num, dmabuf_export_cnt;
	bool unknown_reason = true;
	char buf[128];
	size_t size;
	int offset;

	size = sizeof(buf);
	offset = 0;

	active_cs_num = hl_get_active_cs_num(hdev);
	if (active_cs_num) {
		unknown_reason = false;
		offset += scnprintf(buf + offset, size - offset, " [%u active CS]", active_cs_num);
	}

	dmabuf_export_cnt = atomic_read(&hdev->dmabuf_export_cnt);
	if (dmabuf_export_cnt) {
		unknown_reason = false;
		offset += scnprintf(buf + offset, size - offset, " [%u exported dma-buf]",
					dmabuf_export_cnt);
	}

	if (unknown_reason)
		scnprintf(buf + offset, size - offset, " [unknown reason]");

	dev_notice(hdev->dev, "%s%s\n", message, buf);
}

/*
 * hl_device_release - release function for habanalabs device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when process closes a habanalabs device
 */
static int hl_device_release(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;

	filp->private_data = NULL;

	if (!hdev) {
		pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
		put_pid(hpriv->taskpid);
		return 0;
	}

	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);

	/* Memory buffers might still be in use at this point and thus the handles IDR destruction
	 * is postponed to hpriv_release().
	 */
	hl_mem_mgr_fini(&hpriv->mem_mgr);

	hdev->compute_ctx_in_release = 1;

	if (!hl_hpriv_put(hpriv)) {
		print_device_in_use_info(hdev, "User process closed FD but device still in use");
		hl_device_reset(hdev, HL_DRV_RESET_HARD);
	}

	hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif;

	return 0;
}

static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;

	filp->private_data = NULL;

	if (!hdev) {
		pr_err("Closing FD after device was removed\n");
		goto out;
	}

	mutex_lock(&hdev->fpriv_ctrl_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
out:
	/* release the eventfd */
	if (hpriv->notifier_event.eventfd)
		eventfd_ctx_put(hpriv->notifier_event.eventfd);

	mutex_destroy(&hpriv->notifier_event.lock);
	put_pid(hpriv->taskpid);

	kfree(hpriv);

	return 0;
}
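
/*
 * Illustrative sketch: the mmap offset passed by user space encodes the
 * mapping type in its high bits, which hl_mmap() below extracts with
 * HL_MMAP_TYPE_MASK. A simplified classifier (hypothetical helper, for
 * documentation purposes only) would look like this:
 */
static bool __maybe_unused example_is_block_mmap(unsigned long vm_pgoff)
{
	return (vm_pgoff & HL_MMAP_TYPE_MASK) == HL_MMAP_TYPE_BLOCK;
}
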
/*
 * hl_mmap - mmap function for habanalabs device
 *
 * @*filp: pointer to file structure
 * @*vma: pointer to vm_area_struct of the process
 *
 * Called when process does an mmap on habanalabs device. Call the relevant mmap
 * function at the end of the common code.
 */
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;
	unsigned long vm_pgoff;

	if (!hdev) {
		pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n");
		return -ENODEV;
	}

	vm_pgoff = vma->vm_pgoff;

	switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
	case HL_MMAP_TYPE_BLOCK:
		vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
		return hl_hw_block_mmap(hpriv, vma);

	case HL_MMAP_TYPE_CB:
	case HL_MMAP_TYPE_TS_BUFF:
		return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL);
	}
	return -EINVAL;
}

static const struct file_operations hl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open,
	.release = hl_device_release,
	.mmap = hl_mmap,
	.unlocked_ioctl = hl_ioctl,
	.compat_ioctl = hl_ioctl
};

static const struct file_operations hl_ctrl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open_ctrl,
	.release = hl_device_release_ctrl,
	.unlocked_ioctl = hl_ioctl_control,
	.compat_ioctl = hl_ioctl_control
};

static void device_release_func(struct device *dev)
{
	kfree(dev);
}
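
/*
 * Illustrative sketch: device_init_cdev() below only prepares the cdev and
 * the struct device; registration happens later via cdev_device_add() in
 * device_cdev_sysfs_add(). If registration never happened, the matching
 * teardown is a bare put_device(), which ends up in device_release_func()
 * above. The helper below is hypothetical.
 */
static void __maybe_unused example_drop_prepared_device(struct device *dev)
{
	/* valid only while cdev_device_add() has not been called yet */
	put_device(dev);
}
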
/*
 * device_init_cdev - Initialize cdev and device for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @class: pointer to the class object of the device
 * @minor: minor number of the specific device
 * @fops: file operations to install for this device
 * @name: name of the device as it will appear in the filesystem
 * @cdev: pointer to the char device object that will be initialized
 * @dev: pointer to the device object that will be initialized
 *
 * Initialize a cdev and a Linux device for the habanalabs device.
 */
static int device_init_cdev(struct hl_device *hdev, struct class *class,
				int minor, const struct file_operations *fops,
				char *name, struct cdev *cdev,
				struct device **dev)
{
	cdev_init(cdev, fops);
	cdev->owner = THIS_MODULE;

	*dev = kzalloc(sizeof(**dev), GFP_KERNEL);
	if (!*dev)
		return -ENOMEM;

	device_initialize(*dev);
	(*dev)->devt = MKDEV(hdev->major, minor);
	(*dev)->class = class;
	(*dev)->release = device_release_func;
	dev_set_drvdata(*dev, hdev);
	dev_set_name(*dev, "%s", name);

	return 0;
}

static int device_cdev_sysfs_add(struct hl_device *hdev)
{
	int rc;

	rc = cdev_device_add(&hdev->cdev, hdev->dev);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a char device to the system\n");
		return rc;
	}

	rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a control char device to the system\n");
		goto delete_cdev_device;
	}

	/* hl_sysfs_init() must be done after adding the device to the system */
	rc = hl_sysfs_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize sysfs\n");
		goto delete_ctrl_cdev_device;
	}

	hdev->cdev_sysfs_created = true;

	return 0;

delete_ctrl_cdev_device:
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
delete_cdev_device:
	cdev_device_del(&hdev->cdev, hdev->dev);
	return rc;
}

static void device_cdev_sysfs_del(struct hl_device *hdev)
{
	if (!hdev->cdev_sysfs_created)
		goto put_devices;

	hl_sysfs_fini(hdev);
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
	cdev_device_del(&hdev->cdev, hdev->dev);

put_devices:
	put_device(hdev->dev);
	put_device(hdev->dev_ctrl);
}

static void device_hard_reset_pending(struct work_struct *work)
{
	struct hl_device_reset_work *device_reset_work =
		container_of(work, struct hl_device_reset_work, reset_work.work);
	struct hl_device *hdev = device_reset_work->hdev;
	u32 flags;
	int rc;

	flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR;

	rc = hl_device_reset(hdev, flags);

	if ((rc == -EBUSY) && !hdev->device_fini_pending) {
		struct hl_ctx *ctx = hl_get_compute_ctx(hdev);

		if (ctx) {
			/* The read refcount value should be subtracted by one, because the read is
			 * protected with hl_get_compute_ctx().
			 */
			dev_info(hdev->dev,
				"Could not reset device (compute_ctx refcount %u). will try again in %u seconds",
				kref_read(&ctx->refcount) - 1, HL_PENDING_RESET_PER_SEC);
			hl_ctx_put(ctx);
		} else {
			dev_info(hdev->dev, "Could not reset device. will try again in %u seconds",
				HL_PENDING_RESET_PER_SEC);
		}

		queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work,
					msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
	}
}
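
/*
 * Illustrative sketch: device_hard_reset_pending() above is the body of
 * hdev->device_reset_work. Requesting an immediate retry from another
 * context boils down to the following (simplified from hl_device_reset(),
 * hypothetical helper):
 */
static void __maybe_unused example_queue_hard_reset(struct hl_device *hdev, u32 flags)
{
	hdev->device_reset_work.flags = flags;
	queue_delayed_work(hdev->reset_wq, &hdev->device_reset_work.reset_work, 0);
}
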
static void device_release_watchdog_func(struct work_struct *work)
{
	struct hl_device_reset_work *watchdog_work =
		container_of(work, struct hl_device_reset_work, reset_work.work);
	struct hl_device *hdev = watchdog_work->hdev;
	u32 flags;

	dev_dbg(hdev->dev, "Device wasn't released in time. Initiate hard-reset.\n");

	flags = watchdog_work->flags | HL_DRV_RESET_HARD | HL_DRV_RESET_FROM_WD_THR;

	hl_device_reset(hdev, flags);
}
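
/*
 * Illustrative sketch: the watchdog above is armed by hl_device_cond_reset()
 * further below. Conceptually the arming reduces to the following
 * (simplified, hypothetical helper - the real code also takes
 * reset_info.lock and checks watchdog_active first):
 */
static void __maybe_unused example_arm_release_watchdog(struct hl_device *hdev, u32 flags)
{
	hdev->device_release_watchdog_work.flags = flags;
	schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work,
			msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000));
	hdev->reset_info.watchdog_active = 1;
}
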
/*
 * device_early_init - do some early initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Install the relevant function pointers and call the early_init function,
 * if such a function exists
 */
static int device_early_init(struct hl_device *hdev)
{
	int i, rc;
	char workq_name[32];

	switch (hdev->asic_type) {
	case ASIC_GOYA:
		goya_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI_SEC:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI2:
		gaudi2_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI2B:
		gaudi2_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name));
		break;
	default:
		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
			hdev->asic_type);
		return -EINVAL;
	}

	rc = hdev->asic_funcs->early_init(hdev);
	if (rc)
		return rc;

	rc = hl_asid_init(hdev);
	if (rc)
		goto early_fini;

	if (hdev->asic_prop.completion_queues_count) {
		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
					sizeof(struct workqueue_struct *),
					GFP_KERNEL);
		if (!hdev->cq_wq) {
			rc = -ENOMEM;
			goto asid_fini;
		}
	}

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		snprintf(workq_name, 32, "hl%u-free-jobs-%u", hdev->cdev_idx, (u32) i);
		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
		if (hdev->cq_wq[i] == NULL) {
			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
			rc = -ENOMEM;
			goto free_cq_wq;
		}
	}

	snprintf(workq_name, 32, "hl%u-events", hdev->cdev_idx);
	hdev->eq_wq = create_singlethread_workqueue(workq_name);
	if (hdev->eq_wq == NULL) {
		dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
		rc = -ENOMEM;
		goto free_cq_wq;
	}

	snprintf(workq_name, 32, "hl%u-cs-completions", hdev->cdev_idx);
	hdev->cs_cmplt_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0);
	if (!hdev->cs_cmplt_wq) {
		dev_err(hdev->dev,
			"Failed to allocate CS completions workqueue\n");
		rc = -ENOMEM;
		goto free_eq_wq;
	}

	snprintf(workq_name, 32, "hl%u-ts-free-obj", hdev->cdev_idx);
	hdev->ts_free_obj_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0);
	if (!hdev->ts_free_obj_wq) {
		dev_err(hdev->dev,
			"Failed to allocate Timestamp registration free workqueue\n");
		rc = -ENOMEM;
		goto free_cs_cmplt_wq;
	}

	snprintf(workq_name, 32, "hl%u-prefetch", hdev->cdev_idx);
	hdev->prefetch_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0);
	if (!hdev->prefetch_wq) {
		dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n");
		rc = -ENOMEM;
		goto free_ts_free_wq;
	}

	hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), GFP_KERNEL);
	if (!hdev->hl_chip_info) {
		rc = -ENOMEM;
		goto free_prefetch_wq;
	}

	rc = hl_mmu_if_set_funcs(hdev);
	if (rc)
		goto free_chip_info;

	hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr);

	snprintf(workq_name, 32, "hl%u_device_reset", hdev->cdev_idx);
	hdev->reset_wq = create_singlethread_workqueue(workq_name);
	if (!hdev->reset_wq) {
		rc = -ENOMEM;
		dev_err(hdev->dev, "Failed to create device reset WQ\n");
		goto free_cb_mgr;
	}

	INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, device_hard_reset_pending);
	hdev->device_reset_work.hdev = hdev;
	hdev->device_fini_pending = 0;

	INIT_DELAYED_WORK(&hdev->device_release_watchdog_work.reset_work,
				device_release_watchdog_func);
	hdev->device_release_watchdog_work.hdev = hdev;

	mutex_init(&hdev->send_cpu_message_lock);
	mutex_init(&hdev->debug_lock);
	INIT_LIST_HEAD(&hdev->cs_mirror_list);
	spin_lock_init(&hdev->cs_mirror_lock);
	spin_lock_init(&hdev->reset_info.lock);
	INIT_LIST_HEAD(&hdev->fpriv_list);
	INIT_LIST_HEAD(&hdev->fpriv_ctrl_list);
	mutex_init(&hdev->fpriv_list_lock);
	mutex_init(&hdev->fpriv_ctrl_list_lock);
	mutex_init(&hdev->clk_throttling.lock);

	return 0;

free_cb_mgr:
	hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
	hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr);
free_chip_info:
	kfree(hdev->hl_chip_info);
free_prefetch_wq:
	destroy_workqueue(hdev->prefetch_wq);
free_ts_free_wq:
	destroy_workqueue(hdev->ts_free_obj_wq);
free_cs_cmplt_wq:
	destroy_workqueue(hdev->cs_cmplt_wq);
free_eq_wq:
	destroy_workqueue(hdev->eq_wq);
free_cq_wq:
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		if (hdev->cq_wq[i])
			destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);
asid_fini:
	hl_asid_fini(hdev);
early_fini:
	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);

	return rc;
}

/*
 * device_early_fini - finalize all that was done in device_early_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_early_fini(struct hl_device *hdev)
{
	int i;

	mutex_destroy(&hdev->debug_lock);
	mutex_destroy(&hdev->send_cpu_message_lock);

	mutex_destroy(&hdev->fpriv_list_lock);
	mutex_destroy(&hdev->fpriv_ctrl_list_lock);

	mutex_destroy(&hdev->clk_throttling.lock);

	hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
	hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr);

	kfree(hdev->hl_chip_info);

	destroy_workqueue(hdev->prefetch_wq);
	destroy_workqueue(hdev->ts_free_obj_wq);
	destroy_workqueue(hdev->cs_cmplt_wq);
	destroy_workqueue(hdev->eq_wq);
	destroy_workqueue(hdev->reset_wq);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);

	hl_asid_fini(hdev);

	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);
}

static void hl_device_heartbeat(struct work_struct *work)
{
	struct hl_device *hdev = container_of(work, struct hl_device,
						work_heartbeat.work);
	struct hl_info_fw_err_info info = {0};
	u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;

	if (!hl_device_operational(hdev, NULL))
		goto reschedule;

	if (!hdev->asic_funcs->send_heartbeat(hdev))
		goto reschedule;

	if (hl_device_operational(hdev, NULL))
		dev_err(hdev->dev, "Device heartbeat failed!\n");

	info.err_type = HL_INFO_FW_HEARTBEAT_ERR;
	info.event_mask = &event_mask;
	hl_handle_fw_err(hdev, &info);
hl_device_cond_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT, event_mask); 1004 1005 return; 1006 1007 reschedule: 1008 /* 1009 * prev_reset_trigger tracks consecutive fatal h/w errors until first 1010 * heartbeat immediately post reset. 1011 * If control reached here, then at least one heartbeat work has been 1012 * scheduled since last reset/init cycle. 1013 * So if the device is not already in reset cycle, reset the flag 1014 * prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR 1015 * status for at least one heartbeat. From this point driver restarts 1016 * tracking future consecutive fatal errors. 1017 */ 1018 if (!hdev->reset_info.in_reset) 1019 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; 1020 1021 schedule_delayed_work(&hdev->work_heartbeat, 1022 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); 1023 } 1024 1025 /* 1026 * device_late_init - do late stuff initialization for the habanalabs device 1027 * 1028 * @hdev: pointer to habanalabs device structure 1029 * 1030 * Do stuff that either needs the device H/W queues to be active or needs 1031 * to happen after all the rest of the initialization is finished 1032 */ 1033 static int device_late_init(struct hl_device *hdev) 1034 { 1035 int rc; 1036 1037 if (hdev->asic_funcs->late_init) { 1038 rc = hdev->asic_funcs->late_init(hdev); 1039 if (rc) { 1040 dev_err(hdev->dev, 1041 "failed late initialization for the H/W\n"); 1042 return rc; 1043 } 1044 } 1045 1046 hdev->high_pll = hdev->asic_prop.high_pll; 1047 1048 if (hdev->heartbeat) { 1049 INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); 1050 schedule_delayed_work(&hdev->work_heartbeat, 1051 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); 1052 } 1053 1054 hdev->late_init_done = true; 1055 1056 return 0; 1057 } 1058 1059 /* 1060 * device_late_fini - finalize all that was done in device_late_init 1061 * 1062 * @hdev: pointer to habanalabs device structure 1063 * 1064 */ 1065 static void device_late_fini(struct hl_device *hdev) 1066 { 1067 if (!hdev->late_init_done) 1068 return; 1069 1070 if (hdev->heartbeat) 1071 cancel_delayed_work_sync(&hdev->work_heartbeat); 1072 1073 if (hdev->asic_funcs->late_fini) 1074 hdev->asic_funcs->late_fini(hdev); 1075 1076 hdev->late_init_done = false; 1077 } 1078 1079 int hl_device_utilization(struct hl_device *hdev, u32 *utilization) 1080 { 1081 u64 max_power, curr_power, dc_power, dividend, divisor; 1082 int rc; 1083 1084 max_power = hdev->max_power; 1085 dc_power = hdev->asic_prop.dc_power_default; 1086 divisor = max_power - dc_power; 1087 if (!divisor) { 1088 dev_warn(hdev->dev, "device utilization is not supported\n"); 1089 return -EOPNOTSUPP; 1090 } 1091 rc = hl_fw_cpucp_power_get(hdev, &curr_power); 1092 1093 if (rc) 1094 return rc; 1095 1096 curr_power = clamp(curr_power, dc_power, max_power); 1097 1098 dividend = (curr_power - dc_power) * 100; 1099 *utilization = (u32) div_u64(dividend, divisor); 1100 1101 return 0; 1102 } 1103 1104 int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable) 1105 { 1106 int rc = 0; 1107 1108 mutex_lock(&hdev->debug_lock); 1109 1110 if (!enable) { 1111 if (!hdev->in_debug) { 1112 dev_err(hdev->dev, 1113 "Failed to disable debug mode because device was not in debug mode\n"); 1114 rc = -EFAULT; 1115 goto out; 1116 } 1117 1118 if (!hdev->reset_info.hard_reset_pending) 1119 hdev->asic_funcs->halt_coresight(hdev, ctx); 1120 1121 hdev->in_debug = 0; 1122 1123 goto out; 1124 } 1125 1126 if (hdev->in_debug) { 1127 dev_err(hdev->dev, 1128 "Failed to enable 
debug mode because device is already in debug mode\n"); 1129 rc = -EFAULT; 1130 goto out; 1131 } 1132 1133 hdev->in_debug = 1; 1134 1135 out: 1136 mutex_unlock(&hdev->debug_lock); 1137 1138 return rc; 1139 } 1140 1141 static void take_release_locks(struct hl_device *hdev) 1142 { 1143 /* Flush anyone that is inside the critical section of enqueue 1144 * jobs to the H/W 1145 */ 1146 hdev->asic_funcs->hw_queues_lock(hdev); 1147 hdev->asic_funcs->hw_queues_unlock(hdev); 1148 1149 /* Flush processes that are sending message to CPU */ 1150 mutex_lock(&hdev->send_cpu_message_lock); 1151 mutex_unlock(&hdev->send_cpu_message_lock); 1152 1153 /* Flush anyone that is inside device open */ 1154 mutex_lock(&hdev->fpriv_list_lock); 1155 mutex_unlock(&hdev->fpriv_list_lock); 1156 mutex_lock(&hdev->fpriv_ctrl_list_lock); 1157 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 1158 } 1159 1160 static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset, 1161 bool skip_wq_flush) 1162 { 1163 if (hard_reset) 1164 device_late_fini(hdev); 1165 1166 /* 1167 * Halt the engines and disable interrupts so we won't get any more 1168 * completions from H/W and we won't have any accesses from the 1169 * H/W to the host machine 1170 */ 1171 hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset); 1172 1173 /* Go over all the queues, release all CS and their jobs */ 1174 hl_cs_rollback_all(hdev, skip_wq_flush); 1175 1176 /* flush the MMU prefetch workqueue */ 1177 flush_workqueue(hdev->prefetch_wq); 1178 1179 /* Release all pending user interrupts, each pending user interrupt 1180 * holds a reference to user context 1181 */ 1182 hl_release_pending_user_interrupts(hdev); 1183 } 1184 1185 /* 1186 * hl_device_suspend - initiate device suspend 1187 * 1188 * @hdev: pointer to habanalabs device structure 1189 * 1190 * Puts the hw in the suspend state (all asics). 1191 * Returns 0 for success or an error on failure. 1192 * Called at driver suspend. 1193 */ 1194 int hl_device_suspend(struct hl_device *hdev) 1195 { 1196 int rc; 1197 1198 pci_save_state(hdev->pdev); 1199 1200 /* Block future CS/VM/JOB completion operations */ 1201 spin_lock(&hdev->reset_info.lock); 1202 if (hdev->reset_info.in_reset) { 1203 spin_unlock(&hdev->reset_info.lock); 1204 dev_err(hdev->dev, "Can't suspend while in reset\n"); 1205 return -EIO; 1206 } 1207 hdev->reset_info.in_reset = 1; 1208 spin_unlock(&hdev->reset_info.lock); 1209 1210 /* This blocks all other stuff that is not blocked by in_reset */ 1211 hdev->disabled = true; 1212 1213 take_release_locks(hdev); 1214 1215 rc = hdev->asic_funcs->suspend(hdev); 1216 if (rc) 1217 dev_err(hdev->dev, 1218 "Failed to disable PCI access of device CPU\n"); 1219 1220 /* Shut down the device */ 1221 pci_disable_device(hdev->pdev); 1222 pci_set_power_state(hdev->pdev, PCI_D3hot); 1223 1224 return 0; 1225 } 1226 1227 /* 1228 * hl_device_resume - initiate device resume 1229 * 1230 * @hdev: pointer to habanalabs device structure 1231 * 1232 * Bring the hw back to operating state (all asics). 1233 * Returns 0 for success or an error on failure. 1234 * Called at driver resume. 
1235 */ 1236 int hl_device_resume(struct hl_device *hdev) 1237 { 1238 int rc; 1239 1240 pci_set_power_state(hdev->pdev, PCI_D0); 1241 pci_restore_state(hdev->pdev); 1242 rc = pci_enable_device_mem(hdev->pdev); 1243 if (rc) { 1244 dev_err(hdev->dev, 1245 "Failed to enable PCI device in resume\n"); 1246 return rc; 1247 } 1248 1249 pci_set_master(hdev->pdev); 1250 1251 rc = hdev->asic_funcs->resume(hdev); 1252 if (rc) { 1253 dev_err(hdev->dev, "Failed to resume device after suspend\n"); 1254 goto disable_device; 1255 } 1256 1257 1258 /* 'in_reset' was set to true during suspend, now we must clear it in order 1259 * for hard reset to be performed 1260 */ 1261 spin_lock(&hdev->reset_info.lock); 1262 hdev->reset_info.in_reset = 0; 1263 spin_unlock(&hdev->reset_info.lock); 1264 1265 rc = hl_device_reset(hdev, HL_DRV_RESET_HARD); 1266 if (rc) { 1267 dev_err(hdev->dev, "Failed to reset device during resume\n"); 1268 goto disable_device; 1269 } 1270 1271 return 0; 1272 1273 disable_device: 1274 pci_clear_master(hdev->pdev); 1275 pci_disable_device(hdev->pdev); 1276 1277 return rc; 1278 } 1279 1280 static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev) 1281 { 1282 struct task_struct *task = NULL; 1283 struct list_head *fd_list; 1284 struct hl_fpriv *hpriv; 1285 struct mutex *fd_lock; 1286 u32 pending_cnt; 1287 1288 fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; 1289 fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; 1290 1291 /* Giving time for user to close FD, and for processes that are inside 1292 * hl_device_open to finish 1293 */ 1294 if (!list_empty(fd_list)) 1295 ssleep(1); 1296 1297 if (timeout) { 1298 pending_cnt = timeout; 1299 } else { 1300 if (hdev->process_kill_trial_cnt) { 1301 /* Processes have been already killed */ 1302 pending_cnt = 1; 1303 goto wait_for_processes; 1304 } else { 1305 /* Wait a small period after process kill */ 1306 pending_cnt = HL_PENDING_RESET_PER_SEC; 1307 } 1308 } 1309 1310 mutex_lock(fd_lock); 1311 1312 /* This section must be protected because we are dereferencing 1313 * pointers that are freed if the process exits 1314 */ 1315 list_for_each_entry(hpriv, fd_list, dev_node) { 1316 task = get_pid_task(hpriv->taskpid, PIDTYPE_PID); 1317 if (task) { 1318 dev_info(hdev->dev, "Killing user process pid=%d\n", 1319 task_pid_nr(task)); 1320 send_sig(SIGKILL, task, 1); 1321 usleep_range(1000, 10000); 1322 1323 put_task_struct(task); 1324 } else { 1325 /* 1326 * If we got here, it means that process was killed from outside the driver 1327 * right after it started looping on fd_list and before get_pid_task, thus 1328 * we don't need to kill it. 1329 */ 1330 dev_dbg(hdev->dev, 1331 "Can't get task struct for user process, assuming process was killed from outside the driver\n"); 1332 } 1333 } 1334 1335 mutex_unlock(fd_lock); 1336 1337 /* 1338 * We killed the open users, but that doesn't mean they are closed. 1339 * It could be that they are running a long cleanup phase in the driver 1340 * e.g. MMU unmappings, or running other long teardown flow even before 1341 * our cleanup. 1342 * Therefore we need to wait again to make sure they are closed before 1343 * continuing with the reset. 
	 */

wait_for_processes:
	while ((!list_empty(fd_list)) && (pending_cnt)) {
		dev_dbg(hdev->dev,
			"Waiting for all unmap operations to finish before hard reset\n");

		pending_cnt--;

		ssleep(1);
	}

	/* All processes exited successfully */
	if (list_empty(fd_list))
		return 0;

	/* Give up waiting for processes to exit */
	if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
		return -ETIME;

	hdev->process_kill_trial_cnt++;

	return -EBUSY;
}

static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
{
	struct list_head *fd_list;
	struct hl_fpriv *hpriv;
	struct mutex *fd_lock;

	fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
	fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;

	mutex_lock(fd_lock);
	list_for_each_entry(hpriv, fd_list, dev_node)
		hpriv->hdev = NULL;
	mutex_unlock(fd_lock);
}
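
/*
 * Illustrative sketch: device_kill_open_processes() above returns 0 once all
 * file descriptors are gone, -EBUSY to ask the caller to retry later, and
 * -ETIME after HL_PENDING_RESET_MAX_TRIALS attempts. A simplified caller
 * loop (the real retry is done by rescheduling the reset work, see
 * hl_device_reset(); the helper below is hypothetical):
 */
static int __maybe_unused example_wait_for_fd_close(struct hl_device *hdev)
{
	int rc;

	do {
		rc = device_kill_open_processes(hdev, 0, false);
	} while (rc == -EBUSY && !hdev->device_fini_pending);

	return rc;
}
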
static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
{
	u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;

	/* No consecutive mechanism when user context exists */
	if (hdev->is_compute_ctx_active)
		return;

	/*
	 * 'reset cause' is being updated here, because getting here
	 * means that it's the 1st time and the last time we're here
	 * ('in_reset' makes sure of it). This makes sure that
	 * 'reset_cause' will continue holding its 1st recorded reason!
	 */
	if (flags & HL_DRV_RESET_HEARTBEAT) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
		cur_reset_trigger = HL_DRV_RESET_HEARTBEAT;
	} else if (flags & HL_DRV_RESET_TDR) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_TDR;
		cur_reset_trigger = HL_DRV_RESET_TDR;
	} else if (flags & HL_DRV_RESET_FW_FATAL_ERR) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
		cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR;
	} else {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
	}

	/*
	 * If the reset cause is the same twice, then reset_trigger_repeated
	 * is set and, if this reset is due to a fatal FW error,
	 * the device is set to an unstable state.
	 */
	if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) {
		hdev->reset_info.prev_reset_trigger = cur_reset_trigger;
		hdev->reset_info.reset_trigger_repeated = 0;
	} else {
		hdev->reset_info.reset_trigger_repeated = 1;
	}

	/* If the reset is due to a heartbeat failure, the device CPU is not responsive,
	 * in which case there is no point in sending it a PCI-disable message.
	 *
	 * If the F/W is performing the reset, there is also no need to send it a message
	 * to disable PCI access.
	 */
	if ((flags & HL_DRV_RESET_HARD) &&
			!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
		/* Disable PCI access from the device F/W so it won't send
		 * us additional interrupts. We disable MSI/MSI-X at
		 * the halt_engines function and we can't have the F/W
		 * sending us interrupts after that. We need to disable
		 * the access here because if the device is marked
		 * disabled, the message won't be sent. Also, in case
		 * of heartbeat, the device CPU is marked as disabled
		 * so this message won't be sent.
		 */
		if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0))
			dev_warn(hdev->dev,
				"Failed to disable FW's PCI access\n");
	}
}

/*
 * hl_device_reset - reset the device
 *
 * @hdev: pointer to habanalabs device structure
 * @flags: reset flags.
 *
 * Block future CS and wait for pending CS to be enqueued
 * Call ASIC H/W fini
 * Flush all completions
 * Re-initialize all internal data structures
 * Call ASIC H/W init, late_init
 * Test queues
 * Enable device
 *
 * Returns 0 for success or an error on failure.
 */
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
	bool hard_reset, from_hard_reset_thread, fw_reset, reset_upon_device_release,
		schedule_hard_reset = false, delay_reset, from_dev_release, from_watchdog_thread;
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	struct hl_ctx *ctx;
	int i, rc, hw_fini_rc;

	if (!hdev->init_done) {
		dev_err(hdev->dev, "Can't reset before initialization is done\n");
		return 0;
	}

	hard_reset = !!(flags & HL_DRV_RESET_HARD);
	from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
	fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
	from_dev_release = !!(flags & HL_DRV_RESET_DEV_RELEASE);
	delay_reset = !!(flags & HL_DRV_RESET_DELAY);
	from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR);
	reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release;

	if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) {
		dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n");
		return 0;
	}

	if (!hard_reset && !hdev->asic_prop.supports_compute_reset) {
		dev_dbg(hdev->dev, "asic doesn't support compute reset - do hard-reset instead\n");
		hard_reset = true;
	}

	if (reset_upon_device_release) {
		if (hard_reset) {
			dev_crit(hdev->dev,
				"Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n");
			return -EINVAL;
		}

		goto do_reset;
	}

	if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) {
		dev_dbg(hdev->dev,
			"asic doesn't allow inference soft reset - do hard-reset instead\n");
		hard_reset = true;
	}

do_reset:
	/* Re-entry of reset thread */
	if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
		goto kill_processes;

	/*
	 * Prevent concurrency in this function - only one reset should be
	 * done at any given time. We need to perform this only if we didn't
	 * get here from a dedicated hard reset thread.
1518 */ 1519 if (!from_hard_reset_thread) { 1520 /* Block future CS/VM/JOB completion operations */ 1521 spin_lock(&hdev->reset_info.lock); 1522 if (hdev->reset_info.in_reset) { 1523 /* We allow scheduling of a hard reset only during a compute reset */ 1524 if (hard_reset && hdev->reset_info.in_compute_reset) 1525 hdev->reset_info.hard_reset_schedule_flags = flags; 1526 spin_unlock(&hdev->reset_info.lock); 1527 return 0; 1528 } 1529 1530 /* This still allows the completion of some KDMA ops 1531 * Update this before in_reset because in_compute_reset implies we are in reset 1532 */ 1533 hdev->reset_info.in_compute_reset = !hard_reset; 1534 1535 hdev->reset_info.in_reset = 1; 1536 1537 spin_unlock(&hdev->reset_info.lock); 1538 1539 /* Cancel the device release watchdog work if required. 1540 * In case of reset-upon-device-release while the release watchdog work is 1541 * scheduled due to a hard-reset, do hard-reset instead of compute-reset. 1542 */ 1543 if ((hard_reset || from_dev_release) && hdev->reset_info.watchdog_active) { 1544 struct hl_device_reset_work *watchdog_work = 1545 &hdev->device_release_watchdog_work; 1546 1547 hdev->reset_info.watchdog_active = 0; 1548 if (!from_watchdog_thread) 1549 cancel_delayed_work_sync(&watchdog_work->reset_work); 1550 1551 if (from_dev_release && (watchdog_work->flags & HL_DRV_RESET_HARD)) { 1552 hdev->reset_info.in_compute_reset = 0; 1553 flags |= HL_DRV_RESET_HARD; 1554 flags &= ~HL_DRV_RESET_DEV_RELEASE; 1555 hard_reset = true; 1556 } 1557 } 1558 1559 if (delay_reset) 1560 usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1); 1561 1562 escalate_reset_flow: 1563 handle_reset_trigger(hdev, flags); 1564 1565 /* This also blocks future CS/VM/JOB completion operations */ 1566 hdev->disabled = true; 1567 1568 take_release_locks(hdev); 1569 1570 if (hard_reset) 1571 dev_info(hdev->dev, "Going to reset device\n"); 1572 else if (reset_upon_device_release) 1573 dev_dbg(hdev->dev, "Going to reset device after release by user\n"); 1574 else 1575 dev_dbg(hdev->dev, "Going to reset engines of inference device\n"); 1576 } 1577 1578 if ((hard_reset) && (!from_hard_reset_thread)) { 1579 hdev->reset_info.hard_reset_pending = true; 1580 1581 hdev->process_kill_trial_cnt = 0; 1582 1583 hdev->device_reset_work.flags = flags; 1584 1585 /* 1586 * Because the reset function can't run from heartbeat work, 1587 * we need to call the reset function from a dedicated work. 1588 */ 1589 queue_delayed_work(hdev->reset_wq, &hdev->device_reset_work.reset_work, 0); 1590 1591 return 0; 1592 } 1593 1594 cleanup_resources(hdev, hard_reset, fw_reset, from_dev_release); 1595 1596 kill_processes: 1597 if (hard_reset) { 1598 /* Kill processes here after CS rollback. 
This is because the
		 * process can't really exit until all its CSs are done, which
		 * is what we do in cs rollback
		 */
		rc = device_kill_open_processes(hdev, 0, false);

		if (rc == -EBUSY) {
			if (hdev->device_fini_pending) {
				dev_crit(hdev->dev,
					"%s Failed to kill all open processes, stopping hard reset\n",
					dev_name(&(hdev)->pdev->dev));
				goto out_err;
			}

			/* signal reset thread to reschedule */
			return rc;
		}

		if (rc) {
			dev_crit(hdev->dev,
				"%s Failed to kill all open processes, stopping hard reset\n",
				dev_name(&(hdev)->pdev->dev));
			goto out_err;
		}

		/* Flush the Event queue workers to make sure no other thread is
		 * reading or writing to registers during the reset
		 */
		flush_workqueue(hdev->eq_wq);
	}

	/* Reset the H/W. It will be in idle state after this returns */
	hw_fini_rc = hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);

	if (hard_reset) {
		hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;

		/* Release kernel context */
		if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
			hdev->kernel_ctx = NULL;

		hl_vm_fini(hdev);
		hl_mmu_fini(hdev);
		hl_eq_reset(hdev, &hdev->event_queue);
	}

	/* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
	hl_hw_queue_reset(hdev, hard_reset);
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_reset(hdev, &hdev->completion_queue[i]);

	/* Make sure the context switch phase will run again */
	ctx = hl_get_compute_ctx(hdev);
	if (ctx) {
		atomic_set(&ctx->thread_ctx_switch_token, 1);
		ctx->thread_ctx_switch_wait_token = 0;
		hl_ctx_put(ctx);
	}

	if (hw_fini_rc) {
		rc = hw_fini_rc;
		goto out_err;
	}
	/* Finished tear-down, starting to re-initialize */

	if (hard_reset) {
		hdev->device_cpu_disabled = false;
		hdev->reset_info.hard_reset_pending = false;

		if (hdev->reset_info.reset_trigger_repeated &&
				(hdev->reset_info.prev_reset_trigger ==
						HL_DRV_RESET_FW_FATAL_ERR)) {
			/* if there are 2 back-to-back resets from the FW,
			 * ensure the driver puts the device in an unusable state
			 */
			dev_crit(hdev->dev,
				"%s Consecutive FW fatal errors received, stopping hard reset\n",
				dev_name(&(hdev)->pdev->dev));
			rc = -EIO;
			goto out_err;
		}

		if (hdev->kernel_ctx) {
			dev_crit(hdev->dev,
				"%s kernel ctx was alive during hard reset, something is terribly wrong\n",
				dev_name(&(hdev)->pdev->dev));
			rc = -EBUSY;
			goto out_err;
		}

		rc = hl_mmu_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to initialize MMU S/W after hard reset\n");
			goto out_err;
		}

		/* Allocate the kernel context */
		hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
						GFP_KERNEL);
		if (!hdev->kernel_ctx) {
			rc = -ENOMEM;
			hl_mmu_fini(hdev);
			goto out_err;
		}

		hdev->is_compute_ctx_active = false;

		rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init kernel ctx in hard reset\n");
			kfree(hdev->kernel_ctx);
			hdev->kernel_ctx = NULL;
			hl_mmu_fini(hdev);
			goto out_err;
		}
	}

	/* Device is now enabled, as part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
1720 */ 1721 hdev->disabled = false; 1722 1723 /* F/W security enabled indication might be updated after hard-reset */ 1724 if (hard_reset) { 1725 rc = hl_fw_read_preboot_status(hdev); 1726 if (rc) 1727 goto out_err; 1728 } 1729 1730 rc = hdev->asic_funcs->hw_init(hdev); 1731 if (rc) { 1732 dev_err(hdev->dev, "failed to initialize the H/W after reset\n"); 1733 goto out_err; 1734 } 1735 1736 /* If device is not idle fail the reset process */ 1737 if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, 1738 HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { 1739 print_idle_status_mask(hdev, "device is not idle after reset", idle_mask); 1740 rc = -EIO; 1741 goto out_err; 1742 } 1743 1744 /* Check that the communication with the device is working */ 1745 rc = hdev->asic_funcs->test_queues(hdev); 1746 if (rc) { 1747 dev_err(hdev->dev, "Failed to detect if device is alive after reset\n"); 1748 goto out_err; 1749 } 1750 1751 if (hard_reset) { 1752 rc = device_late_init(hdev); 1753 if (rc) { 1754 dev_err(hdev->dev, "Failed late init after hard reset\n"); 1755 goto out_err; 1756 } 1757 1758 rc = hl_vm_init(hdev); 1759 if (rc) { 1760 dev_err(hdev->dev, "Failed to init memory module after hard reset\n"); 1761 goto out_err; 1762 } 1763 1764 if (!hdev->asic_prop.fw_security_enabled) 1765 hl_fw_set_max_power(hdev); 1766 } else { 1767 rc = hdev->asic_funcs->compute_reset_late_init(hdev); 1768 if (rc) { 1769 if (reset_upon_device_release) 1770 dev_err(hdev->dev, 1771 "Failed late init in reset after device release\n"); 1772 else 1773 dev_err(hdev->dev, "Failed late init after compute reset\n"); 1774 goto out_err; 1775 } 1776 } 1777 1778 rc = hdev->asic_funcs->scrub_device_mem(hdev); 1779 if (rc) { 1780 dev_err(hdev->dev, "scrub mem failed from device reset (%d)\n", rc); 1781 goto out_err; 1782 } 1783 1784 spin_lock(&hdev->reset_info.lock); 1785 hdev->reset_info.in_compute_reset = 0; 1786 1787 /* Schedule hard reset only if requested and if not already in hard reset. 1788 * We keep 'in_reset' enabled, so no other reset can go in during the hard 1789 * reset schedule 1790 */ 1791 if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags) 1792 schedule_hard_reset = true; 1793 else 1794 hdev->reset_info.in_reset = 0; 1795 1796 spin_unlock(&hdev->reset_info.lock); 1797 1798 hdev->reset_info.needs_reset = false; 1799 1800 if (hard_reset) 1801 dev_info(hdev->dev, 1802 "Successfully finished resetting the %s device\n", 1803 dev_name(&(hdev)->pdev->dev)); 1804 else 1805 dev_dbg(hdev->dev, 1806 "Successfully finished resetting the %s device\n", 1807 dev_name(&(hdev)->pdev->dev)); 1808 1809 if (hard_reset) { 1810 hdev->reset_info.hard_reset_cnt++; 1811 1812 /* After reset is done, we are ready to receive events from 1813 * the F/W. 
We can't do it before because we will ignore events 1814 * and if those events are fatal, we won't know about it and 1815 * the device will be operational although it shouldn't be 1816 */ 1817 hdev->asic_funcs->enable_events_from_fw(hdev); 1818 } else { 1819 if (!reset_upon_device_release) 1820 hdev->reset_info.compute_reset_cnt++; 1821 1822 if (schedule_hard_reset) { 1823 dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n"); 1824 flags = hdev->reset_info.hard_reset_schedule_flags; 1825 hdev->reset_info.hard_reset_schedule_flags = 0; 1826 hdev->disabled = true; 1827 hard_reset = true; 1828 handle_reset_trigger(hdev, flags); 1829 goto escalate_reset_flow; 1830 } 1831 } 1832 1833 return 0; 1834 1835 out_err: 1836 hdev->disabled = true; 1837 1838 spin_lock(&hdev->reset_info.lock); 1839 hdev->reset_info.in_compute_reset = 0; 1840 1841 if (hard_reset) { 1842 dev_err(hdev->dev, 1843 "%s Failed to reset! Device is NOT usable\n", 1844 dev_name(&(hdev)->pdev->dev)); 1845 hdev->reset_info.hard_reset_cnt++; 1846 } else { 1847 if (reset_upon_device_release) { 1848 dev_err(hdev->dev, "Failed to reset device after user release\n"); 1849 flags &= ~HL_DRV_RESET_DEV_RELEASE; 1850 } else { 1851 dev_err(hdev->dev, "Failed to do compute reset\n"); 1852 hdev->reset_info.compute_reset_cnt++; 1853 } 1854 1855 spin_unlock(&hdev->reset_info.lock); 1856 flags |= HL_DRV_RESET_HARD; 1857 hard_reset = true; 1858 goto escalate_reset_flow; 1859 } 1860 1861 hdev->reset_info.in_reset = 0; 1862 1863 spin_unlock(&hdev->reset_info.lock); 1864 1865 return rc; 1866 } 1867 1868 /* 1869 * hl_device_cond_reset() - conditionally reset the device. 1870 * @hdev: pointer to habanalabs device structure. 1871 * @reset_flags: reset flags. 1872 * @event_mask: events to notify user about. 1873 * 1874 * Conditionally reset the device, or alternatively schedule a watchdog work to reset the device 1875 * unless another reset precedes it. 1876 */ 1877 int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask) 1878 { 1879 struct hl_ctx *ctx = NULL; 1880 1881 /* F/W reset cannot be postponed */ 1882 if (flags & HL_DRV_RESET_BYPASS_REQ_TO_FW) 1883 goto device_reset; 1884 1885 /* Device release watchdog is relevant only if user exists and gets a reset notification */ 1886 if (!(event_mask & HL_NOTIFIER_EVENT_DEVICE_RESET)) { 1887 dev_err(hdev->dev, "Resetting device without a reset indication to user\n"); 1888 goto device_reset; 1889 } 1890 1891 ctx = hl_get_compute_ctx(hdev); 1892 if (!ctx || !ctx->hpriv->notifier_event.eventfd) 1893 goto device_reset; 1894 1895 /* Schedule the device release watchdog work unless reset is already in progress or if the 1896 * work is already scheduled. 
1897 */
1898 spin_lock(&hdev->reset_info.lock);
1899 if (hdev->reset_info.in_reset) {
1900 spin_unlock(&hdev->reset_info.lock);
1901 goto device_reset;
1902 }
1903
1904 if (hdev->reset_info.watchdog_active)
1905 goto out;
1906
1907 hdev->device_release_watchdog_work.flags = flags;
1908 dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",
1909 hdev->device_release_watchdog_timeout_sec);
1910 schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work,
1911 msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000));
1912 hdev->reset_info.watchdog_active = 1;
1913 out:
1914 spin_unlock(&hdev->reset_info.lock);
1915
1916 hl_notifier_event_send_all(hdev, event_mask);
1917
1918 hl_ctx_put(ctx);
1919
1920 hl_abort_waitings_for_completion(hdev);
1921
1922 return 0;
1923
1924 device_reset:
1925 if (event_mask)
1926 hl_notifier_event_send_all(hdev, event_mask);
1927 if (ctx)
1928 hl_ctx_put(ctx);
1929
1930 return hl_device_reset(hdev, flags);
1931 }
1932
1933 static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)
1934 {
1935 mutex_lock(&notifier_event->lock);
1936 notifier_event->events_mask |= event_mask;
1937
1938 if (notifier_event->eventfd)
1939 eventfd_signal(notifier_event->eventfd, 1);
1940
1941 mutex_unlock(&notifier_event->lock);
1942 }
1943
1944 /*
1945 * hl_notifier_event_send_all - notify all user processes via eventfd
1946 *
1947 * @hdev: pointer to habanalabs device structure
1948 * @event_mask: the occurred event/s
1949 * This function does not return a value.
1950 */
1951 void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
1952 {
1953 struct hl_fpriv *hpriv;
1954
1955 if (!event_mask) {
1956 dev_warn(hdev->dev, "Skip sending zero event");
1957 return;
1958 }
1959
1960 mutex_lock(&hdev->fpriv_list_lock);
1961
1962 list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
1963 hl_notifier_event_send(&hpriv->notifier_event, event_mask);
1964
1965 mutex_unlock(&hdev->fpriv_list_lock);
1966
1967 /* control device */
1968 mutex_lock(&hdev->fpriv_ctrl_list_lock);
1969
1970 list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
1971 hl_notifier_event_send(&hpriv->notifier_event, event_mask);
1972
1973 mutex_unlock(&hdev->fpriv_ctrl_list_lock);
1974 }
1975
1976 static int create_cdev(struct hl_device *hdev)
1977 {
1978 char *name;
1979 int rc;
1980
1981 hdev->cdev_idx = hdev->id / 2;
1982
1983 name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
1984 if (!name) {
1985 rc = -ENOMEM;
1986 goto out_err;
1987 }
1988
1989 /* Initialize cdev and device structures */
1990 rc = device_init_cdev(hdev, hdev->hclass, hdev->id, &hl_ops, name,
1991 &hdev->cdev, &hdev->dev);
1992
1993 kfree(name);
1994
1995 if (rc)
1996 goto out_err;
1997
1998 name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
1999 if (!name) {
2000 rc = -ENOMEM;
2001 goto free_dev;
2002 }
2003
2004 /* Initialize cdev and device structures for control device */
2005 rc = device_init_cdev(hdev, hdev->hclass, hdev->id_control, &hl_ctrl_ops,
2006 name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
2007
2008 kfree(name);
2009
2010 if (rc)
2011 goto free_dev;
2012
2013 return 0;
2014
2015 free_dev:
2016 put_device(hdev->dev);
2017 out_err:
2018 return rc;
2019 }
2020
2021 /*
2022 * hl_device_init - main initialization function for habanalabs device
2023 *
2024 * @hdev: pointer to habanalabs device structure
2025 *
2026 * Allocate an id for the device, do early initialization and then call the
2027 * ASIC specific
initialization functions. Finally, create the cdev and the 2028 * Linux device to expose it to the user 2029 */ 2030 int hl_device_init(struct hl_device *hdev) 2031 { 2032 int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt; 2033 bool add_cdev_sysfs_on_err = false; 2034 2035 rc = create_cdev(hdev); 2036 if (rc) 2037 goto out_disabled; 2038 2039 /* Initialize ASIC function pointers and perform early init */ 2040 rc = device_early_init(hdev); 2041 if (rc) 2042 goto free_dev; 2043 2044 user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count + 2045 hdev->asic_prop.user_interrupt_count; 2046 2047 if (user_interrupt_cnt) { 2048 hdev->user_interrupt = kcalloc(user_interrupt_cnt, sizeof(*hdev->user_interrupt), 2049 GFP_KERNEL); 2050 if (!hdev->user_interrupt) { 2051 rc = -ENOMEM; 2052 goto early_fini; 2053 } 2054 } 2055 2056 /* 2057 * Start calling ASIC initialization. First S/W then H/W and finally 2058 * late init 2059 */ 2060 rc = hdev->asic_funcs->sw_init(hdev); 2061 if (rc) 2062 goto free_usr_intr_mem; 2063 2064 2065 /* initialize completion structure for multi CS wait */ 2066 hl_multi_cs_completion_init(hdev); 2067 2068 /* 2069 * Initialize the H/W queues. Must be done before hw_init, because 2070 * there the addresses of the kernel queue are being written to the 2071 * registers of the device 2072 */ 2073 rc = hl_hw_queues_create(hdev); 2074 if (rc) { 2075 dev_err(hdev->dev, "failed to initialize kernel queues\n"); 2076 goto sw_fini; 2077 } 2078 2079 cq_cnt = hdev->asic_prop.completion_queues_count; 2080 2081 /* 2082 * Initialize the completion queues. Must be done before hw_init, 2083 * because there the addresses of the completion queues are being 2084 * passed as arguments to request_irq 2085 */ 2086 if (cq_cnt) { 2087 hdev->completion_queue = kcalloc(cq_cnt, 2088 sizeof(*hdev->completion_queue), 2089 GFP_KERNEL); 2090 2091 if (!hdev->completion_queue) { 2092 dev_err(hdev->dev, 2093 "failed to allocate completion queues\n"); 2094 rc = -ENOMEM; 2095 goto hw_queues_destroy; 2096 } 2097 } 2098 2099 for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) { 2100 rc = hl_cq_init(hdev, &hdev->completion_queue[i], 2101 hdev->asic_funcs->get_queue_id_for_cq(hdev, i)); 2102 if (rc) { 2103 dev_err(hdev->dev, 2104 "failed to initialize completion queue\n"); 2105 goto cq_fini; 2106 } 2107 hdev->completion_queue[i].cq_idx = i; 2108 } 2109 2110 hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs, 2111 sizeof(struct hl_cs *), GFP_KERNEL); 2112 if (!hdev->shadow_cs_queue) { 2113 rc = -ENOMEM; 2114 goto cq_fini; 2115 } 2116 2117 /* 2118 * Initialize the event queue. 
Must be done before hw_init, 2119 * because there the address of the event queue is being 2120 * passed as argument to request_irq 2121 */ 2122 rc = hl_eq_init(hdev, &hdev->event_queue); 2123 if (rc) { 2124 dev_err(hdev->dev, "failed to initialize event queue\n"); 2125 goto free_shadow_cs_queue; 2126 } 2127 2128 /* MMU S/W must be initialized before kernel context is created */ 2129 rc = hl_mmu_init(hdev); 2130 if (rc) { 2131 dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n"); 2132 goto eq_fini; 2133 } 2134 2135 /* Allocate the kernel context */ 2136 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); 2137 if (!hdev->kernel_ctx) { 2138 rc = -ENOMEM; 2139 goto mmu_fini; 2140 } 2141 2142 hdev->is_compute_ctx_active = false; 2143 2144 hdev->asic_funcs->state_dump_init(hdev); 2145 2146 hdev->device_release_watchdog_timeout_sec = HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC; 2147 2148 hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL; 2149 hl_debugfs_add_device(hdev); 2150 2151 /* debugfs nodes are created in hl_ctx_init so it must be called after 2152 * hl_debugfs_add_device. 2153 */ 2154 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); 2155 if (rc) { 2156 dev_err(hdev->dev, "failed to initialize kernel context\n"); 2157 kfree(hdev->kernel_ctx); 2158 goto remove_device_from_debugfs; 2159 } 2160 2161 rc = hl_cb_pool_init(hdev); 2162 if (rc) { 2163 dev_err(hdev->dev, "failed to initialize CB pool\n"); 2164 goto release_ctx; 2165 } 2166 2167 rc = hl_dec_init(hdev); 2168 if (rc) { 2169 dev_err(hdev->dev, "Failed to initialize the decoder module\n"); 2170 goto cb_pool_fini; 2171 } 2172 2173 /* 2174 * From this point, override rc (=0) in case of an error to allow 2175 * debugging (by adding char devices and create sysfs nodes as part of 2176 * the error flow). 2177 */ 2178 add_cdev_sysfs_on_err = true; 2179 2180 /* Device is now enabled as part of the initialization requires 2181 * communication with the device firmware to get information that 2182 * is required for the initialization itself 2183 */ 2184 hdev->disabled = false; 2185 2186 rc = hdev->asic_funcs->hw_init(hdev); 2187 if (rc) { 2188 dev_err(hdev->dev, "failed to initialize the H/W\n"); 2189 rc = 0; 2190 goto out_disabled; 2191 } 2192 2193 /* Check that the communication with the device is working */ 2194 rc = hdev->asic_funcs->test_queues(hdev); 2195 if (rc) { 2196 dev_err(hdev->dev, "Failed to detect if device is alive\n"); 2197 rc = 0; 2198 goto out_disabled; 2199 } 2200 2201 rc = device_late_init(hdev); 2202 if (rc) { 2203 dev_err(hdev->dev, "Failed late initialization\n"); 2204 rc = 0; 2205 goto out_disabled; 2206 } 2207 2208 dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n", 2209 hdev->asic_name, 2210 hdev->asic_prop.dram_size / SZ_1G); 2211 2212 rc = hl_vm_init(hdev); 2213 if (rc) { 2214 dev_err(hdev->dev, "Failed to initialize memory module\n"); 2215 rc = 0; 2216 goto out_disabled; 2217 } 2218 2219 /* 2220 * Expose devices and sysfs nodes to user. 2221 * From here there is no need to add char devices and create sysfs nodes 2222 * in case of an error. 
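 * In other words, failures up to this point leave add_cdev_sysfs_on_err set,
 * so the out_disabled error path still calls device_cdev_sysfs_add() to allow
 * debugging of a partially initialized device; from here on the flag is
 * cleared and the error path skips that step.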
2223 */ 2224 add_cdev_sysfs_on_err = false; 2225 rc = device_cdev_sysfs_add(hdev); 2226 if (rc) { 2227 dev_err(hdev->dev, 2228 "Failed to add char devices and sysfs nodes\n"); 2229 rc = 0; 2230 goto out_disabled; 2231 } 2232 2233 /* Need to call this again because the max power might change, 2234 * depending on card type for certain ASICs 2235 */ 2236 if (hdev->asic_prop.set_max_power_on_device_init && 2237 !hdev->asic_prop.fw_security_enabled) 2238 hl_fw_set_max_power(hdev); 2239 2240 /* 2241 * hl_hwmon_init() must be called after device_late_init(), because only 2242 * there we get the information from the device about which 2243 * hwmon-related sensors the device supports. 2244 * Furthermore, it must be done after adding the device to the system. 2245 */ 2246 rc = hl_hwmon_init(hdev); 2247 if (rc) { 2248 dev_err(hdev->dev, "Failed to initialize hwmon\n"); 2249 rc = 0; 2250 goto out_disabled; 2251 } 2252 2253 dev_notice(hdev->dev, 2254 "Successfully added device %s to habanalabs driver\n", 2255 dev_name(&(hdev)->pdev->dev)); 2256 2257 hdev->init_done = true; 2258 2259 /* After initialization is done, we are ready to receive events from 2260 * the F/W. We can't do it before because we will ignore events and if 2261 * those events are fatal, we won't know about it and the device will 2262 * be operational although it shouldn't be 2263 */ 2264 hdev->asic_funcs->enable_events_from_fw(hdev); 2265 2266 return 0; 2267 2268 cb_pool_fini: 2269 hl_cb_pool_fini(hdev); 2270 release_ctx: 2271 if (hl_ctx_put(hdev->kernel_ctx) != 1) 2272 dev_err(hdev->dev, 2273 "kernel ctx is still alive on initialization failure\n"); 2274 remove_device_from_debugfs: 2275 hl_debugfs_remove_device(hdev); 2276 mmu_fini: 2277 hl_mmu_fini(hdev); 2278 eq_fini: 2279 hl_eq_fini(hdev, &hdev->event_queue); 2280 free_shadow_cs_queue: 2281 kfree(hdev->shadow_cs_queue); 2282 cq_fini: 2283 for (i = 0 ; i < cq_ready_cnt ; i++) 2284 hl_cq_fini(hdev, &hdev->completion_queue[i]); 2285 kfree(hdev->completion_queue); 2286 hw_queues_destroy: 2287 hl_hw_queues_destroy(hdev); 2288 sw_fini: 2289 hdev->asic_funcs->sw_fini(hdev); 2290 free_usr_intr_mem: 2291 kfree(hdev->user_interrupt); 2292 early_fini: 2293 device_early_fini(hdev); 2294 free_dev: 2295 put_device(hdev->dev_ctrl); 2296 put_device(hdev->dev); 2297 out_disabled: 2298 hdev->disabled = true; 2299 if (add_cdev_sysfs_on_err) 2300 device_cdev_sysfs_add(hdev); 2301 if (hdev->pdev) 2302 dev_err(&hdev->pdev->dev, 2303 "Failed to initialize hl%d. Device %s is NOT usable !\n", 2304 hdev->cdev_idx, dev_name(&(hdev)->pdev->dev)); 2305 else 2306 pr_err("Failed to initialize hl%d. 
Device %s is NOT usable !\n",
2307 hdev->cdev_idx, dev_name(&(hdev)->pdev->dev));
2308
2309 return rc;
2310 }
2311
2312 /*
2313 * hl_device_fini - main tear-down function for habanalabs device
2314 *
2315 * @hdev: pointer to habanalabs device structure
2316 *
2317 * Destroy the device, call ASIC fini functions and release the id
2318 */
2319 void hl_device_fini(struct hl_device *hdev)
2320 {
2321 bool device_in_reset;
2322 ktime_t timeout;
2323 u64 reset_sec;
2324 int i, rc;
2325
2326 dev_info(hdev->dev, "Removing device\n");
2327
2328 hdev->device_fini_pending = 1;
2329 flush_delayed_work(&hdev->device_reset_work.reset_work);
2330
2331 if (hdev->pldm)
2332 reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
2333 else
2334 reset_sec = HL_HARD_RESET_MAX_TIMEOUT;
2335
2336 /*
2337 * This function is competing with the reset function, so try to
2338 * take the reset atomic and if we are already in the middle of a reset,
2339 * wait until the reset function is finished. The reset function is designed
2340 * to always finish. However, in Gaudi, because of all the network
2341 * ports, the hard reset could take between 10 and 30 seconds
2342 */
2343
2344 timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);
2345
2346 spin_lock(&hdev->reset_info.lock);
2347 device_in_reset = !!hdev->reset_info.in_reset;
2348 if (!device_in_reset)
2349 hdev->reset_info.in_reset = 1;
2350 spin_unlock(&hdev->reset_info.lock);
2351
2352 while (device_in_reset) {
2353 usleep_range(50, 200);
2354
2355 spin_lock(&hdev->reset_info.lock);
2356 device_in_reset = !!hdev->reset_info.in_reset;
2357 if (!device_in_reset)
2358 hdev->reset_info.in_reset = 1;
2359 spin_unlock(&hdev->reset_info.lock);
2360
2361 if (ktime_compare(ktime_get(), timeout) > 0) {
2362 dev_crit(hdev->dev,
2363 "%s Failed to remove device because reset function did not finish\n",
2364 dev_name(&(hdev)->pdev->dev));
2365 return;
2366 }
2367 }
2368
2369 cancel_delayed_work_sync(&hdev->device_release_watchdog_work.reset_work);
2370
2371 /* Disable PCI access from device F/W so it won't send us additional
2372 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
2373 * can't have the F/W sending us interrupts after that. We need to
2374 * disable the access here because if the device is marked disabled, the
2375 * message won't be sent. Also, in case of heartbeat, the device CPU is
2376 * marked as disabled so this message won't be sent
2377 */
2378 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2379
2380 /* Mark device as disabled */
2381 hdev->disabled = true;
2382
2383 take_release_locks(hdev);
2384
2385 hdev->reset_info.hard_reset_pending = true;
2386
2387 hl_hwmon_fini(hdev);
2388
2389 cleanup_resources(hdev, true, false, false);
2390
2391 /* Kill processes here after CS rollback.
This is because the process 2392 * can't really exit until all its CSs are done, which is what we 2393 * do in cs rollback 2394 */ 2395 dev_info(hdev->dev, 2396 "Waiting for all processes to exit (timeout of %u seconds)", 2397 HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI); 2398 2399 hdev->process_kill_trial_cnt = 0; 2400 rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false); 2401 if (rc) { 2402 dev_crit(hdev->dev, "Failed to kill all open processes\n"); 2403 device_disable_open_processes(hdev, false); 2404 } 2405 2406 hdev->process_kill_trial_cnt = 0; 2407 rc = device_kill_open_processes(hdev, 0, true); 2408 if (rc) { 2409 dev_crit(hdev->dev, "Failed to kill all control device open processes\n"); 2410 device_disable_open_processes(hdev, true); 2411 } 2412 2413 hl_cb_pool_fini(hdev); 2414 2415 /* Reset the H/W. It will be in idle state after this returns */ 2416 rc = hdev->asic_funcs->hw_fini(hdev, true, false); 2417 if (rc) 2418 dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc); 2419 2420 hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; 2421 2422 /* Release kernel context */ 2423 if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) 2424 dev_err(hdev->dev, "kernel ctx is still alive\n"); 2425 2426 hl_debugfs_remove_device(hdev); 2427 2428 hl_dec_fini(hdev); 2429 2430 hl_vm_fini(hdev); 2431 2432 hl_mmu_fini(hdev); 2433 2434 vfree(hdev->captured_err_info.page_fault_info.user_mappings); 2435 2436 hl_eq_fini(hdev, &hdev->event_queue); 2437 2438 kfree(hdev->shadow_cs_queue); 2439 2440 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 2441 hl_cq_fini(hdev, &hdev->completion_queue[i]); 2442 kfree(hdev->completion_queue); 2443 kfree(hdev->user_interrupt); 2444 2445 hl_hw_queues_destroy(hdev); 2446 2447 /* Call ASIC S/W finalize function */ 2448 hdev->asic_funcs->sw_fini(hdev); 2449 2450 device_early_fini(hdev); 2451 2452 /* Hide devices and sysfs nodes from user */ 2453 device_cdev_sysfs_del(hdev); 2454 2455 pr_info("removed device successfully\n"); 2456 } 2457 2458 /* 2459 * MMIO register access helper functions. 
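 *
 * Typical use (an illustrative sketch; the register offsets below are
 * hypothetical and not defined by this driver):
 *
 *	u32 sts;
 *
 *	sts = hl_rreg(hdev, EXAMPLE_STS_REG_OFFSET);
 *	hl_wreg(hdev, EXAMPLE_CTL_REG_OFFSET, sts | 0x1);
 *
 * Both helpers optionally emit trace events when register tracing is enabled.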
2460 */ 2461 2462 /* 2463 * hl_rreg - Read an MMIO register 2464 * 2465 * @hdev: pointer to habanalabs device structure 2466 * @reg: MMIO register offset (in bytes) 2467 * 2468 * Returns the value of the MMIO register we are asked to read 2469 * 2470 */ 2471 inline u32 hl_rreg(struct hl_device *hdev, u32 reg) 2472 { 2473 u32 val = readl(hdev->rmmio + reg); 2474 2475 if (unlikely(trace_habanalabs_rreg32_enabled())) 2476 trace_habanalabs_rreg32(hdev->dev, reg, val); 2477 2478 return val; 2479 } 2480 2481 /* 2482 * hl_wreg - Write to an MMIO register 2483 * 2484 * @hdev: pointer to habanalabs device structure 2485 * @reg: MMIO register offset (in bytes) 2486 * @val: 32-bit value 2487 * 2488 * Writes the 32-bit value into the MMIO register 2489 * 2490 */ 2491 inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) 2492 { 2493 if (unlikely(trace_habanalabs_wreg32_enabled())) 2494 trace_habanalabs_wreg32(hdev->dev, reg, val); 2495 2496 writel(val, hdev->rmmio + reg); 2497 } 2498 2499 void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, 2500 u8 flags) 2501 { 2502 struct razwi_info *razwi_info = &hdev->captured_err_info.razwi_info; 2503 2504 if (num_of_engines > HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR) { 2505 dev_err(hdev->dev, 2506 "Number of possible razwi initiators (%u) exceeded limit (%u)\n", 2507 num_of_engines, HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR); 2508 return; 2509 } 2510 2511 /* In case it's the first razwi since the device was opened, capture its parameters */ 2512 if (atomic_cmpxchg(&hdev->captured_err_info.razwi_info.razwi_detected, 0, 1)) 2513 return; 2514 2515 razwi_info->razwi.timestamp = ktime_to_ns(ktime_get()); 2516 razwi_info->razwi.addr = addr; 2517 razwi_info->razwi.num_of_possible_engines = num_of_engines; 2518 memcpy(&razwi_info->razwi.engine_id[0], &engine_id[0], 2519 num_of_engines * sizeof(u16)); 2520 razwi_info->razwi.flags = flags; 2521 2522 razwi_info->razwi_info_available = true; 2523 } 2524 2525 void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, 2526 u8 flags, u64 *event_mask) 2527 { 2528 hl_capture_razwi(hdev, addr, engine_id, num_of_engines, flags); 2529 2530 if (event_mask) 2531 *event_mask |= HL_NOTIFIER_EVENT_RAZWI; 2532 } 2533 2534 static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu) 2535 { 2536 struct page_fault_info *pgf_info = &hdev->captured_err_info.page_fault_info; 2537 struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; 2538 struct hl_vm_hash_node *hnode; 2539 struct hl_userptr *userptr; 2540 enum vm_type *vm_type; 2541 struct hl_ctx *ctx; 2542 u32 map_idx = 0; 2543 int i; 2544 2545 /* Reset previous session count*/ 2546 pgf_info->num_of_user_mappings = 0; 2547 2548 ctx = hl_get_compute_ctx(hdev); 2549 if (!ctx) { 2550 dev_err(hdev->dev, "Can't get user context for user mappings\n"); 2551 return; 2552 } 2553 2554 mutex_lock(&ctx->mem_hash_lock); 2555 hash_for_each(ctx->mem_hash, i, hnode, node) { 2556 vm_type = hnode->ptr; 2557 if (((*vm_type == VM_TYPE_USERPTR) && is_pmmu) || 2558 ((*vm_type == VM_TYPE_PHYS_PACK) && !is_pmmu)) 2559 pgf_info->num_of_user_mappings++; 2560 2561 } 2562 2563 if (!pgf_info->num_of_user_mappings) 2564 goto finish; 2565 2566 /* In case we already allocated in previous session, need to release it before 2567 * allocating new buffer. 
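 * Note that vfree() is a no-op when given a NULL pointer, so the previous
 * buffer can be freed unconditionally here.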
2568 */ 2569 vfree(pgf_info->user_mappings); 2570 pgf_info->user_mappings = 2571 vzalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping)); 2572 if (!pgf_info->user_mappings) { 2573 pgf_info->num_of_user_mappings = 0; 2574 goto finish; 2575 } 2576 2577 hash_for_each(ctx->mem_hash, i, hnode, node) { 2578 vm_type = hnode->ptr; 2579 if ((*vm_type == VM_TYPE_USERPTR) && (is_pmmu)) { 2580 userptr = hnode->ptr; 2581 pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; 2582 pgf_info->user_mappings[map_idx].size = userptr->size; 2583 map_idx++; 2584 } else if ((*vm_type == VM_TYPE_PHYS_PACK) && (!is_pmmu)) { 2585 phys_pg_pack = hnode->ptr; 2586 pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; 2587 pgf_info->user_mappings[map_idx].size = phys_pg_pack->total_size; 2588 map_idx++; 2589 } 2590 } 2591 finish: 2592 mutex_unlock(&ctx->mem_hash_lock); 2593 hl_ctx_put(ctx); 2594 } 2595 2596 void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu) 2597 { 2598 struct page_fault_info *pgf_info = &hdev->captured_err_info.page_fault_info; 2599 2600 /* Capture only the first page fault */ 2601 if (atomic_cmpxchg(&pgf_info->page_fault_detected, 0, 1)) 2602 return; 2603 2604 pgf_info->page_fault.timestamp = ktime_to_ns(ktime_get()); 2605 pgf_info->page_fault.addr = addr; 2606 pgf_info->page_fault.engine_id = eng_id; 2607 hl_capture_user_mappings(hdev, is_pmmu); 2608 2609 pgf_info->page_fault_info_available = true; 2610 } 2611 2612 void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu, 2613 u64 *event_mask) 2614 { 2615 hl_capture_page_fault(hdev, addr, eng_id, is_pmmu); 2616 2617 if (event_mask) 2618 *event_mask |= HL_NOTIFIER_EVENT_PAGE_FAULT; 2619 } 2620 2621 static void hl_capture_hw_err(struct hl_device *hdev, u16 event_id) 2622 { 2623 struct hw_err_info *info = &hdev->captured_err_info.hw_err; 2624 2625 /* Capture only the first HW err */ 2626 if (atomic_cmpxchg(&info->event_detected, 0, 1)) 2627 return; 2628 2629 info->event.timestamp = ktime_to_ns(ktime_get()); 2630 info->event.event_id = event_id; 2631 2632 info->event_info_available = true; 2633 } 2634 2635 void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask) 2636 { 2637 hl_capture_hw_err(hdev, event_id); 2638 2639 if (event_mask) 2640 *event_mask |= HL_NOTIFIER_EVENT_CRITICL_HW_ERR; 2641 } 2642 2643 static void hl_capture_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *fw_info) 2644 { 2645 struct fw_err_info *info = &hdev->captured_err_info.fw_err; 2646 2647 /* Capture only the first FW error */ 2648 if (atomic_cmpxchg(&info->event_detected, 0, 1)) 2649 return; 2650 2651 info->event.timestamp = ktime_to_ns(ktime_get()); 2652 info->event.err_type = fw_info->err_type; 2653 if (fw_info->err_type == HL_INFO_FW_REPORTED_ERR) 2654 info->event.event_id = fw_info->event_id; 2655 2656 info->event_info_available = true; 2657 } 2658 2659 void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info) 2660 { 2661 hl_capture_fw_err(hdev, info); 2662 2663 if (info->event_mask) 2664 *info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR; 2665 } 2666
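/*
 * Illustrative sketch of how the firmware-error helpers above are meant to be
 * used (this is an example, not a call site in this file; 'event_id' and
 * 'event_mask' are assumed to come from the caller's event-handling context):
 *
 *	u64 event_mask = 0;
 *	struct hl_info_fw_err_info info = {
 *		.err_type = HL_INFO_FW_REPORTED_ERR,
 *		.event_id = event_id,
 *		.event_mask = &event_mask,
 *	};
 *
 *	hl_handle_fw_err(hdev, &info);
 *	if (event_mask)
 *		hl_notifier_event_send_all(hdev, event_mask);
 *
 * hl_handle_fw_err() records only the first error occurrence and sets
 * HL_NOTIFIER_EVENT_CRITICL_FW_ERR in the supplied mask, which is then
 * delivered to user processes via their eventfd notifiers.
 */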